Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-11-06 10:26:19 -05:00
commit 5c565d28b9
12 changed files with 44 additions and 58 deletions

View File

@ -131,7 +131,7 @@ public class AlignmentContextUtils {
}
}
public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup, String assumedSingleSample) {
public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup) {
return splitContextBySampleName(new AlignmentContext(pileup.getLocation(), pileup));
}

View File

@ -164,10 +164,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
protected Boolean LIST = false;
@Hidden
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)
protected String ASSUME_SINGLE_SAMPLE = null;
@Hidden
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false;
@ -213,11 +209,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
List<String> rodName = Arrays.asList(variantCollection.variants.getName());
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
// if there are no valid samples, warn the user
if ( samples.size() == 0 ) {
logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
}
if ( USE_ALL_ANNOTATIONS )
engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit());
else
@ -301,9 +292,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
Map<String, AlignmentContext> stratifiedContexts;
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
if ( ! context.hasExtendedEventPileup() ) {
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup(), ASSUME_SINGLE_SAMPLE);
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
} else {
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup(), ASSUME_SINGLE_SAMPLE);
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup());
}
if ( stratifiedContexts != null ) {
annotatedVCs = new ArrayList<VariantContext>(VCs.size());

View File

@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
/**
@ -71,12 +70,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
public void initialize() {
// get all of the unique sample names
// if we're supposed to assume a single sample, do so
Set<String> samples = new TreeSet<String>();
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
else
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);

View File

@ -96,11 +96,6 @@ public class UnifiedArgumentCollection {
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false)
public RodBinding<VariantContext> alleles;
// control the error modes
@Hidden
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)
public String ASSUME_SINGLE_SAMPLE = null;
/**
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
@ -170,7 +165,6 @@ public class UnifiedArgumentCollection {
uac.GenotypingMode = GenotypingMode;
uac.OutputMode = OutputMode;
uac.COMPUTE_SLOD = COMPUTE_SLOD;
uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE;
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;

View File

@ -206,12 +206,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
**/
public void initialize() {
// get all of the unique sample names
// if we're supposed to assume a single sample, do so
Set<String> samples = new TreeSet<String>();
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
else
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
// initialize the verbose writer
if ( verboseWriter != null )

View File

@ -106,12 +106,7 @@ public class UnifiedGenotyperEngine {
// ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null,
// get the number of samples
// if we're supposed to assume a single sample, do so
UAC.ASSUME_SINGLE_SAMPLE != null ?
new TreeSet<String>(Arrays.asList(UAC.ASSUME_SINGLE_SAMPLE)) :
SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
}
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
@ -253,7 +248,7 @@ public class UnifiedGenotyperEngine {
pileup = rawContext.getExtendedEventPileup();
else if (rawContext.hasBasePileup())
pileup = rawContext.getBasePileup();
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
}
@ -435,7 +430,7 @@ public class UnifiedGenotyperEngine {
pileup = rawContext.getExtendedEventPileup();
else if (rawContext.hasBasePileup())
pileup = rawContext.getBasePileup();
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
}
@ -569,7 +564,7 @@ public class UnifiedGenotyperEngine {
return null;
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
} else {
@ -586,12 +581,12 @@ public class UnifiedGenotyperEngine {
return null;
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
}
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE);
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
if( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) {
int numDeletions = 0;

View File

@ -817,7 +817,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010].
// TODO -- we need a better solution here
GATKSAMRecord read = aRead.getRead();
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
if ( read.getMappingQuality() != 255 ) // 255 == Unknown, so don't modify it
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
// before we fix the attribute tags we first need to make sure we have enough of the reference sequence
int neededBasesToLeft = leftmostIndex - read.getAlignmentStart();

View File

@ -58,15 +58,12 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="fractionToOut1", shortName="fraction", doc="Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2", required=false)
protected double fraction = 0.5;
protected int iFraction;
/**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
*/
public void initialize() {
if ( fraction < 0.0 || fraction > 1.0 )
throw new UserException.BadArgumentValue("fractionToOut1", "this value needs to be a number between 0 and 1");
iFraction = (int)(fraction * 1000.0);
// setup the header info
final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());
@ -93,8 +90,8 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
for ( VariantContext vc : vcs ) {
int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000);
if ( random < iFraction )
double random = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
if ( random < fraction )
vcfWriter1.add(vc);
else
vcfWriter2.add(vc);
@ -107,5 +104,8 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
public Integer reduce(Integer value, Integer sum) { return value + sum; }
public void onTraversalDone(Integer result) { logger.info(result + " records processed."); }
public void onTraversalDone(Integer result) {
logger.info(result + " records processed.");
vcfWriter2.close();
}
}

View File

@ -162,19 +162,27 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
* @return a feature, (not guaranteed complete) that has the correct start and stop
*/
public Feature decodeLoc(String line) {
String[] locParts = new String[6];
lineNo++;
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
// our header cannot be null, we need the genotype sample names and counts
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
final String[] locParts = new String[6];
int nParts = ParsingUtils.split(line, locParts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
if ( nParts != 6 )
throw new UserException.MalformedVCF("there aren't enough columns for line " + line, lineNo);
// get our alleles (because the end position depends on them)
String ref = getCachedString(locParts[3].toUpperCase());
String alts = getCachedString(locParts[4].toUpperCase());
List<Allele> alleles = parseAlleles(ref, alts, lineNo);
final String ref = getCachedString(locParts[3].toUpperCase());
final String alts = getCachedString(locParts[4].toUpperCase());
final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
// find out our location
int start = Integer.valueOf(locParts[1]);
final int start = Integer.valueOf(locParts[1]);
int stop = start;
// ref alleles don't need to be single bases for monomorphic sites

View File

@ -124,6 +124,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("using expression", spec);
}
@Test
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("4a6f0675242f685e9072c1da5ad9e715"));
executeTest("using expression with ID", spec);
}
@Test
public void testTabixAnnotations() {
final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";

View File

@ -9,7 +9,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf";
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf";
private static String cmdRoot = "-T VariantEval" +
" -R " + b36KGReference;
@ -359,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testPerSampleAndSubsettedSampleHaveSameResults() {
String md5 = "b0565ac61b2860248e4abd478a177b5e";
String md5 = "7425ca5c439afd7bb33ed5cfea02c2b3";
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
@ -369,7 +369,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"--eval " + fundamentalTestSNPsVCF,
"-noEV",
"-EV CompOverlap",
"-sn HG00625",
"-sn NA12045",
"-noST",
"-L " + fundamentalTestSNPsVCF,
"-o %s"

View File

@ -56,7 +56,7 @@ trait GATKScatterFunction extends ScatterFunction {
override def init() {
this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
this.referenceSequence = this.originalGATK.reference_sequence
if (this.originalGATK.intervals.isEmpty && this.originalGATK.intervalsString.isEmpty) {
if (this.originalGATK.intervals.isEmpty && (this.originalGATK.intervalsString == null || this.originalGATK.intervalsString.isEmpty)) {
this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, List.empty[String]).contigs
} else {
this.intervals ++= this.originalGATK.intervals.map(_.toString)