diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java index 4e75f3ddb..d589f9029 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java @@ -131,7 +131,7 @@ public class AlignmentContextUtils { } } - public static Map splitContextBySampleName(ReadBackedPileup pileup, String assumedSingleSample) { + public static Map splitContextBySampleName(ReadBackedPileup pileup) { return splitContextBySampleName(new AlignmentContext(pileup.getLocation(), pileup)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index c9937f3d6..8f4bc0abd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -164,10 +164,6 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit") protected Boolean LIST = false; - @Hidden - @Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false) - protected String ASSUME_SINGLE_SAMPLE = null; - @Hidden @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; @@ -213,11 +209,6 @@ public class VariantAnnotator extends RodWalker implements Ann List rodName = Arrays.asList(variantCollection.variants.getName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); - // if there are no valid samples, warn the user - if ( samples.size() == 0 ) { - logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired."); - } - if ( USE_ALL_ANNOTATIONS ) engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit()); else @@ -301,9 +292,9 @@ public class VariantAnnotator extends RodWalker implements Ann Map stratifiedContexts; if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) { if ( ! context.hasExtendedEventPileup() ) { - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup(), ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup()); } else { - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup(), ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup()); } if ( stratifiedContexts != null ) { annotatedVCs = new ArrayList(VCs.size()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java index 503d87cbe..c7e577393 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java @@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.HashSet; import java.util.Set; -import java.util.TreeSet; /** @@ -71,12 +70,7 @@ public class UGCalcLikelihoods extends LocusWalker public void initialize() { // get all of the unique sample names - // if we're supposed to assume a single sample, do so - Set samples = new TreeSet(); - if ( UAC.ASSUME_SINGLE_SAMPLE != null ) - samples.add(UAC.ASSUME_SINGLE_SAMPLE); - else - samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 07d9892a1..62218416d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -96,11 +96,6 @@ public class UnifiedArgumentCollection { @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false) public RodBinding alleles; - // control the error modes - @Hidden - @Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false) - public String ASSUME_SINGLE_SAMPLE = null; - /** * The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base * is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value. @@ -170,7 +165,6 @@ public class UnifiedArgumentCollection { uac.GenotypingMode = GenotypingMode; uac.OutputMode = OutputMode; uac.COMPUTE_SLOD = COMPUTE_SLOD; - uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE; uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING; uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING; uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 72dc217e1..bdd4e2c65 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -206,12 +206,7 @@ public class UnifiedGenotyper extends LocusWalker samples = new TreeSet(); - if ( UAC.ASSUME_SINGLE_SAMPLE != null ) - samples.add(UAC.ASSUME_SINGLE_SAMPLE); - else - samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); // initialize the verbose writer if ( verboseWriter != null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 993a434ac..cee128a6a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -106,12 +106,7 @@ public class UnifiedGenotyperEngine { // --------------------------------------------------------------------------------------------------------- @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { - this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, - // get the number of samples - // if we're supposed to assume a single sample, do so - UAC.ASSUME_SINGLE_SAMPLE != null ? - new TreeSet(Arrays.asList(UAC.ASSUME_SINGLE_SAMPLE)) : - SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader())); + this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader())); } @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) @@ -253,7 +248,7 @@ public class UnifiedGenotyperEngine { pileup = rawContext.getExtendedEventPileup(); else if (rawContext.hasBasePileup()) pileup = rawContext.getBasePileup(); - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc); } @@ -435,7 +430,7 @@ public class UnifiedGenotyperEngine { pileup = rawContext.getExtendedEventPileup(); else if (rawContext.hasBasePileup()) pileup = rawContext.getBasePileup(); - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall); } @@ -569,7 +564,7 @@ public class UnifiedGenotyperEngine { return null; // stratify the AlignmentContext and cut by sample - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); } else { @@ -586,12 +581,12 @@ public class UnifiedGenotyperEngine { return null; // stratify the AlignmentContext and cut by sample - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); } } else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) { // stratify the AlignmentContext and cut by sample - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup()); if( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) { int numDeletions = 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 60b1f8a88..ba031c497 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -817,7 +817,8 @@ public class IndelRealigner extends ReadWalker { // For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010]. // TODO -- we need a better solution here GATKSAMRecord read = aRead.getRead(); - read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254)); + if ( read.getMappingQuality() != 255 ) // 255 == Unknown, so don't modify it + read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254)); // before we fix the attribute tags we first need to make sure we have enough of the reference sequence int neededBasesToLeft = leftmostIndex - read.getAlignmentStart(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index fa5093839..88de12f9a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -58,15 +58,12 @@ public class RandomlySplitVariants extends RodWalker { @Argument(fullName="fractionToOut1", shortName="fraction", doc="Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2", required=false) protected double fraction = 0.5; - protected int iFraction; - /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ public void initialize() { if ( fraction < 0.0 || fraction > 1.0 ) throw new UserException.BadArgumentValue("fractionToOut1", "this value needs to be a number between 0 and 1"); - iFraction = (int)(fraction * 1000.0); // setup the header info final List inputNames = Arrays.asList(variantCollection.variants.getName()); @@ -93,8 +90,8 @@ public class RandomlySplitVariants extends RodWalker { Collection vcs = tracker.getValues(variantCollection.variants, context.getLocation()); for ( VariantContext vc : vcs ) { - int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000); - if ( random < iFraction ) + double random = GenomeAnalysisEngine.getRandomGenerator().nextDouble(); + if ( random < fraction ) vcfWriter1.add(vc); else vcfWriter2.add(vc); @@ -107,5 +104,8 @@ public class RandomlySplitVariants extends RodWalker { public Integer reduce(Integer value, Integer sum) { return value + sum; } - public void onTraversalDone(Integer result) { logger.info(result + " records processed."); } + public void onTraversalDone(Integer result) { + logger.info(result + " records processed."); + vcfWriter2.close(); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 07a61ae36..0e0cb14bf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -162,19 +162,27 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, * @return a feature, (not guaranteed complete) that has the correct start and stop */ public Feature decodeLoc(String line) { - String[] locParts = new String[6]; + lineNo++; + + // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line + if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; + + // our header cannot be null, we need the genotype sample names and counts + if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record"); + + final String[] locParts = new String[6]; int nParts = ParsingUtils.split(line, locParts, VCFConstants.FIELD_SEPARATOR_CHAR, true); if ( nParts != 6 ) throw new UserException.MalformedVCF("there aren't enough columns for line " + line, lineNo); // get our alleles (because the end position depends on them) - String ref = getCachedString(locParts[3].toUpperCase()); - String alts = getCachedString(locParts[4].toUpperCase()); - List alleles = parseAlleles(ref, alts, lineNo); + final String ref = getCachedString(locParts[3].toUpperCase()); + final String alts = getCachedString(locParts[4].toUpperCase()); + final List alleles = parseAlleles(ref, alts, lineNo); // find out our location - int start = Integer.valueOf(locParts[1]); + final int start = Integer.valueOf(locParts[1]); int stop = start; // ref alleles don't need to be single bases for monomorphic sites diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 8e887c32a..189f643d4 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -124,6 +124,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("using expression", spec); } + @Test + public void testUsingExpressionWithID() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1, + Arrays.asList("4a6f0675242f685e9072c1da5ad9e715")); + executeTest("using expression with ID", spec); + } + @Test public void testTabixAnnotations() { final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index cd2493dde..cbfe0ceab 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -9,7 +9,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval"; private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; - private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; + private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf"; private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; @@ -359,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testPerSampleAndSubsettedSampleHaveSameResults() { - String md5 = "b0565ac61b2860248e4abd478a177b5e"; + String md5 = "7425ca5c439afd7bb33ed5cfea02c2b3"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( @@ -369,7 +369,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "--eval " + fundamentalTestSNPsVCF, "-noEV", "-EV CompOverlap", - "-sn HG00625", + "-sn NA12045", "-noST", "-L " + fundamentalTestSNPsVCF, "-o %s" diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKScatterFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKScatterFunction.scala index 24c2831b6..c9adff026 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKScatterFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKScatterFunction.scala @@ -56,7 +56,7 @@ trait GATKScatterFunction extends ScatterFunction { override def init() { this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] this.referenceSequence = this.originalGATK.reference_sequence - if (this.originalGATK.intervals.isEmpty && this.originalGATK.intervalsString.isEmpty) { + if (this.originalGATK.intervals.isEmpty && (this.originalGATK.intervalsString == null || this.originalGATK.intervalsString.isEmpty)) { this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, List.empty[String]).contigs } else { this.intervals ++= this.originalGATK.intervals.map(_.toString)