From cfb994abd20a5f17c155dd8a644cfb6372150a79 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 13 Aug 2012 22:55:02 -0400 Subject: [PATCH 1/4] Trivial removal of ununsed variable (mentioned in resolved JIRA entry) --- .../sting/gatk/arguments/GATKArgumentCollection.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 1e6920b82..4c9235b58 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -57,8 +57,6 @@ public class GATKArgumentCollection { public GATKArgumentCollection() { } - public Map walkerArgs = new HashMap(); - // parameters and their defaults @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) public List samFiles = new ArrayList(); From 34b62fa092222d068dd193217aa3d3652ecdad2f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 14 Aug 2012 12:54:31 -0400 Subject: [PATCH 2/4] Two changes to SelectVariants: 1) don't add DP INFO annotation if DP wasn't used in the input VCF (it was adding DP=0 previously). 2) If MLEAC or MLEAF is present in the original VCF and the number of samples decreases, remove those annotations from the VC. --- .../walkers/variantutils/SelectVariants.java | 16 +++++++++--- .../SelectVariantsIntegrationTest.java | 26 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index cf528de09..1493815ee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -730,7 +730,13 @@ public class SelectVariants extends RodWalker implements TreeR if ( vc.getAlleles().size() != sub.getAlleles().size() ) newGC = VariantContextUtils.stripPLs(sub.getGenotypes()); - //Remove a fraction of the genotypes if needed + // if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags + if ( vc.getNSamples() != sub.getNSamples() ) { + builder.rmAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY); + builder.rmAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY); + } + + // Remove a fraction of the genotypes if needed if ( fractionGenotypes > 0 ){ ArrayList genotypes = new ArrayList(); for ( Genotype genotype : newGC ) { @@ -767,17 +773,21 @@ public class SelectVariants extends RodWalker implements TreeR VariantContextUtils.calculateChromosomeCounts(builder, false); + boolean sawDP = false; int depth = 0; for (String sample : originalVC.getSampleNames()) { Genotype g = originalVC.getGenotype(sample); if ( ! g.isFiltered() ) { - if ( g.hasDP() ) + if ( g.hasDP() ) { depth += g.getDP(); + sawDP = true; + } } } - builder.attribute("DP", depth); + if ( sawDP ) + builder.attribute("DP", depth); } private void randomlyAddVariant(int rank, VariantContext vc) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index e25d65465..7a865ddeb 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -154,6 +154,32 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testRegenotype--" + testFile, spec); } + @Test + public void testRemoveMLE() { + String testFile = privateTestDir + "vcfexample.withMLE.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", + 1, + Arrays.asList("") + ); + + executeTest("testRegenotype--" + testFile, spec); + } + + @Test + public void testRemoveMLEAndRegenotype() { + String testFile = privateTestDir + "vcfexample.withMLE.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", + 1, + Arrays.asList("") + ); + + executeTest("testRegenotype--" + testFile, spec); + } + @Test public void testMultipleRecordsAtOnePosition() { String testFile = privateTestDir + "selectVariants.onePosition.vcf"; From 8e3774fb0e2aaa1c3b62cfda027fa1621ba0b4e8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 14 Aug 2012 14:21:42 -0400 Subject: [PATCH 3/4] Fixing behavior of the --regenotype argument in SelectVariants to properly run in GenotypeGivenAlleles mode. Added integration tests to cover recent SV changes. --- .../genotyper/UnifiedGenotyperEngine.java | 10 ++++++---- .../walkers/variantutils/SelectVariants.java | 3 ++- .../SelectVariantsIntegrationTest.java | 16 ++++++++-------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index f4bd196ae..f15fa9b99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -646,15 +646,17 @@ public class UnifiedGenotyperEngine { // if we're genotyping given alleles and we have a requested SNP at this position, do SNP if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); - if ( vcInput == null ) + final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); + if ( vcInput == null ) { + models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); return models; + } if ( vcInput.isSNP() ) { // ignore SNPs if the user chose INDEL mode only if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") ) models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); - } + } else if ( vcInput.isIndel() || vcInput.isMixed() ) { // ignore INDELs if the user chose SNP mode only if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") ) @@ -759,7 +761,7 @@ public class UnifiedGenotyperEngine { public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding allelesBinding) { if ( tracker == null || ref == null || logger == null ) - throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger); + return null; VariantContext vc = null; // search for usable record diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 1493815ee..0810710c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -470,6 +470,7 @@ public class SelectVariants extends RodWalker implements TreeR final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH; UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; + UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; UAC.NO_SLOD = true; UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null)); @@ -567,7 +568,7 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS); if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) { - final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(tracker, ref, context, sub)).filters(sub.getFiltersMaybeNull()); + final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(null, ref, context, sub)).filters(sub.getFiltersMaybeNull()); addAnnotations(builder, sub); sub = builder.make(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 7a865ddeb..e172200f7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -34,7 +34,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -sn B -sn C --variant " + testfile), 1, - Arrays.asList("3d98a024bf3aecbd282843e0af89d0e6") + Arrays.asList("125d1c9fa111cd38dfa2ff3900f16b57") ); executeTest("testRepeatedLineSelection--" + testfile, spec); @@ -49,7 +49,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING", 1, - Arrays.asList("54289033d35d32b8ebbb38c51fbb614c") + Arrays.asList("c0b937edb6a8b6392d477511d4f1ebcf") ); spec.disableShadowBCF(); @@ -135,7 +135,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("d12ae1617deb38f5ed712dc326935b9a") + Arrays.asList("a554459c9ccafb9812ff6d8c06c11726") ); executeTest("testUsingDbsnpName--" + testFile, spec); @@ -148,7 +148,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("c22ad8864d9951403672a24c20d6c3c2") + Arrays.asList("52cb2f150559ca1457e9df7ec153dbb452cb2f150559ca1457e9df7ec153dbb4") ); executeTest("testRegenotype--" + testFile, spec); @@ -161,10 +161,10 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("") + Arrays.asList("a554459c9ccafb9812ff6d8c06c11726") ); - executeTest("testRegenotype--" + testFile, spec); + executeTest("testRemoveMLE--" + testFile, spec); } @Test @@ -174,10 +174,10 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("") + Arrays.asList("52cb2f150559ca1457e9df7ec153dbb4") ); - executeTest("testRegenotype--" + testFile, spec); + executeTest("testRemoveMLEAndRegenotype--" + testFile, spec); } @Test From 87e41c83c56b2f36bd95f8a1736c530d588446b0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 14 Aug 2012 15:02:30 -0400 Subject: [PATCH 4/4] In AlleleCount stratification, check to make sure the AC (or MLEAC) is valid (i.e. not higher than number of chromosomes) and throw a User Error if it isn't. Added a test for bad AC. --- .../varianteval/stratifications/AlleleCount.java | 4 ++++ .../varianteval/VariantEvalIntegrationTest.java | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 158f20b61..50c5526e4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -51,6 +51,10 @@ public class AlleleCount extends VariantStratifier { AC = Math.max(AC, eval.getCalledChrCount(allele)); } + // make sure that the AC isn't invalid + if ( AC > eval.getCalledChrCount() ) + throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d is larger than the possible called chromosome count (%d)", AC, eval.getChr(), eval.getStart(), eval.getCalledChrCount())); + return Collections.singletonList((Object) AC); } else { return Collections.emptyList(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 94e52c2b9..c92d6d4cf 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -585,6 +585,21 @@ public class VariantEvalIntegrationTest extends WalkerTest { executeTest("testStandardIndelEval", spec); } + @Test + public void testBadACValue() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-eval " + privateTestDir + "vcfexample.withBadAC.vcf", + "-noST -ST AlleleCount", + "-noEV -EV VariantSummary" + ), + 0, + UserException.class); + executeTest("testBadACValue", spec); + } + @Test() public void testIncompatibleEvalAndStrat() {