From 1da4ad9607a887bf520ceb1d896ca6512166b6bf Mon Sep 17 00:00:00 2001 From: Ami Levy Moonshine Date: Mon, 22 Oct 2012 10:45:53 -0400 Subject: [PATCH 01/12] new class to test the quals of reduced bam vs non-reduced bam --- .../gatk/walkers/qc/AssessReducedQuals.java | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java new file mode 100644 index 000000000..c8ec33364 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java @@ -0,0 +1,116 @@ +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.io.PrintStream; + +/** + * Created with IntelliJ IDEA. + * User: ami + * Date: 10/19/12 + * Time: 9:09 AM + * To change this template use File | Settings | File Templates. + */ +public class AssessReducedQuals extends LocusWalker implements TreeReducible { + + private static final String original = "original"; + private static final String reduced = "reduced"; + + @Output + protected PrintStream out; + + @Override + public boolean includeReadsWithDeletionAtLoci() { return true; } + + public void initialize() {} //todo: why we need that? + + @Override + public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) + return null; + + int originalQualsIndex = 0; + int reducedQualsIndex = 1; + double epsilon = 0; + double[] quals = getPileupQuals(context.getBasePileup()); + return (quals[originalQualsIndex] - quals[reducedQualsIndex] >= epsilon || quals[originalQualsIndex] - quals[reducedQualsIndex] <= -1*epsilon) ? ref.getLocus() : null; + + } + + private double[] getPileupQuals(final ReadBackedPileup readPileup) { + + int originalQualsIndex = 0; + int reducedQualsIndex = 1; + double[] quals = new double[2]; + + for( PileupElement p : readPileup){ + if ( (int)p.getQual() > 2 && p.getMappingQual() > 0 && !p.isDeletion() ){ + if (p.getRead().isReducedRead()){ + double tempQual = (double)(p.getQual()) * p.getRepresentativeCount(); + quals[reducedQualsIndex] += tempQual; + } + else + { + double tempQual = (double)(p.getQual()); + quals[originalQualsIndex] += tempQual; + } + } + + } + + + return quals; + } + + //public void onTraversalDone(GenomeLoc sum) { + // if ( sum != null ) + // out.println(sum); + //} + + @Override + public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) { + if ( lhs == null ) + return rhs; + + if ( rhs == null ) + return lhs; + + // if contiguous, just merge them + if ( lhs.contiguousP(rhs) ) + return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop()); + + // otherwise, print the lhs and start over with the rhs + out.println(lhs); + return rhs; + } + + @Override + public GenomeLoc reduceInit() { + return null; + } + + @Override + public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) { + if ( value == null ) + return sum; + + if ( sum == null ) + return value; + + // if contiguous, just merge them + if ( sum.contiguousP(value) ) + return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop()); + + // otherwise, print the sum and start over with the value + out.println(sum); + return value; + } +} From e93ff3ea6ea864984e0a132239a7b34534b11629 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 25 Oct 2012 12:45:23 -0400 Subject: [PATCH 02/12] Let's go back to having the SB/SLOD NOT computed by default. If you recall, it was only enabled by default because we thought we were going to use it when we made VQSR use random forests. But since we decided not to change VQSR, there's no reason to triple the computation for every variant site anymore. --- .../genotyper/UnifiedArgumentCollection.java | 6 +++--- .../walkers/genotyper/UnifiedGenotyper.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 2 +- .../walkers/variantutils/SelectVariants.java | 1 - .../UnifiedGenotyperIntegrationTest.java | 18 +++++++++--------- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e50f25fe8..5f6ddf0f1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -47,8 +47,8 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection /** * Note that calculating the SLOD increases the runtime by an appreciable amount. */ - @Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false) - public boolean NO_SLOD = false; + @Argument(fullName = "computeSLOD", shortName = "slod", doc = "If provided, we will calculate the SLOD (SB annotation)", required = false) + public boolean COMPUTE_SLOD = false; /** * Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping. @@ -204,7 +204,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection this.GLmodel = uac.GLmodel; this.AFmodel = uac.AFmodel; this.PCR_error = uac.PCR_error; - this.NO_SLOD = uac.NO_SLOD; + this.COMPUTE_SLOD = uac.COMPUTE_SLOD; this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED; this.MIN_BASE_QUALTY_SCORE = uac.MIN_BASE_QUALTY_SCORE; this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index a17c2aa1a..ef56eaa6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -290,7 +290,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); // annotation (INFO) fields from UnifiedGenotyper - if ( !UAC.NO_SLOD ) + if ( UAC.COMPUTE_SLOD ) VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.STRAND_BIAS_KEY); if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3c4a97ec1..97254c478 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -451,7 +451,7 @@ public class UnifiedGenotyperEngine { attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies); } - if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) { + if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) { //final boolean DEBUG_SLOD = false; // the overall lod diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index c7b1d0fc7..d1b7cb96f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -459,7 +459,6 @@ public class SelectVariants extends RodWalker implements TreeR UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH; UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; - UAC.NO_SLOD = true; UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null)); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 8eb1c4657..446f8cbe8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -16,9 +16,9 @@ import java.util.List; public class UnifiedGenotyperIntegrationTest extends WalkerTest { - private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; + private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam"; // -------------------------------------------------------------------------------------------------------------- @@ -61,7 +61,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, Arrays.asList("543f68e42034bf44cfb24da8c9204320")); executeTest("test Multiple SNP alleles", spec); } @@ -69,7 +69,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testBadRead() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, Arrays.asList("d915535c1458733f09f82670092fcab6")); executeTest("test bad read", spec); } @@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, Arrays.asList("5ce03dd9ca2d9324c1d4a9d64389beb5")); executeTest("test reverse trim", spec); } @@ -85,7 +85,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, Arrays.asList("3c006b06b17bbe8e787d64eff6a63a19")); executeTest("test mismatched PLs", spec); } @@ -156,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, + "-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, Arrays.asList("c7429e670ba477bf9a6bbee2fb41c5a9")); executeTest("test SLOD", spec); } @@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("8a9b424e00cdbe6b5e73d517335b2186")); + Arrays.asList("a6de72c5197f152be98a6f9029091a9e")); executeTest("test using comp track", spec); } From cc8c12b9548cc24d13dbcc9f798e3f3dab4ded98 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 25 Oct 2012 14:46:09 -0400 Subject: [PATCH 03/12] Committing a broken version of BaseRecalibration -- I'm committing because there's some kind of fundamental problem with the ReadCovariates cache, in that historical data isn't being cleared / computed properly, and I'd rather it fail for a while than leave it in JIRA. -- The integration tests test the -nct with PrintReads to get 1, 2, 4 and the 4 fails. But that's because of this incorrect calculation -- Updating GATKPerformanceOverTime with the new @ClassType annotation --- .../walkers/bqsr/BQSRIntegrationTest.java | 22 +++++++++++++------ .../recalibration/BaseRecalibration.java | 9 +++++--- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 90d67d393..a50219f48 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; /** * @author ebanks @@ -118,20 +120,26 @@ public class BQSRIntegrationTest extends WalkerTest { @DataProvider(name = "PRTest") public Object[][] createPRTestData() { - return new Object[][]{ - {new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")}, - {new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")}, - {new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")}, - {new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")} - }; + List tests = new ArrayList(); + + tests.add(new Object[]{1, new PRTest(" -qq -1", "a1d87da5dcbde35170d6ba6bc3ee2812")}); + tests.add(new Object[]{1, new PRTest(" -qq 6", "a0fecae6d0e5ab9917862fa306186d10")}); + tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}); + + for ( final int nct : Arrays.asList(1, 2, 4) ) { + tests.add(new Object[]{nct, new PRTest("", "d1bbb4ce6aa93e866f106f8b11d888ed")}); + } + + return tests.toArray(new Object[][]{}); } @Test(dataProvider = "PRTest") - public void testPR(PRTest params) { + public void testPR(final int nct, PRTest params) { WalkerTestSpec spec = new WalkerTestSpec( "-T PrintReads" + " -R " + hg18Reference + " -I " + privateTestDir + "HiSeq.1mb.1RG.bam" + + " -nct " + nct + " -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" + params.args + " -o %s", diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 7ad9302a8..2f9387acb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -116,9 +116,12 @@ public class BaseRecalibration { // Compute all covariates for the read // TODO -- the need to clear here suggests there's an error in the indexing / assumption code // TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads? - // TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here - // TODO -- needs to be fixed. - final ReadCovariates readCovariates = readCovariatesCache.get().clear(); + // TODO -- the output varies depending on whether we clear or not + //final ReadCovariates readCovariates = readCovariatesCache.get().clear(); + + // the original code -- doesn't do any clearing + final ReadCovariates readCovariates = readCovariatesCache.get(); + RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings From 884d031e72881f658ecd7df9803bdd930c64163b Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 25 Oct 2012 13:30:49 -0400 Subject: [PATCH 04/12] NestedIntegerArray: Pre-allocate only the first two dimensions It turns out that pre-allocating the entire tree was too expensive in terms of memory when using large values for the -mcs and -ics parameters. Pre-allocating the first two dimensions prevents us from ever locking the root node during a put(). Contention between threads over lower levels of the tree should be minimal given that puts are rare compared to gets. Also output dimensions and pre-allocation info at startup. If pre-allocation takes longer than usual this gives the user a sense of what is causing the delay. --- .../utils/collections/NestedIntegerArray.java | 51 ++++++++++++++----- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java b/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java index c83de8ec7..050ed52ac 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java @@ -25,9 +25,11 @@ package org.broadinstitute.sting.utils.collections; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; /** @@ -38,39 +40,50 @@ import java.util.List; public class NestedIntegerArray { + private static Logger logger = Logger.getLogger(NestedIntegerArray.class); + protected final Object[] data; protected final int numDimensions; protected final int[] dimensions; + // Preallocate the first two dimensions to limit contention during tree traversals in put() + private static final int NUM_DIMENSIONS_TO_PREALLOCATE = 2; + public NestedIntegerArray(final int... dimensions) { numDimensions = dimensions.length; if ( numDimensions == 0 ) throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray"); this.dimensions = dimensions.clone(); + int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE); + + logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions))); + logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate)); + data = new Object[dimensions[0]]; - prepopulateArray(data, 0); + preallocateArray(data, 0, dimensionsToPreallocate); + + logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate)); } /** - * Recursively allocate the entire tree of arrays in all its dimensions. + * Recursively allocate the first dimensionsToPreallocate dimensions of the tree * - * Doing this upfront uses more memory initially, but saves time over the course of the run - * and (crucially) avoids having to make threads wait while traversing the tree to check - * whether branches exist or not. + * Pre-allocating the first few dimensions helps limit contention during tree traversals in put() * * @param subarray current node in the tree * @param dimension current level in the tree + * @param dimensionsToPreallocate preallocate only this many dimensions (starting from the first) */ - private void prepopulateArray( Object[] subarray, int dimension ) { - if ( dimension >= numDimensions - 1 ) { + private void preallocateArray( Object[] subarray, int dimension, int dimensionsToPreallocate ) { + if ( dimension >= dimensionsToPreallocate - 1 ) { return; } for ( int i = 0; i < subarray.length; i++ ) { subarray[i] = new Object[dimensions[dimension + 1]]; - prepopulateArray((Object[])subarray[i], dimension + 1); + preallocateArray((Object[])subarray[i], dimension + 1, dimensionsToPreallocate); } } @@ -82,8 +95,9 @@ public class NestedIntegerArray { if ( keys[i] >= dimensions[i] ) return null; - myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse - // down to the leaves + myData = (Object[])myData[keys[i]]; + if ( myData == null ) + return null; } return (T)myData[keys[numNestedDimensions]]; @@ -92,8 +106,8 @@ public class NestedIntegerArray { /** * Insert a value at the position specified by the given keys. * - * This method is THREAD-SAFE despite not being synchronized, however the caller MUST - * check the return value to see if the put succeeded. This method RETURNS FALSE if + * This method is thread-safe, however the caller MUST check the + * return value to see if the put succeeded. This method RETURNS FALSE if * the value could not be inserted because there already was a value present * at the specified location. In this case the caller should do a get() to get * the already-existing value and (potentially) update it. @@ -113,8 +127,17 @@ public class NestedIntegerArray { if ( keys[i] >= dimensions[i] ) throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")"); - myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse - // down to the leaves + // If we're at or beyond the last dimension that was pre-allocated, we need to do a synchronized + // check to see if the next branch exists, and if it doesn't, create it + if ( i >= NUM_DIMENSIONS_TO_PREALLOCATE - 1 ) { + synchronized ( myData ) { + if ( myData[keys[i]] == null ) { + myData[keys[i]] = new Object[dimensions[i + 1]]; + } + } + } + + myData = (Object[])myData[keys[i]]; } synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it From dde3060bb84ecbb83878daf834b5d922eb4ef8d4 Mon Sep 17 00:00:00 2001 From: Ami Levy Moonshine Date: Thu, 25 Oct 2012 15:36:17 -0400 Subject: [PATCH 05/12] add the CEUtrio best practices results (UG + PBT) to the bundle --- .../gatk/walkers/qc/AssessReducedQuals.java | 123 +++++++++++++----- .../queue/qscripts/GATKResourcesBundle.scala | 9 +- 2 files changed, 96 insertions(+), 36 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java index c8ec33364..e74684c53 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -7,73 +8,125 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; +import java.util.List; /** - * Created with IntelliJ IDEA. - * User: ami - * Date: 10/19/12 - * Time: 9:09 AM - * To change this template use File | Settings | File Templates. + * Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of + * the reduced bam are above sufficient threshold) + * + *

Input

+ *

+ * The original and reduced BAM files. + *

+ * + *

Output

+ *

+ * A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -I:original original.bam \
+ *   -I:reduced reduced.bam \
+ *   -R ref.fasta \
+ *   -T AssessReducedQuals \
+ *   -o output.intervals
+ * 
+ * + * @author ami */ + public class AssessReducedQuals extends LocusWalker implements TreeReducible { - private static final String original = "original"; private static final String reduced = "reduced"; + private static final String original = "original"; + private static final int originalQualsIndex = 0; + private static final int reducedQualsIndex = 1; + + @Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false) + public int sufficientQualSum = 600; + + @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false) + public int qual_epsilon = 0; + + @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") + public int debugLevel = 0; @Output protected PrintStream out; + public void initialize() { + if (debugLevel != 0) + out.println(" Debug mode" + + "Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " + + "Debug:\tqual_epsilon: "+qual_epsilon); + } + @Override public boolean includeReadsWithDeletionAtLoci() { return true; } - public void initialize() {} //todo: why we need that? - @Override public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null ) return null; - int originalQualsIndex = 0; - int reducedQualsIndex = 1; - double epsilon = 0; - double[] quals = getPileupQuals(context.getBasePileup()); - return (quals[originalQualsIndex] - quals[reducedQualsIndex] >= epsilon || quals[originalQualsIndex] - quals[reducedQualsIndex] <= -1*epsilon) ? ref.getLocus() : null; + boolean reportLocus; + final int[] quals = getPileupQuals(context.getBasePileup()); + if (debugLevel != 0) + out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]); + final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon); + final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum); + final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum); + final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals; + reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon; + if(debugLevel != 0 && reportLocus) + out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff); + return reportLocus ? ref.getLocus() : null; } - private double[] getPileupQuals(final ReadBackedPileup readPileup) { + private final int[] getPileupQuals(final ReadBackedPileup readPileup) { - int originalQualsIndex = 0; - int reducedQualsIndex = 1; - double[] quals = new double[2]; + final int[] quals = new int[2]; + String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n", + "Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"}; - for( PileupElement p : readPileup){ - if ( (int)p.getQual() > 2 && p.getMappingQual() > 0 && !p.isDeletion() ){ - if (p.getRead().isReducedRead()){ - double tempQual = (double)(p.getQual()) * p.getRepresentativeCount(); - quals[reducedQualsIndex] += tempQual; - } - else - { - double tempQual = (double)(p.getQual()); - quals[originalQualsIndex] += tempQual; - } + for( PileupElement p : readPileup ){ + final List tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags(); + if ( isGoodRead(p,tags) ){ + final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount(); + final int tagIndex = getTagIndex(tags); + quals[tagIndex] += tempQual; + if(debugLevel == 2) + printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n"; } - } - - + if(debugLevel == 2){ + out.println(printPileup[originalQualsIndex]); + out.println(printPileup[reducedQualsIndex]); + } return quals; } - //public void onTraversalDone(GenomeLoc sum) { - // if ( sum != null ) - // out.println(sum); - //} + private final boolean isGoodRead(PileupElement p, List tags){ + return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20)); + } + + private final int getTagIndex(List tags){ + return tags.contains(reduced) ? 1 : 0; + } + + @Override + public void onTraversalDone(GenomeLoc sum) { + if ( sum != null ) + out.println(sum); + } @Override public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) { diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 5e66520ca..9e8815762 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -136,7 +136,14 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf", "Mills_and_1000G_gold_standard.indels", b37, true, false)) - + + // + // CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT) + // + + addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf", + "CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false)) + // // example call set for wiki tutorial // From 422e16c62e7b99c1928de1655dfa359dc9945428 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 25 Oct 2012 16:16:00 -0400 Subject: [PATCH 06/12] BaseRecalibration: don't cache instances of ReadCovariates across reads Caching and reusing ReadCovariates instances across reads sounds good in theory, but: -it doesn't work unless you zero out the internal arrays before each read -the internal arrays must be sized proportionally to the maximum POSSIBLE recalibrated read length (5000!!!), instead of the ACTUAL read lengths By contrast, creating a new instance per read is basically equivalent to doing an efficient low-level memset-style clear on a much smaller array (since we use the actual rather than the maximum read length to create it). So this should be faster than caching instances and calling clear() but slower than caching instances and not calling clear(). Credit to Ryan to proposing this approach. --- .../recalibration/BaseRecalibration.java | 22 +------------------ 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 2f9387acb..5d4020a07 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -61,17 +61,6 @@ public class BaseRecalibration { // qualityScoreByFullCovariateKey[i] = new NestedHashMap(); // } - /** - * Thread local cache to allow multi-threaded use of this class - */ - private ThreadLocal readCovariatesCache; - { - readCovariatesCache = new ThreadLocal () { - @Override protected ReadCovariates initialValue() { - return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); - } - }; - } /** * Constructor using a GATK Report file @@ -113,16 +102,7 @@ public class BaseRecalibration { } } - // Compute all covariates for the read - // TODO -- the need to clear here suggests there's an error in the indexing / assumption code - // TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads? - // TODO -- the output varies depending on whether we clear or not - //final ReadCovariates readCovariates = readCovariatesCache.get().clear(); - - // the original code -- doesn't do any clearing - final ReadCovariates readCovariates = readCovariatesCache.get(); - - RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); + final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates); for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { From 6b8b7df6517647d05bedf3acc9411c599e5a95cd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 25 Oct 2012 17:26:29 -0400 Subject: [PATCH 07/12] Queue now understands -nct and requests the appropriate number of cores from LSF, SGE, etc -- NCT wasn't previously recognized by Queue as needing more processors per machine. This commit fixes this. Also a potential cause of poor GATKPerformanceOverTime, in that runs with -nct could flood a node and cause it to have hundreds of cores in contention. --- .../sting/queue/extensions/gatk/ArgumentDefinitionField.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index 368cb3d11..2226c6458 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -346,7 +346,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { @Override protected String getFreezeFields() { - return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%n"); + return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n"); } } From 91f2c847a33dd5966deacaae7ff9d5481f20f9db Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 26 Oct 2012 00:57:40 -0400 Subject: [PATCH 09/12] Fixing problem reported on forum for VF: DP couldn't be filtered from the FORMAT field, only from the INFO field. Fixed and added integration test. --- .../variantcontext/VariantJEXLContext.java | 2 ++ .../VariantFiltrationIntegrationTest.java | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java index 913615a84..abe85e383 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java @@ -193,6 +193,8 @@ class JEXLMap implements Map { infoMap.put("isHet", g.isHet() ? "1" : "0"); infoMap.put("isHomVar", g.isHomVar() ? "1" : "0"); infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ()); + if ( g.hasDP() ) + infoMap.put(VCFConstants.DEPTH_KEY, g.getDP()); for ( Map.Entry e : g.getExtendedAttributes().entrySet() ) { if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) ) infoMap.put(e.getKey(), e.getValue()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 97b985a29..27e5f3d46 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -108,4 +108,22 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { Arrays.asList("8ed32a2272bab8043a255362335395ef")); executeTest("testUnfilteredBecomesFilteredAndPass", spec); } + + @Test + public void testFilteringDPfromINFO() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + + " --filterExpression 'DP < 8' --filterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1, + Arrays.asList("a01f7cce53ea556c9741aa60b6124c41")); + executeTest("testFilteringDPfromINFO", spec); + } + + @Test + public void testFilteringDPfromFORMAT() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + + " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1, + Arrays.asList("e10485c7c33d9211d0c1294fd7858476")); + executeTest("testFilteringDPfromFORMAT", spec); + } } From bf3d61ce825caa65c17a61429d7fb8939b36af9d Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 26 Oct 2012 01:04:51 -0400 Subject: [PATCH 10/12] The default value for --contamination_fraction_to_filter is now 0.05 (5%) in both UG and HC. Users of GATK-lite get pushed down to 0% by default (since it's not enabled) or get a user error if they try to set it. --- ...GenotyperGeneralPloidyIntegrationTest.java | 14 +++--- .../HaplotypeCallerIntegrationTest.java | 18 ++++---- .../StandardCallerArgumentCollection.java | 5 +- .../walkers/genotyper/UnifiedGenotyper.java | 8 +++- .../UnifiedGenotyperIntegrationTest.java | 46 ++++++++----------- 5 files changed, 45 insertions(+), 46 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index ae3befe66..73bc8fba6 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testSNP_ACS_Pools() { - PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","ec19f0b7c7d57493cecfff988a4815c8"); + PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705"); } @Test(enabled = true) public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","9ce24f4ff787aed9d3754519a60ef49f"); + PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","7e5b28c9e21cc7e45c58c41177d8a0fc"); } @Test(enabled = true) public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","492c8ba9a80a902097ff15bbeb031592"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ae6c276cc46785a794acff6f7d10ecf7"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","848e1092b5cd57b0da5f1187e67134e7"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","481452ad7d6378cffb5cd834cc621d55"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","51a7b51d82a341adec0e6510f5dfadd8"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","812957e51277aca9925c1a7bb4d9a118"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","0a8c3b06243040b743dd90d497bb3f83"); + PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c"); } @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "f8ea18ec6a717a77fdf8c5f2482d8d8d"); + PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 71f03ea00..86f3748ce 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "1d826f852ec1457efbfa7ee828c5783a"); + HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "01367428c26d3eaf9297c58bf8677dd3"); + HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215"); } @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "53caa950535749f99d3c5b9bb61c7b60"); + HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b4ea70a446e4782bd3700ca14dd726ff"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b"); } @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c54c0c9411054bf629bfd98b616e53fc")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c642dcd93771f6f084d55de31f180d1b")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047")); executeTest("HCTestStructuralIndels: ", spec); } @@ -91,7 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("79af83432dc4a1768b3ebffffc4d2b8f")); + Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9")); executeTest("HC calling on a ReducedRead BAM", spec); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index 2f6d9d16f..547f375bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -75,8 +75,9 @@ public class StandardCallerArgumentCollection { * Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we * will try to remove (N * contamination fraction) bases for each alternate allele. */ - @Argument(fullName = "contamination_percentage_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false) - public double CONTAMINATION_FRACTION = 0.0; + @Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false) + public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION; + public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05; @Hidden @Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index ef56eaa6d..36be2e7c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; @@ -238,7 +239,12 @@ public class UnifiedGenotyper extends LocusWalker, Unif } if ( UAC.CONTAMINATION_FRACTION > 0.0 ) { - throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling"); + if ( UAC.CONTAMINATION_FRACTION == StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION ) { + UAC.CONTAMINATION_FRACTION = 0.0; + logger.warn("setting contamination down-sampling fraction to 0.0 because it is not enabled in GATK-lite"); + } else { + throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling"); + } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 446f8cbe8..a5192ffff 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("847605f4efafef89529fe0e496315edd")); + Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a")); executeTest("test MultiSample Pilot1", spec); } @@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("543f68e42034bf44cfb24da8c9204320")); + Arrays.asList("94dc17d76d841f1d3a36160767ffa034")); executeTest("test Multiple SNP alleles", spec); } @@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("3c006b06b17bbe8e787d64eff6a63a19")); + Arrays.asList("d847acf841ba8ba653f996ce4869f439")); executeTest("test mismatched PLs", spec); } @@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "fd236bd635d514e4214d364f45ec4d10"; + private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad"; @Test public void testCompressedOutput() { @@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinBaseQualityScore() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1, - Arrays.asList("839ecd30d354a36b5dfa2b5e99859765")); + Arrays.asList("56157d930da6ccd224bce1ca93f11e41")); executeTest("test min_base_quality_score 26", spec); } @@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNDA() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("abd8e33e649cc11b55e200d3940cc7e2")); + Arrays.asList("480437dd6e2760f4ab3194431519f331")); executeTest("test NDA", spec); } @@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("a6de72c5197f152be98a6f9029091a9e")); + Arrays.asList("22c039412fd387dde6125b07c9a74a25")); executeTest("test using comp track", spec); } @@ -211,18 +211,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("9addd225a985178339a0c49dc5fdc220")); + Arrays.asList("df524e98903d96ab9353bee7c16a69de")); executeTest("test confidence 1", spec1); } - @Test - public void testConfidence2() { - WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("9addd225a985178339a0c49dc5fdc220")); - executeTest("test confidence 2", spec2); - } - // -------------------------------------------------------------------------------------------------------------- // // testing heterozygosity @@ -230,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "986923de51c71635d47e3d06fe3794a1" ); + testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "fb12b1553f813004a394a391a8540873" ); + testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" ); } private void testHeterozosity(final double arg, final String md5) { @@ -259,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("04a87b87ee4323eba853c78f25551d1a")); + Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835")); executeTest(String.format("test multiple technologies"), spec); } @@ -278,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("98058fc913b61c22d44875da1f5ea89c")); + Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52")); executeTest(String.format("test calling with BAQ"), spec); } @@ -312,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("6a0c2a3a7bcc56ad01428c71408055aa")); + Arrays.asList("8b486a098029d5a106b0a37eff541c15")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -325,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("5f2721c3323de5390d2d47446139f32b")); + Arrays.asList("18efedc50cae2aacaba372265e38310b")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -353,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("a4761d7f25e7a62f34494801c98a0da7")); + Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("c526c234947482d1cd2ffc5102083a08")); + Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -415,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("ba4fafec383fb988f20c8cf53dd3e9a0")); + Arrays.asList("857b8e5df444463ac27f665c4f67fbe2")); executeTest("test minIndelFraction 0.0", spec); } @@ -423,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("4c57a88de275105156aaafc6f9041365")); + Arrays.asList("81d4c7d9010fd6733b2997bc378e7471")); executeTest("test minIndelFraction 0.25", spec); } @@ -490,7 +482,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testContaminationDownsampling() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_percentage_to_filter 0.20", 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1, Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae")); executeTest("test contamination_percentage_to_filter 0.20", spec); } From a53e03d52577e09cad20738b507260cb8e8f0980 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 26 Oct 2012 02:13:04 -0400 Subject: [PATCH 11/12] Do not let reduced reads get removed in the contamination down-sampling --- .../AlleleBiasedDownsamplingUtils.java | 4 ++ .../AdvancedPerReadAlleleLikelihoodMap.java | 9 +++-- .../UnifiedGenotyperIntegrationTest.java | 40 +++++++++---------- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index e224a9c57..830fd111b 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -60,6 +60,10 @@ public class AlleleBiasedDownsamplingUtils { // start by stratifying the reads by the alleles they represent at this position for( final PileupElement pe : pileup ) { + // abort if we have a reduced read - we do not want to remove it! + if ( pe.getRead().isReducedRead() ) + return pileup; + final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase()); if ( baseIndex != -1 ) alleleStratifiedElements[baseIndex].add(pe); diff --git a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java index d6cddcf84..77a7c3bd9 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java +++ b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java @@ -55,9 +55,12 @@ public class AdvancedPerReadAlleleLikelihoodMap extends StandardPerReadAlleleLik alleleReadMap.put(allele, new ArrayList()); for ( Map.Entry> entry : likelihoodReadMap.entrySet() ) { - final Allele bestAllele = getMostLikelyAllele(entry.getValue()); - if ( bestAllele != Allele.NO_CALL ) - alleleReadMap.get(bestAllele).add(entry.getKey()); + // do not remove reduced reads! + if ( !entry.getKey().isReducedRead() ) { + final Allele bestAllele = getMostLikelyAllele(entry.getValue()); + if ( bestAllele != Allele.NO_CALL ) + alleleReadMap.get(bestAllele).add(entry.getKey()); + } } // compute the reads to remove and actually remove them diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index a5192ffff..93fe7e80d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("bc15123620e1134f799005d71d1180fe")); + Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); } @@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("1ba7afccc8552f20d72d0b62237558e3")); + Arrays.asList("24532eb381724cd74e99370da28d49ed")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("afb8768f31ab57eb43f75c1115eadc99")); + Arrays.asList("062a946160eec1d0fc135d58ca654ff4")); executeTest("test SingleSample Pilot2", spec); } @@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("5ce03dd9ca2d9324c1d4a9d64389beb5")); + Arrays.asList("9106d01ca0d0a8fedd068e72d509f380")); executeTest("test reverse trim", spec); } @@ -117,7 +117,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "d408b4661b820ed86272415b8ea08780"; + String md5 = "3d25ea660275a455ca443a786bff3d32"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("c7429e670ba477bf9a6bbee2fb41c5a9")); + Arrays.asList("9f95cfe14d53a697c58247833bfd72a6")); executeTest("test SLOD", spec); } @@ -177,7 +177,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test using comp track", spec); } - @Test + //@Test public void testNoCmdLineHeaderStdout() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandNoCmdLineHeaderStdout + " -glm INDEL -L 1:67,225,396-67,288,518", 0, @@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameterSitesOnly() { - testOutputParameters("-sites_only", "97ba874eafc9884a4de027a84c036311"); + testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb"); } @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "f9ea04d96eeef29e71d37e60518c2579"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c"); } @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "41c046d38ea328421df924e37e017645"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841"); } private void testOutputParameters(final String args, final String md5) { @@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("650c53774afacfc07a595675e8cdde17")); + Arrays.asList("3593495aab5f6204c65de0b073a6ff65")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("7e3f67bf371112be5dbadb4fe6faa52a")); + Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("bc31c4977cb7e563ddf9c8dea27f3f4f")); + Arrays.asList("578c0540f4f2052a634a829bcb9cc27d")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -436,8 +436,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testNsInCigar() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1, - Arrays.asList("e8ebfaac0804b782f22ab8ea35152735")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1, + Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f")); executeTest("test calling on reads with Ns in CIGAR", spec); } @@ -450,25 +450,25 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("da9c05f87bd6415e97f90c49cf68ed19")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, + Arrays.asList("1b711c0966a2f76eb21801e73b76b758")); executeTest("test calling on a ReducedRead BAM", spec); } @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "1d4a826b144723ff0766c36aa0239287"); + testReducedCalling("SNP", "9ba4867cadb366746ee63e7a4afdb95e"); } @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "68ef51d5c98480e0c0192e0eecb95bca"); + testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5"); } private void testReducedCalling(final String model, final String md5) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1, + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1, Arrays.asList(md5)); executeTest("test calling on a ReducedRead BAM with " + model, spec); } From ebebec7fdb74e180e4bc019e18b13261227a93c3 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 26 Oct 2012 02:15:32 -0400 Subject: [PATCH 12/12] Accidentally left one test disabled --- .../gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 93fe7e80d..1b2fbb848 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -177,7 +177,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test using comp track", spec); } - //@Test + @Test public void testNoCmdLineHeaderStdout() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandNoCmdLineHeaderStdout + " -glm INDEL -L 1:67,225,396-67,288,518", 0,