From 2e5f9db758ab9100c2024f1f8d136a579243b7f5 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 7 Jan 2015 11:32:28 -0500 Subject: [PATCH] Raising per-sample limits on the number of reads in ART and HC. -- Active Region Traversal was using per sample limits on the number of reads that were too low, especially now that we are running one sample at a time. This caused issues with high confidence variants being dropped in high coverage data. -- HaplotypeCallerGVCFIntegrationTest PL/annotation changes due to using more reads in those tests -- Removed a CountReadsInActiveRegionsIntegrationTest test for excessive coverage because the read coverage no longer goes over the limits in ART --- .../haplotypecaller/HaplotypeCaller.java | 9 ++++--- .../HaplotypeCallerGVCFIntegrationTest.java | 24 +++++++++---------- .../ActiveRegionTraversalParameters.java | 4 ++-- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 5922defb1..24037760e 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -235,7 +235,7 @@ import java.util.*; @BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) @ActiveRegionTraversalParameters(extension=100, maxRegion=300) @ReadFilters({HCMappingQualityFilter.class}) -@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=250) +@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=500) public class HaplotypeCaller extends ActiveRegionWalker, Integer> implements AnnotatorCompatible, NanoSchedulable { // ----------------------------------------------------------------------------------------------- // general haplotype caller arguments @@ -696,10 +696,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In * not reducing coverage at any read start to less than minReadsPerAlignmentStart */ @Argument(fullName = "maxReadsInRegionPerSample", shortName = "maxReadsInRegionPerSample", doc="Maximum reads in an active region", required = false) - protected int maxReadsInRegionPerSample = 1000; + protected int maxReadsInRegionPerSample = 10000; @Argument(fullName = "minReadsPerAlignmentStart", shortName = "minReadsPerAlignStart", doc="Minimum number of reads sharing the same alignment start for each genomic location in an active region", required = false) - protected int minReadsPerAlignmentStart = 5; + protected int minReadsPerAlignmentStart = 10; private byte MIN_TAIL_QUALITY; private static final byte MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION = 6; @@ -1183,6 +1183,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In protected AssemblyResultSet assembleReads(final ActiveRegion activeRegion, final List giveAlleles) { // Create the reference haplotype which is the bases from the reference that make up the active region finalizeActiveRegion(activeRegion); // handle overlapping fragments, clip adapter and low qual tails + if( SCAC.DEBUG ) { logger.info("Assembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } final byte[] fullReferenceWithPadding = activeRegion.getActiveRegionReference(referenceReader, REFERENCE_PADDING); final GenomeLoc paddedReferenceLoc = getPaddedLoc(activeRegion); @@ -1301,8 +1302,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In private void finalizeActiveRegion( final ActiveRegion activeRegion ) { if (activeRegion.isFinalized()) return; - if( SCAC.DEBUG ) { logger.info("Assembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } - // Loop through the reads hard clipping the adaptor and low quality tails final List readsToUse = new ArrayList<>(activeRegion.getReads().size()); for( final GATKSAMRecord myRead : activeRegion.getReads() ) { diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 981d081e7..d709b00d8 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -77,9 +77,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "646ec07bd026da1e72b5e789f5aa3a3d"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "cb598bee733db0461f6a24d265daed45"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "c53ecb02a41511123dc4e1573dd516f6"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "c223d6fe112d2bb698811600c3b7f6af"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "dacb94af2632e4dc4a1948306dd1661c"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "ef9093880efedac09b78c8fb26420e84"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "9dbe7721ebfba9aefcfbfd1bdd62f2c4"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "4c99694b14beffd694f9738697b8f9d5"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "87d1e673e2512aefdceddd0293e4f346"}); return tests.toArray(new Object[][]{}); } @@ -96,10 +96,10 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "83b96e3e364821c51e6a2c2a64616b24"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "f38178834961798d79e7190dbca004bf"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a6db14e73e0a270b9d3e2035acfc1d2f"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "1c3570461e96ad6d66c6abb0fd6ee865"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "2a5a7057faba3cd488e40186072156af"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "afede7dbbcbe0e1af4abba7fc7bef479"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "853bd2f47496b3e8d1fe85657e4bf812"}); - final String NA12878bandedResolutionMD5 = "46bf5ca5142770a31fe0fbcd1fc988b4"; + final String NA12878bandedResolutionMD5 = "87900cbed775471f1a7ad6582b0581f9"; tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878", ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); @@ -119,9 +119,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "254b4bed7fc8ef5d3c4cb4ebf4ea73c2"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "3f9e2b721c3bc8a6d76aa75bb7544f28"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "bfd12d811446cde2466508033cae7122"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "eac793500fbc7de46259000dbbcdd27d"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "dc1b8abd195b75e147ac78fdbd4e2b65"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "124c794c78edfe74558307a203f93294"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "8e35def5c821afa9b6fbeed0aed0a866"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "8cebba1996a2aca29370c4d85f12128c"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "8abc60102776b34e22858ca157c784a9"}); return tests.toArray(new Object[][]{}); } @@ -137,9 +137,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "d8daf60c68f39875680cc037f264ccc0"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "dd1e6ee5e242fa1caa7f64a9605e4b16"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a040bb2ebacf5afab3490504730043a1"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "8b44f98c12ed7949d716852e0a21b346"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "4872ba8a27a29c9cac5e5c787ca853b5"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "65fc0938ca7df792a36413465878bc89"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5ed4c847250a1096b6a5a2eac61d8dc"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "0d41cee1d7bd3c6df9816cc87a8a9d7c"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "f8775dd95c3b185fc6abdb3ef8da3833"}); return tests.toArray(new Object[][]{}); } diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionTraversalParameters.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionTraversalParameters.java index 7c428cd6a..bcae8ecdc 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionTraversalParameters.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionTraversalParameters.java @@ -85,7 +85,7 @@ public @interface ActiveRegionTraversalParameters { * of coverage in the engine. * @return the maximum number of reads we're willing to hold in memory */ - public int maxReadsToHoldInMemoryPerSample() default 3000; + public int maxReadsToHoldInMemoryPerSample() default 30000; /** * No matter what the per sample value says, we will never hold more than this @@ -93,5 +93,5 @@ public @interface ActiveRegionTraversalParameters { * of reads in the case where we have a lot of samples. * @return the maximum number of reads to hold in memory */ - public int maxReadsToHoldTotal() default 1000000; + public int maxReadsToHoldTotal() default 10000000; }