diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 2d4223e5c..91e80b45c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -101,7 +101,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerInsertionOnEdgeOfContig() { - HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae"); + HCTest(CEUTRIO_MT_TEST_BAM, "-L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 314de29c7..3a8431dca 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -463,9 +463,8 @@ public class GenomeAnalysisEngine { DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod(); DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker); - DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker); - DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod); + DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : walkerMethod; method.checkCompatibilityWithWalker(walker); return method; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java index 5aa27608d..8e92b1ff3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java @@ -61,20 +61,10 @@ public class DownsamplingMethod { public static final DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE; /** - * Default target coverage for locus-based traversals + * Don't allow dcov values below this threshold for locus-based traversals (ie., Locus + * and ActiveRegion walkers), as they can result in problematic downsampling artifacts */ - public static final int DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000; - - /** - * Default downsampling method for locus-based traversals - */ - public static final DownsamplingMethod DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD = - new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE, null); - - /** - * Default downsampling method for read-based traversals - */ - public static final DownsamplingMethod DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD = NONE; + public static final int MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS = 200; public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction ) { @@ -118,6 +108,16 @@ public class DownsamplingMethod { if ( isLocusTraversal && type == DownsampleType.ALL_READS && toCoverage != null ) { throw new UserException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not currently supported (though it is supported for ReadWalkers)."); } + + // For locus traversals, ensure that the dcov value (if present) is not problematically low + if ( isLocusTraversal && type != DownsampleType.NONE && toCoverage != null && + toCoverage < MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS ) { + throw new UserException(String.format("Locus-based traversals (ie., Locus and ActiveRegion walkers) require " + + "a minimum -dcov value of %d when downsampling to coverage. Values less " + + "than this can produce problematic downsampling artifacts while providing " + + "only insignificant improvements in memory usage in most cases.", + MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS)); + } } public String toString() { @@ -139,13 +139,4 @@ public class DownsamplingMethod { return builder.toString(); } - - public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker ) { - if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) { - return DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD; - } - else { - return DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD; - } - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 9595b8f42..962f81d0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -31,6 +31,7 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; @@ -57,6 +58,7 @@ import java.util.*; @PartitionBy(PartitionType.READ) @ActiveRegionTraversalParameters(extension=50,maxRegion=1500) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) +@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000) @RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index 788bf11f9..9997723b8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter; @@ -44,6 +45,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @Requires({DataSource.READS,DataSource.REFERENCE}) @PartitionBy(PartitionType.LOCUS) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) +@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000) @RemoveProgramRecords public abstract class LocusWalker extends Walker { // Do we actually want to operate on the context? diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 522414c00..40485596d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -29,6 +29,7 @@ import net.sf.samtools.SAMSequenceDictionary; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.samples.Sample; @@ -50,6 +51,7 @@ import java.util.List; */ @ReadFilters(MalformedReadFilter.class) @PartitionBy(PartitionType.NONE) +@Downsample(by = DownsampleType.NONE) @BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT) @BQSRMode(ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT) @DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class}) diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java index 00389be97..25c71d570 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.datasources.reads; import com.google.caliper.Param; +import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.qc.CountLoci; /** @@ -86,7 +88,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark { }, PER_SAMPLE { @Override - DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci()); } + DownsamplingMethod create() { return WalkerManager.getDownsamplingMethod(LocusWalker.class); } }; abstract DownsamplingMethod create(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/DownsamplingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/DownsamplingIntegrationTest.java new file mode 100644 index 000000000..85f9169da --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/DownsamplingIntegrationTest.java @@ -0,0 +1,44 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.downsampling; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.Test; + +public class DownsamplingIntegrationTest extends WalkerTest { + + @Test + public void testDetectLowDcovValueWithLocusTraversal() { + final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T CountLoci -R " + publicTestDir + "exampleFASTA.fasta -I " + publicTestDir + "exampleBAM.bam -o %s " + + "-dcov " + (DownsamplingMethod.MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS - 1), + 1, + UserException.class + ); + executeTest("testDetectLowDcovValueWithLocusTraversal", spec); + } +}