Merge pull request #245 from broadinstitute/dr_enforce_min_dcov

Require a minimum dcov value of 200 for Locus and ActiveRegion walkers when downsampling to coverage
This commit is contained in:
Mark DePristo 2013-05-29 09:52:13 -07:00
commit 684c91c2e7
8 changed files with 68 additions and 26 deletions

View File

@ -101,7 +101,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerInsertionOnEdgeOfContig() { public void testHaplotypeCallerInsertionOnEdgeOfContig() {
HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae"); HCTest(CEUTRIO_MT_TEST_BAM, "-L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae");
} }
private void HCTestIndelQualityScores(String bam, String args, String md5) { private void HCTestIndelQualityScores(String bam, String args, String md5) {

View File

@ -463,9 +463,8 @@ public class GenomeAnalysisEngine {
DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod(); DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker); DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker);
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker);
DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod); DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : walkerMethod;
method.checkCompatibilityWithWalker(walker); method.checkCompatibilityWithWalker(walker);
return method; return method;
} }

View File

@ -61,20 +61,10 @@ public class DownsamplingMethod {
public static final DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE; public static final DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
/** /**
* Default target coverage for locus-based traversals * Don't allow dcov values below this threshold for locus-based traversals (ie., Locus
* and ActiveRegion walkers), as they can result in problematic downsampling artifacts
*/ */
public static final int DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000; public static final int MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS = 200;
/**
* Default downsampling method for locus-based traversals
*/
public static final DownsamplingMethod DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD =
new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE, null);
/**
* Default downsampling method for read-based traversals
*/
public static final DownsamplingMethod DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD = NONE;
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction ) { public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction ) {
@ -118,6 +108,16 @@ public class DownsamplingMethod {
if ( isLocusTraversal && type == DownsampleType.ALL_READS && toCoverage != null ) { if ( isLocusTraversal && type == DownsampleType.ALL_READS && toCoverage != null ) {
throw new UserException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not currently supported (though it is supported for ReadWalkers)."); throw new UserException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not currently supported (though it is supported for ReadWalkers).");
} }
// For locus traversals, ensure that the dcov value (if present) is not problematically low
if ( isLocusTraversal && type != DownsampleType.NONE && toCoverage != null &&
toCoverage < MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS ) {
throw new UserException(String.format("Locus-based traversals (ie., Locus and ActiveRegion walkers) require " +
"a minimum -dcov value of %d when downsampling to coverage. Values less " +
"than this can produce problematic downsampling artifacts while providing " +
"only insignificant improvements in memory usage in most cases.",
MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS));
}
} }
public String toString() { public String toString() {
@ -139,13 +139,4 @@ public class DownsamplingMethod {
return builder.toString(); return builder.toString();
} }
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker ) {
if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
return DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD;
}
else {
return DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD;
}
}
} }

View File

@ -31,6 +31,7 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -57,6 +58,7 @@ import java.util.*;
@PartitionBy(PartitionType.READ) @PartitionBy(PartitionType.READ)
@ActiveRegionTraversalParameters(extension=50,maxRegion=1500) @ActiveRegionTraversalParameters(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
@RemoveProgramRecords @RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
/** /**

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter; import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
@ -44,6 +45,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@Requires({DataSource.READS,DataSource.REFERENCE}) @Requires({DataSource.READS,DataSource.REFERENCE})
@PartitionBy(PartitionType.LOCUS) @PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
@RemoveProgramRecords @RemoveProgramRecords
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context? // Do we actually want to operate on the context?

View File

@ -29,6 +29,7 @@ import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.Sample;
@ -50,6 +51,7 @@ import java.util.List;
*/ */
@ReadFilters(MalformedReadFilter.class) @ReadFilters(MalformedReadFilter.class)
@PartitionBy(PartitionType.NONE) @PartitionBy(PartitionType.NONE)
@Downsample(by = DownsampleType.NONE)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT) @BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
@BQSRMode(ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT) @BQSRMode(ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
@DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class}) @DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class})

View File

@ -26,7 +26,9 @@
package org.broadinstitute.sting.gatk.datasources.reads; package org.broadinstitute.sting.gatk.datasources.reads;
import com.google.caliper.Param; import com.google.caliper.Param;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.qc.CountLoci; import org.broadinstitute.sting.gatk.walkers.qc.CountLoci;
/** /**
@ -86,7 +88,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
}, },
PER_SAMPLE { PER_SAMPLE {
@Override @Override
DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci()); } DownsamplingMethod create() { return WalkerManager.getDownsamplingMethod(LocusWalker.class); }
}; };
abstract DownsamplingMethod create(); abstract DownsamplingMethod create();
} }

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
/**
 * Integration tests for the validation of downsampling-related command-line arguments.
 */
public class DownsamplingIntegrationTest extends WalkerTest {

    /**
     * Runs CountLoci with a {@code -dcov} value one below
     * {@link DownsamplingMethod#MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS}
     * and expects the run to fail with a {@link UserException}.
     */
    @Test
    public void testDetectLowDcovValueWithLocusTraversal() {
        // Largest dcov value that is still below the enforced minimum.
        final int tooLowDcov = DownsamplingMethod.MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS - 1;

        final String commandLine =
                "-T CountLoci -R " + publicTestDir + "exampleFASTA.fasta -I " + publicTestDir + "exampleBAM.bam -o %s " +
                "-dcov " + tooLowDcov;

        // NOTE(review): the second argument is presumably the number of %s output files -- confirm against WalkerTestSpec.
        final WalkerTest.WalkerTestSpec lowDcovSpec =
                new WalkerTest.WalkerTestSpec(commandLine, 1, UserException.class);

        executeTest("testDetectLowDcovValueWithLocusTraversal", lowDcovSpec);
    }
}