Merge pull request #245 from broadinstitute/dr_enforce_min_dcov

Require a minimum dcov value of 200 for Locus and ActiveRegion walkers when downsampling to coverage
This commit is contained in:
Mark DePristo 2013-05-29 09:52:13 -07:00
commit 684c91c2e7
8 changed files with 68 additions and 26 deletions

View File

@ -101,7 +101,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerInsertionOnEdgeOfContig() {
HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae");
HCTest(CEUTRIO_MT_TEST_BAM, "-L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {

View File

@ -463,9 +463,8 @@ public class GenomeAnalysisEngine {
DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker);
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker);
DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : walkerMethod;
method.checkCompatibilityWithWalker(walker);
return method;
}

View File

@ -61,20 +61,10 @@ public class DownsamplingMethod {
public static final DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
/**
* Default target coverage for locus-based traversals
* Don't allow dcov values below this threshold for locus-based traversals (i.e., Locus
* and ActiveRegion walkers), as they can result in problematic downsampling artifacts
*/
public static final int DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
/**
* Default downsampling method for locus-based traversals
*/
public static final DownsamplingMethod DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD =
new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE, null);
/**
* Default downsampling method for read-based traversals
*/
public static final DownsamplingMethod DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD = NONE;
public static final int MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS = 200;
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction ) {
@ -118,6 +108,16 @@ public class DownsamplingMethod {
if ( isLocusTraversal && type == DownsampleType.ALL_READS && toCoverage != null ) {
throw new UserException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not currently supported (though it is supported for ReadWalkers).");
}
// For locus traversals, ensure that the dcov value (if present) is not problematically low
if ( isLocusTraversal && type != DownsampleType.NONE && toCoverage != null &&
toCoverage < MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS ) {
throw new UserException(String.format("Locus-based traversals (ie., Locus and ActiveRegion walkers) require " +
"a minimum -dcov value of %d when downsampling to coverage. Values less " +
"than this can produce problematic downsampling artifacts while providing " +
"only insignificant improvements in memory usage in most cases.",
MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS));
}
}
public String toString() {
@ -139,13 +139,4 @@ public class DownsamplingMethod {
return builder.toString();
}
/**
 * Picks the default downsampling method that corresponds to the kind of walker supplied.
 *
 * @param walker the walker whose type decides which default applies
 * @return {@link #DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD} when the walker is a
 *         LocusWalker or an ActiveRegionWalker; otherwise
 *         {@link #DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD}
 */
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker ) {
    // Locus and ActiveRegion walkers share the same default; every other walker gets the read default
    final boolean usesLocusDefault = walker instanceof LocusWalker || walker instanceof ActiveRegionWalker;
    return usesLocusDefault
            ? DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD
            : DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD;
}
}

View File

@ -31,6 +31,7 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -57,6 +58,7 @@ import java.util.*;
@PartitionBy(PartitionType.READ)
@ActiveRegionTraversalParameters(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
@RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
/**

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
@ -44,6 +45,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@Requires({DataSource.READS,DataSource.REFERENCE})
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
@RemoveProgramRecords
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?

View File

@ -29,6 +29,7 @@ import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.samples.Sample;
@ -50,6 +51,7 @@ import java.util.List;
*/
@ReadFilters(MalformedReadFilter.class)
@PartitionBy(PartitionType.NONE)
@Downsample(by = DownsampleType.NONE)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
@BQSRMode(ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
@DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class})

View File

@ -26,7 +26,9 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import com.google.caliper.Param;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.qc.CountLoci;
/**
@ -86,7 +88,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
},
PER_SAMPLE {
@Override
DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci()); }
DownsamplingMethod create() { return WalkerManager.getDownsamplingMethod(LocusWalker.class); }
};
abstract DownsamplingMethod create();
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
/**
 * Integration test covering command-line validation of the -dcov argument.
 */
public class DownsamplingIntegrationTest extends WalkerTest {

    /**
     * Runs CountLoci with a -dcov value one below
     * {@link DownsamplingMethod#MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS}
     * and hands the spec {@code UserException.class}, presumably as the exception the
     * run is expected to raise (confirm against WalkerTestSpec).
     */
    @Test
    public void testDetectLowDcovValueWithLocusTraversal() {
        // One less than the smallest dcov value accepted for locus-based traversals
        final int belowMinimumDcov =
                DownsamplingMethod.MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS - 1;

        final String commandLine =
                "-T CountLoci -R " + publicTestDir + "exampleFASTA.fasta -I " + publicTestDir + "exampleBAM.bam -o %s " +
                "-dcov " + belowMinimumDcov;

        final WalkerTest.WalkerTestSpec lowDcovSpec =
                new WalkerTest.WalkerTestSpec(commandLine, 1, UserException.class);

        executeTest("testDetectLowDcovValueWithLocusTraversal", lowDcovSpec);
    }
}