General DiagnoseTargets documentation cleanup

* remove interval statistic low_median_coverage -- it is already captured by low coverage and coverage gaps.
   * add gatkdocs to all the parameters
   * clean up the logic on callable status a bit (still need to be re-worked into a plugin system)
   * update integration tests
This commit is contained in:
Mauricio Carneiro 2013-04-20 18:34:03 -04:00
parent b3c0abd9e8
commit eb6308a0e4
8 changed files with 60 additions and 81 deletions

View File

@ -66,12 +66,7 @@ public enum CallableStatus {
BAD_MATE("the reads are not properly mated, suggesting mapping errors"), BAD_MATE("the reads are not properly mated, suggesting mapping errors"),
NO_READS("there are no reads contained in the interval"), NO_READS("there are no reads contained in the interval");
//
// Interval-level statuses
//
LOW_MEDIAN_DEPTH("interval has insufficient median depth across samples");
public final String description; public final String description;

View File

@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import net.sf.picard.util.PeekableIterator; import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -110,45 +111,71 @@ import java.util.*;
@PartitionBy(PartitionType.INTERVAL) @PartitionBy(PartitionType.INTERVAL)
public class DiagnoseTargets extends LocusWalker<Long, Long> { public class DiagnoseTargets extends LocusWalker<Long, Long> {
@Output(doc = "File to which variants should be written") @Output(doc = "File to which interval statistics should be written")
private VariantContextWriter vcfWriter = null; private VariantContextWriter vcfWriter = null;
/**
* Only bases with quality greater than this will be considered in the coverage metrics.
*/
@Argument(fullName = "minimum_base_quality", shortName = "BQ", doc = "The minimum Base Quality that is considered for calls", required = false) @Argument(fullName = "minimum_base_quality", shortName = "BQ", doc = "The minimum Base Quality that is considered for calls", required = false)
private int minimumBaseQuality = 20; private int minimumBaseQuality = 20;
/**
* Only reads with mapping quality greater than this will be considered in the coverage metrics.
*/
@Argument(fullName = "minimum_mapping_quality", shortName = "MQ", doc = "The minimum read mapping quality considered for calls", required = false) @Argument(fullName = "minimum_mapping_quality", shortName = "MQ", doc = "The minimum read mapping quality considered for calls", required = false)
private int minimumMappingQuality = 20; private int minimumMappingQuality = 20;
/**
* If at any locus, a sample has less coverage than this, it will be reported as LOW_COVERAGE
*/
@Argument(fullName = "minimum_coverage", shortName = "min", doc = "The minimum allowable coverage, used for calling LOW_COVERAGE", required = false) @Argument(fullName = "minimum_coverage", shortName = "min", doc = "The minimum allowable coverage, used for calling LOW_COVERAGE", required = false)
private int minimumCoverage = 5; private int minimumCoverage = 5;
/**
* If at any locus, a sample has more coverage than this, it will be reported as EXCESSIVE_COVERAGE
*/
@Argument(fullName = "maximum_coverage", shortName = "max", doc = "The maximum allowable coverage, used for calling EXCESSIVE_COVERAGE", required = false) @Argument(fullName = "maximum_coverage", shortName = "max", doc = "The maximum allowable coverage, used for calling EXCESSIVE_COVERAGE", required = false)
private int maximumCoverage = 700; private int maximumCoverage = 700;
@Argument(fullName = "minimum_median_depth", shortName = "med", doc = "The minimum allowable median coverage, used for calling LOW_MEDIAN_DEPTH", required = false) /**
private int minMedianDepth = 10; * If any sample has a paired read whose distance between alignment starts (between the pairs) is greater than this, it will be reported as BAD_MATE
*/
@Argument(fullName = "maximum_insert_size", shortName = "ins", doc = "The maximum allowed distance between a read and its mate", required = false) @Argument(fullName = "maximum_insert_size", shortName = "ins", doc = "The maximum allowed distance between a read and its mate", required = false)
private int maxInsertSize = 500; private int maxInsertSize = 500;
@Argument(fullName = "voting_status_threshold", shortName = "stV", doc = "The needed percentage of samples containing a call for the interval to adopt the call ", required = false) /**
* The proportion of samples that must have a status for it to filter the entire interval. Example: 8 out of 10 samples have low coverage status on the interval,
* with a threshold higher than 0.2, this interval will be filtered as LOW_COVERAGE.
*/
@Argument(fullName = "voting_status_threshold", shortName = "stV", doc = "The needed proportion of samples containing a call for the interval to adopt the call ", required = false)
private double votePercentage = 0.50; private double votePercentage = 0.50;
@Argument(fullName = "low_median_depth_status_threshold", shortName = "stMED", doc = "The percentage of the loci needed for calling LOW_MEDIAN_DEPTH", required = false) /**
private double lowMedianDepthPercentage = 0.20; * The proportion of reads in the loci that must have bad mates for the sample to be reported as BAD_MATE
*/
@Argument(fullName = "bad_mate_status_threshold", shortName = "stBM", doc = "The percentage of the loci needed for calling BAD_MATE", required = false) @Argument(fullName = "bad_mate_status_threshold", shortName = "stBM", doc = "The proportion of the loci needed for calling BAD_MATE", required = false)
private double badMateStatusThreshold = 0.50; private double badMateStatusThreshold = 0.50;
@Argument(fullName = "coverage_status_threshold", shortName = "stC", doc = "The percentage of the loci needed for calling LOW_COVERAGE and COVERAGE_GAPS", required = false) /**
* The proportion of loci in a sample that must fall under the LOW_COVERAGE or COVERAGE_GAPS category for the sample to be reported as either (or both)
*/
@Argument(fullName = "coverage_status_threshold", shortName = "stC", doc = "The proportion of the loci needed for calling LOW_COVERAGE and COVERAGE_GAPS", required = false)
private double coverageStatusThreshold = 0.20; private double coverageStatusThreshold = 0.20;
@Argument(fullName = "excessive_coverage_status_threshold", shortName = "stXC", doc = "The percentage of the loci needed for calling EXCESSIVE_COVERAGE", required = false) /**
* The proportion of loci in a sample that must fall under the EXCESSIVE_COVERAGE category for the sample to be reported as EXCESSIVE_COVERAGE
*/
@Argument(fullName = "excessive_coverage_status_threshold", shortName = "stXC", doc = "The proportion of the loci needed for calling EXCESSIVE_COVERAGE", required = false)
private double excessiveCoverageThreshold = 0.20; private double excessiveCoverageThreshold = 0.20;
@Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The percentage of the loci needed for calling POOR_QUALITY", required = false) /**
* The proportion of loci in a sample that must fall under the LOW_QUALITY category for the sample to be reported as LOW_QUALITY
*/
@Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The proportion of the loci needed for calling POOR_QUALITY", required = false)
private double qualityStatusThreshold = 0.50; private double qualityStatusThreshold = 0.50;
@Hidden
@Argument(fullName = "print_debug_log", shortName = "dl", doc = "Used only for debugging the walker. Prints extra info to screen", required = false) @Argument(fullName = "print_debug_log", shortName = "dl", doc = "Used only for debugging the walker. Prints extra info to screen", required = false)
private boolean debug = false; private boolean debug = false;
@ -168,10 +195,8 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
if (getToolkit().getIntervals() == null) if (getToolkit().getIntervals() == null)
throw new UserException("This tool only works if you provide one or more intervals. ( Use the -L argument )"); throw new UserException("This tool only works if you provide one or more intervals. ( Use the -L argument )");
thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage, thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage, maxInsertSize, votePercentage,
minMedianDepth, maxInsertSize, votePercentage, lowMedianDepthPercentage, badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold, qualityStatusThreshold);
badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold,
qualityStatusThreshold);
intervalMap = new HashMap<GenomeLoc, IntervalStatistics>(INITIAL_HASH_SIZE); intervalMap = new HashMap<GenomeLoc, IntervalStatistics>(INITIAL_HASH_SIZE);
intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator()); intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator());

View File

@ -154,26 +154,6 @@ class IntervalStatistics {
output.add(status); output.add(status);
} }
// get median DP of each sample
final double minMedianDepth = thresholds.getLowMedianDepthThreshold() * samples.size();
final int nSamples = samples.size();
int nLowMedianDepth = 0;
int samplesSeen = 0;
for (SampleStatistics sample : samples.values()) {
samplesSeen++;
final double medianDepth = sample.getQuantileDepth(0.5);
if (medianDepth > 0 && medianDepth < thresholds.getMinimumMedianDepth()) {
nLowMedianDepth++;
}
if (nLowMedianDepth > minMedianDepth) {
output.add(CallableStatus.LOW_MEDIAN_DEPTH);
break;
}
if (nSamples - samplesSeen + nLowMedianDepth < minMedianDepth)
break;
}
return output; return output;
} }
} }

View File

@ -293,17 +293,11 @@ class SampleStatistics {
if (read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag()) if (read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag())
return false; return false;
// inverted // todo -- inverted ?
if (read.getReadNegativeStrandFlag() ==
read.getAlignmentStart() < read.getMateAlignmentStart())
return false;
// TODO note: IGV uses a different algorithm for insert size, there should be a common util class that does this for you
// mates are too far apart // mates are too far apart
if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize()) return Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) <= thresholds.getMaximumInsertSize();
return false;
return true;
} }
public int getnReads() { public int getnReads() {

View File

@ -54,44 +54,38 @@ import java.util.Set;
class ThresHolder { class ThresHolder {
public static final String AVG_INTERVAL_DP_KEY = "AVG_INTERVAL_DP"; public static final String AVG_INTERVAL_DP_KEY = "AVG_INTERVAL_DP";
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5); public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 50, 0.5, 0.5, 0.2, 0.2, 0.5);
private final int minimumBaseQuality; private final int minimumBaseQuality;
private final int minimumMappingQuality; private final int minimumMappingQuality;
private final int minimumCoverage; private final int minimumCoverage;
private final int maximumCoverage; private final int maximumCoverage;
private final int minimumMedianDepth;
private final int maximumInsertSize; private final int maximumInsertSize;
private final double votePercentageThreshold; private final double votePercentageThreshold;
private final double lowMedianDepthThreshold;
private final double badMateStatusThreshold; private final double badMateStatusThreshold;
private final double coverageStatusThreshold; private final double coverageStatusThreshold;
private final double excessiveCoverageThreshold; private final double excessiveCoverageThreshold;
private final double qualityStatusThreshold; private final double qualityStatusThreshold;
public ThresHolder(int minimumBaseQuality, public ThresHolder(final int minimumBaseQuality,
int minimumMappingQuality, final int minimumMappingQuality,
int minimumCoverage, final int minimumCoverage,
int maximumCoverage, final int maximumCoverage,
int minimumMedianDepth, final int maximumInsertSize,
int maximumInsertSize, final double votePercentageThreshold,
double votePercentageThreshold, final double badMateStatusThreshold,
double lowMedianDepthThreshold, final double coverageStatusThreshold,
double badMateStatusThreshold, final double excessiveCoverageThreshold,
double coverageStatusThreshold, final double qualityStatusThreshold) {
double excessiveCoverageThreshold,
double qualityStatusThreshold) {
this.minimumBaseQuality = minimumBaseQuality; this.minimumBaseQuality = minimumBaseQuality;
this.minimumMappingQuality = minimumMappingQuality; this.minimumMappingQuality = minimumMappingQuality;
this.minimumCoverage = minimumCoverage; this.minimumCoverage = minimumCoverage;
this.maximumCoverage = maximumCoverage; this.maximumCoverage = maximumCoverage;
this.minimumMedianDepth = minimumMedianDepth;
this.maximumInsertSize = maximumInsertSize; this.maximumInsertSize = maximumInsertSize;
this.votePercentageThreshold = votePercentageThreshold; this.votePercentageThreshold = votePercentageThreshold;
this.lowMedianDepthThreshold = lowMedianDepthThreshold;
this.badMateStatusThreshold = badMateStatusThreshold; this.badMateStatusThreshold = badMateStatusThreshold;
this.coverageStatusThreshold = coverageStatusThreshold; this.coverageStatusThreshold = coverageStatusThreshold;
this.excessiveCoverageThreshold = excessiveCoverageThreshold; this.excessiveCoverageThreshold = excessiveCoverageThreshold;
@ -106,10 +100,6 @@ class ThresHolder {
return maximumCoverage; return maximumCoverage;
} }
public int getMinimumMedianDepth() {
return minimumMedianDepth;
}
public int getMaximumInsertSize() { public int getMaximumInsertSize() {
return maximumInsertSize; return maximumInsertSize;
} }
@ -118,10 +108,6 @@ class ThresHolder {
return votePercentageThreshold; return votePercentageThreshold;
} }
public double getLowMedianDepthThreshold() {
return lowMedianDepthThreshold;
}
public double getBadMateStatusThreshold() { public double getBadMateStatusThreshold() {
return badMateStatusThreshold; return badMateStatusThreshold;
} }
@ -156,7 +142,6 @@ class ThresHolder {
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode")); headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
// FORMAT fields for each genotype // FORMAT fields for each genotype
// todo -- find the appropriate VCF constants
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size.")); headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution.")); headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));

View File

@ -66,11 +66,11 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testSingleSample() { public void testSingleSample() {
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "3d558ec8828c269774ee45e5df086a5f"); DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "742c13fc092b42f9ff71fc3fff4a95cc");
} }
@Test(enabled = true) @Test(enabled = true)
public void testMultiSample() { public void testMultiSample() {
DTTest("testMultiSample ", "-I " + multiSample, "d40cf1f1daf68f2740cd411e2cf361fc"); DTTest("testMultiSample ", "-I " + multiSample, "7083cc720a2caa02fb0fa8f49f94a826");
} }
} }

View File

@ -57,7 +57,7 @@ public class LocusStatisticsUnitTest /*extends BaseTest*/ {
@Test(dataProvider = "StatusTestValues") @Test(dataProvider = "StatusTestValues")
public void testCallableStatuses(int coverage, int rawCoverage, CallableStatus status) { public void testCallableStatuses(int coverage, int rawCoverage, CallableStatus status) {
// The min Coverage threshold is 10, the max is 100 // The min Coverage threshold is 10, the max is 100
ThresHolder thresholds = new ThresHolder(20, 20, 10, 100, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5); ThresHolder thresholds = new ThresHolder(20, 20, 10, 100, 50, 0.5, 0.5, 0.2, 0.2, 0.5);
Set<CallableStatus> statuses = new LocusStatistics(coverage, rawCoverage).callableStatuses(thresholds); Set<CallableStatus> statuses = new LocusStatistics(coverage, rawCoverage).callableStatuses(thresholds);
// Check to make sure the status provides matches the actual // Check to make sure the status provides matches the actual
Assert.assertTrue((status == null) ? statuses.isEmpty() : (statuses.contains(status) && statuses.size() == 1)); Assert.assertTrue((status == null) ? statuses.isEmpty() : (statuses.contains(status) && statuses.size() == 1));

View File

@ -95,7 +95,7 @@ public class SampleStatisticsUnitTest/* extends BaseTest */ {
GATKSAMRecord noPair = ArtificialSAMUtils.createArtificialRead(header, "test", 0, 100, 50); GATKSAMRecord noPair = ArtificialSAMUtils.createArtificialRead(header, "test", 0, 100, 50);
GATKSAMRecord good = ArtificialSAMUtils.createPair(header, "test", 30, 100, 150, true, false).get(0); GATKSAMRecord good = ArtificialSAMUtils.createPair(header, "test", 30, 100, 150, true, false).get(0);
GATKSAMRecord bigInsertSize = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, false).get(0); GATKSAMRecord bigInsertSize = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, false).get(0);
GATKSAMRecord inverted = ArtificialSAMUtils.createPair(header, "test", 30, 151, 150, true, false).get(0); // GATKSAMRecord inverted = ArtificialSAMUtils.createPair(header, "test", 30, 151, 150, true, false).get(0);
GATKSAMRecord sameOrientation = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, true).get(0); GATKSAMRecord sameOrientation = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, true).get(0);
GATKSAMRecord pairNotMapped = ArtificialSAMUtils.createPair(header, "test", 30, 100, 140, true, false).get(1); GATKSAMRecord pairNotMapped = ArtificialSAMUtils.createPair(header, "test", 30, 100, 140, true, false).get(1);
@ -106,7 +106,7 @@ public class SampleStatisticsUnitTest/* extends BaseTest */ {
new Object[]{noPair, false}, new Object[]{noPair, false},
new Object[]{good, true}, new Object[]{good, true},
new Object[]{bigInsertSize, false}, new Object[]{bigInsertSize, false},
new Object[]{inverted, false}, // new Object[]{inverted, false},
new Object[]{sameOrientation, false}, new Object[]{sameOrientation, false},
new Object[]{pairNotMapped, false} new Object[]{pairNotMapped, false}
}; };