General DiagnoseTargets documentation cleanup

* remove interval statistic low_median_coverage -- it is already captured by low coverage and coverage gaps.
   * add gatkdocs to all the parameters
   * clean up the logic on callable status a bit (still need to be re-worked into a plugin system)
   * update integration tests
This commit is contained in:
Mauricio Carneiro 2013-04-20 18:34:03 -04:00
parent b3c0abd9e8
commit eb6308a0e4
8 changed files with 60 additions and 81 deletions

View File

@ -66,12 +66,7 @@ public enum CallableStatus {
BAD_MATE("the reads are not properly mated, suggesting mapping errors"),
NO_READS("there are no reads contained in the interval"),
//
// Interval-level statuses
//
LOW_MEDIAN_DEPTH("interval has insufficient median depth across samples");
NO_READS("there are no reads contained in the interval");
public final String description;

View File

@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -110,45 +111,71 @@ import java.util.*;
@PartitionBy(PartitionType.INTERVAL)
public class DiagnoseTargets extends LocusWalker<Long, Long> {
@Output(doc = "File to which variants should be written")
@Output(doc = "File to which interval statistics should be written")
private VariantContextWriter vcfWriter = null;
/**
* Only bases with quality greater than this will be considered in the coverage metrics.
*/
@Argument(fullName = "minimum_base_quality", shortName = "BQ", doc = "The minimum Base Quality that is considered for calls", required = false)
private int minimumBaseQuality = 20;
/**
* Only reads with mapping quality greater than this will be considered in the coverage metrics.
*/
@Argument(fullName = "minimum_mapping_quality", shortName = "MQ", doc = "The minimum read mapping quality considered for calls", required = false)
private int minimumMappingQuality = 20;
/**
* If at any locus, a sample has less coverage than this, it will be reported as LOW_COVERAGE
*/
@Argument(fullName = "minimum_coverage", shortName = "min", doc = "The minimum allowable coverage, used for calling LOW_COVERAGE", required = false)
private int minimumCoverage = 5;
/**
* If at any locus, a sample has more coverage than this, it will be reported as EXCESSIVE_COVERAGE
*/
@Argument(fullName = "maximum_coverage", shortName = "max", doc = "The maximum allowable coverage, used for calling EXCESSIVE_COVERAGE", required = false)
private int maximumCoverage = 700;
@Argument(fullName = "minimum_median_depth", shortName = "med", doc = "The minimum allowable median coverage, used for calling LOW_MEDIAN_DEPTH", required = false)
private int minMedianDepth = 10;
/**
* If any sample has a paired read whose distance between alignment starts (between the pairs) is greater than this, it will be reported as BAD_MATE
*/
@Argument(fullName = "maximum_insert_size", shortName = "ins", doc = "The maximum allowed distance between a read and its mate", required = false)
private int maxInsertSize = 500;
@Argument(fullName = "voting_status_threshold", shortName = "stV", doc = "The needed percentage of samples containing a call for the interval to adopt the call ", required = false)
/**
* The proportion of samples that must have a status for it to filter the entire interval. Example: 8 out of 10 samples have low coverage status on the interval,
* with a threshold higher than 0.2, this interval will be filtered as LOW_COVERAGE.
*/
@Argument(fullName = "voting_status_threshold", shortName = "stV", doc = "The needed proportion of samples containing a call for the interval to adopt the call ", required = false)
private double votePercentage = 0.50;
@Argument(fullName = "low_median_depth_status_threshold", shortName = "stMED", doc = "The percentage of the loci needed for calling LOW_MEDIAN_DEPTH", required = false)
private double lowMedianDepthPercentage = 0.20;
@Argument(fullName = "bad_mate_status_threshold", shortName = "stBM", doc = "The percentage of the loci needed for calling BAD_MATE", required = false)
/**
* The proportion of reads in the loci that must have bad mates for the sample to be reported as BAD_MATE
*/
@Argument(fullName = "bad_mate_status_threshold", shortName = "stBM", doc = "The proportion of the loci needed for calling BAD_MATE", required = false)
private double badMateStatusThreshold = 0.50;
@Argument(fullName = "coverage_status_threshold", shortName = "stC", doc = "The percentage of the loci needed for calling LOW_COVERAGE and COVERAGE_GAPS", required = false)
/**
* The proportion of loci in a sample that must fall under the LOW_COVERAGE or COVERAGE_GAPS category for the sample to be reported as either (or both)
*/
@Argument(fullName = "coverage_status_threshold", shortName = "stC", doc = "The proportion of the loci needed for calling LOW_COVERAGE and COVERAGE_GAPS", required = false)
private double coverageStatusThreshold = 0.20;
@Argument(fullName = "excessive_coverage_status_threshold", shortName = "stXC", doc = "The percentage of the loci needed for calling EXCESSIVE_COVERAGE", required = false)
/**
* The proportion of loci in a sample that must fall under the EXCESSIVE_COVERAGE category for the sample to be reported as EXCESSIVE_COVERAGE
*/
@Argument(fullName = "excessive_coverage_status_threshold", shortName = "stXC", doc = "The proportion of the loci needed for calling EXCESSIVE_COVERAGE", required = false)
private double excessiveCoverageThreshold = 0.20;
@Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The percentage of the loci needed for calling POOR_QUALITY", required = false)
/**
* The proportion of loci in a sample that must fall under the LOW_QUALITY category for the sample to be reported as LOW_QUALITY
*/
@Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The proportion of the loci needed for calling POOR_QUALITY", required = false)
private double qualityStatusThreshold = 0.50;
@Hidden
@Argument(fullName = "print_debug_log", shortName = "dl", doc = "Used only for debugging the walker. Prints extra info to screen", required = false)
private boolean debug = false;
@ -168,10 +195,8 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
if (getToolkit().getIntervals() == null)
throw new UserException("This tool only works if you provide one or more intervals. ( Use the -L argument )");
thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage,
minMedianDepth, maxInsertSize, votePercentage, lowMedianDepthPercentage,
badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold,
qualityStatusThreshold);
thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage, maxInsertSize, votePercentage,
badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold, qualityStatusThreshold);
intervalMap = new HashMap<GenomeLoc, IntervalStatistics>(INITIAL_HASH_SIZE);
intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator());

View File

@ -154,26 +154,6 @@ class IntervalStatistics {
output.add(status);
}
// get median DP of each sample
final double minMedianDepth = thresholds.getLowMedianDepthThreshold() * samples.size();
final int nSamples = samples.size();
int nLowMedianDepth = 0;
int samplesSeen = 0;
for (SampleStatistics sample : samples.values()) {
samplesSeen++;
final double medianDepth = sample.getQuantileDepth(0.5);
if (medianDepth > 0 && medianDepth < thresholds.getMinimumMedianDepth()) {
nLowMedianDepth++;
}
if (nLowMedianDepth > minMedianDepth) {
output.add(CallableStatus.LOW_MEDIAN_DEPTH);
break;
}
if (nSamples - samplesSeen + nLowMedianDepth < minMedianDepth)
break;
}
return output;
}
}

View File

@ -293,17 +293,11 @@ class SampleStatistics {
if (read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag())
return false;
// inverted
if (read.getReadNegativeStrandFlag() ==
read.getAlignmentStart() < read.getMateAlignmentStart())
return false;
// todo -- inverted ?
// TODO note: IGV uses a different algorithm for insert size, there should be a common util class that does this for you
// mates are too far apart
if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize())
return false;
return Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) <= thresholds.getMaximumInsertSize();
return true;
}
public int getnReads() {

View File

@ -54,44 +54,38 @@ import java.util.Set;
class ThresHolder {
public static final String AVG_INTERVAL_DP_KEY = "AVG_INTERVAL_DP";
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 50, 0.5, 0.5, 0.2, 0.2, 0.5);
private final int minimumBaseQuality;
private final int minimumMappingQuality;
private final int minimumCoverage;
private final int maximumCoverage;
private final int minimumMedianDepth;
private final int maximumInsertSize;
private final double votePercentageThreshold;
private final double lowMedianDepthThreshold;
private final double badMateStatusThreshold;
private final double coverageStatusThreshold;
private final double excessiveCoverageThreshold;
private final double qualityStatusThreshold;
public ThresHolder(int minimumBaseQuality,
int minimumMappingQuality,
int minimumCoverage,
int maximumCoverage,
int minimumMedianDepth,
int maximumInsertSize,
double votePercentageThreshold,
double lowMedianDepthThreshold,
double badMateStatusThreshold,
double coverageStatusThreshold,
double excessiveCoverageThreshold,
double qualityStatusThreshold) {
public ThresHolder(final int minimumBaseQuality,
final int minimumMappingQuality,
final int minimumCoverage,
final int maximumCoverage,
final int maximumInsertSize,
final double votePercentageThreshold,
final double badMateStatusThreshold,
final double coverageStatusThreshold,
final double excessiveCoverageThreshold,
final double qualityStatusThreshold) {
this.minimumBaseQuality = minimumBaseQuality;
this.minimumMappingQuality = minimumMappingQuality;
this.minimumCoverage = minimumCoverage;
this.maximumCoverage = maximumCoverage;
this.minimumMedianDepth = minimumMedianDepth;
this.maximumInsertSize = maximumInsertSize;
this.votePercentageThreshold = votePercentageThreshold;
this.lowMedianDepthThreshold = lowMedianDepthThreshold;
this.badMateStatusThreshold = badMateStatusThreshold;
this.coverageStatusThreshold = coverageStatusThreshold;
this.excessiveCoverageThreshold = excessiveCoverageThreshold;
@ -106,10 +100,6 @@ class ThresHolder {
return maximumCoverage;
}
public int getMinimumMedianDepth() {
return minimumMedianDepth;
}
public int getMaximumInsertSize() {
return maximumInsertSize;
}
@ -118,10 +108,6 @@ class ThresHolder {
return votePercentageThreshold;
}
public double getLowMedianDepthThreshold() {
return lowMedianDepthThreshold;
}
public double getBadMateStatusThreshold() {
return badMateStatusThreshold;
}
@ -156,7 +142,6 @@ class ThresHolder {
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
// FORMAT fields for each genotype
// todo -- find the appropriate VCF constants
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));

View File

@ -66,11 +66,11 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testSingleSample() {
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "3d558ec8828c269774ee45e5df086a5f");
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "742c13fc092b42f9ff71fc3fff4a95cc");
}
@Test(enabled = true)
public void testMultiSample() {
DTTest("testMultiSample ", "-I " + multiSample, "d40cf1f1daf68f2740cd411e2cf361fc");
DTTest("testMultiSample ", "-I " + multiSample, "7083cc720a2caa02fb0fa8f49f94a826");
}
}

View File

@ -57,7 +57,7 @@ public class LocusStatisticsUnitTest /*extends BaseTest*/ {
@Test(dataProvider = "StatusTestValues")
public void testCallableStatuses(int coverage, int rawCoverage, CallableStatus status) {
// The min Coverage threshold is 10, the max is 100
ThresHolder thresholds = new ThresHolder(20, 20, 10, 100, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
ThresHolder thresholds = new ThresHolder(20, 20, 10, 100, 50, 0.5, 0.5, 0.2, 0.2, 0.5);
Set<CallableStatus> statuses = new LocusStatistics(coverage, rawCoverage).callableStatuses(thresholds);
// Check to make sure the status provides matches the actual
Assert.assertTrue((status == null) ? statuses.isEmpty() : (statuses.contains(status) && statuses.size() == 1));

View File

@ -95,7 +95,7 @@ public class SampleStatisticsUnitTest/* extends BaseTest */ {
GATKSAMRecord noPair = ArtificialSAMUtils.createArtificialRead(header, "test", 0, 100, 50);
GATKSAMRecord good = ArtificialSAMUtils.createPair(header, "test", 30, 100, 150, true, false).get(0);
GATKSAMRecord bigInsertSize = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, false).get(0);
GATKSAMRecord inverted = ArtificialSAMUtils.createPair(header, "test", 30, 151, 150, true, false).get(0);
// GATKSAMRecord inverted = ArtificialSAMUtils.createPair(header, "test", 30, 151, 150, true, false).get(0);
GATKSAMRecord sameOrientation = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, true).get(0);
GATKSAMRecord pairNotMapped = ArtificialSAMUtils.createPair(header, "test", 30, 100, 140, true, false).get(1);
@ -106,7 +106,7 @@ public class SampleStatisticsUnitTest/* extends BaseTest */ {
new Object[]{noPair, false},
new Object[]{good, true},
new Object[]{bigInsertSize, false},
new Object[]{inverted, false},
// new Object[]{inverted, false},
new Object[]{sameOrientation, false},
new Object[]{pairNotMapped, false}
};