General DiagnoseTargets documentation cleanup
* remove interval statistic low_median_coverage -- it is already captured by low coverage and coverage gaps. * add gatkdocs to all the parameters * clean up the logic on callable status a bit (still need to be re-worked into a plugin system) * update integration tests
This commit is contained in:
parent
b3c0abd9e8
commit
eb6308a0e4
|
|
@ -66,12 +66,7 @@ public enum CallableStatus {
|
|||
|
||||
BAD_MATE("the reads are not properly mated, suggesting mapping errors"),
|
||||
|
||||
NO_READS("there are no reads contained in the interval"),
|
||||
|
||||
//
|
||||
// Interval-level statuses
|
||||
//
|
||||
LOW_MEDIAN_DEPTH("interval has insufficient median depth across samples");
|
||||
NO_READS("there are no reads contained in the interval");
|
||||
|
||||
public final String description;
|
||||
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
|||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -110,45 +111,71 @@ import java.util.*;
|
|||
@PartitionBy(PartitionType.INTERVAL)
|
||||
public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||
|
||||
@Output(doc = "File to which variants should be written")
|
||||
@Output(doc = "File to which interval statistics should be written")
|
||||
private VariantContextWriter vcfWriter = null;
|
||||
|
||||
/**
|
||||
* Only bases with quality greater than this will be considered in the coverage metrics.
|
||||
*/
|
||||
@Argument(fullName = "minimum_base_quality", shortName = "BQ", doc = "The minimum Base Quality that is considered for calls", required = false)
|
||||
private int minimumBaseQuality = 20;
|
||||
|
||||
/**
|
||||
* Only reads with mapping quality greater than this will be considered in the coverage metrics.
|
||||
*/
|
||||
@Argument(fullName = "minimum_mapping_quality", shortName = "MQ", doc = "The minimum read mapping quality considered for calls", required = false)
|
||||
private int minimumMappingQuality = 20;
|
||||
|
||||
/**
|
||||
* If at any locus, a sample has less coverage than this, it will be reported as LOW_COVERAGE
|
||||
*/
|
||||
@Argument(fullName = "minimum_coverage", shortName = "min", doc = "The minimum allowable coverage, used for calling LOW_COVERAGE", required = false)
|
||||
private int minimumCoverage = 5;
|
||||
|
||||
/**
|
||||
* If at any locus, a sample has more coverage than this, it will be reported as EXCESSIVE_COVERAGE
|
||||
*/
|
||||
@Argument(fullName = "maximum_coverage", shortName = "max", doc = "The maximum allowable coverage, used for calling EXCESSIVE_COVERAGE", required = false)
|
||||
private int maximumCoverage = 700;
|
||||
|
||||
@Argument(fullName = "minimum_median_depth", shortName = "med", doc = "The minimum allowable median coverage, used for calling LOW_MEDIAN_DEPTH", required = false)
|
||||
private int minMedianDepth = 10;
|
||||
|
||||
/**
|
||||
* If any sample has a paired read whose distance between alignment starts (between the pairs) is greater than this, it will be reported as BAD_MATE
|
||||
*/
|
||||
@Argument(fullName = "maximum_insert_size", shortName = "ins", doc = "The maximum allowed distance between a read and its mate", required = false)
|
||||
private int maxInsertSize = 500;
|
||||
|
||||
@Argument(fullName = "voting_status_threshold", shortName = "stV", doc = "The needed percentage of samples containing a call for the interval to adopt the call ", required = false)
|
||||
/**
|
||||
* The proportion of samples that must have a status for it to filter the entire interval. Example: 8 out of 10 samples have low coverage status on the interval,
|
||||
* with a threshold higher than 0.2, this interval will be filtered as LOW_COVERAGE.
|
||||
*/
|
||||
@Argument(fullName = "voting_status_threshold", shortName = "stV", doc = "The needed proportion of samples containing a call for the interval to adopt the call ", required = false)
|
||||
private double votePercentage = 0.50;
|
||||
|
||||
@Argument(fullName = "low_median_depth_status_threshold", shortName = "stMED", doc = "The percentage of the loci needed for calling LOW_MEDIAN_DEPTH", required = false)
|
||||
private double lowMedianDepthPercentage = 0.20;
|
||||
|
||||
@Argument(fullName = "bad_mate_status_threshold", shortName = "stBM", doc = "The percentage of the loci needed for calling BAD_MATE", required = false)
|
||||
/**
|
||||
* The proportion of reads in the loci that must have bad mates for the sample to be reported as BAD_MATE
|
||||
*/
|
||||
@Argument(fullName = "bad_mate_status_threshold", shortName = "stBM", doc = "The proportion of the loci needed for calling BAD_MATE", required = false)
|
||||
private double badMateStatusThreshold = 0.50;
|
||||
|
||||
@Argument(fullName = "coverage_status_threshold", shortName = "stC", doc = "The percentage of the loci needed for calling LOW_COVERAGE and COVERAGE_GAPS", required = false)
|
||||
/**
|
||||
* The proportion of loci in a sample that must fall under the LOW_COVERAGE or COVERAGE_GAPS category for the sample to be reported as either (or both)
|
||||
*/
|
||||
@Argument(fullName = "coverage_status_threshold", shortName = "stC", doc = "The proportion of the loci needed for calling LOW_COVERAGE and COVERAGE_GAPS", required = false)
|
||||
private double coverageStatusThreshold = 0.20;
|
||||
|
||||
@Argument(fullName = "excessive_coverage_status_threshold", shortName = "stXC", doc = "The percentage of the loci needed for calling EXCESSIVE_COVERAGE", required = false)
|
||||
/**
|
||||
* The proportion of loci in a sample that must fall under the EXCESSIVE_COVERAGE category for the sample to be reported as EXCESSIVE_COVERAGE
|
||||
*/
|
||||
@Argument(fullName = "excessive_coverage_status_threshold", shortName = "stXC", doc = "The proportion of the loci needed for calling EXCESSIVE_COVERAGE", required = false)
|
||||
private double excessiveCoverageThreshold = 0.20;
|
||||
|
||||
@Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The percentage of the loci needed for calling POOR_QUALITY", required = false)
|
||||
/**
|
||||
* The proportion of loci in a sample that must fall under the LOW_QUALITY category for the sample to be reported as LOW_QUALITY
|
||||
*/
|
||||
@Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The proportion of the loci needed for calling POOR_QUALITY", required = false)
|
||||
private double qualityStatusThreshold = 0.50;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "print_debug_log", shortName = "dl", doc = "Used only for debugging the walker. Prints extra info to screen", required = false)
|
||||
private boolean debug = false;
|
||||
|
||||
|
|
@ -168,10 +195,8 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
if (getToolkit().getIntervals() == null)
|
||||
throw new UserException("This tool only works if you provide one or more intervals. ( Use the -L argument )");
|
||||
|
||||
thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage,
|
||||
minMedianDepth, maxInsertSize, votePercentage, lowMedianDepthPercentage,
|
||||
badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold,
|
||||
qualityStatusThreshold);
|
||||
thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage, maxInsertSize, votePercentage,
|
||||
badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold, qualityStatusThreshold);
|
||||
|
||||
intervalMap = new HashMap<GenomeLoc, IntervalStatistics>(INITIAL_HASH_SIZE);
|
||||
intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator());
|
||||
|
|
|
|||
|
|
@ -154,26 +154,6 @@ class IntervalStatistics {
|
|||
output.add(status);
|
||||
}
|
||||
|
||||
// get median DP of each sample
|
||||
final double minMedianDepth = thresholds.getLowMedianDepthThreshold() * samples.size();
|
||||
final int nSamples = samples.size();
|
||||
int nLowMedianDepth = 0;
|
||||
int samplesSeen = 0;
|
||||
for (SampleStatistics sample : samples.values()) {
|
||||
samplesSeen++;
|
||||
final double medianDepth = sample.getQuantileDepth(0.5);
|
||||
if (medianDepth > 0 && medianDepth < thresholds.getMinimumMedianDepth()) {
|
||||
nLowMedianDepth++;
|
||||
}
|
||||
if (nLowMedianDepth > minMedianDepth) {
|
||||
output.add(CallableStatus.LOW_MEDIAN_DEPTH);
|
||||
break;
|
||||
}
|
||||
if (nSamples - samplesSeen + nLowMedianDepth < minMedianDepth)
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -293,17 +293,11 @@ class SampleStatistics {
|
|||
if (read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag())
|
||||
return false;
|
||||
|
||||
// inverted
|
||||
if (read.getReadNegativeStrandFlag() ==
|
||||
read.getAlignmentStart() < read.getMateAlignmentStart())
|
||||
return false;
|
||||
// todo -- inverted ?
|
||||
|
||||
// TODO note: IGV uses a different algorithm for insert size, there should be a common util class that does this for you
|
||||
// mates are too far apart
|
||||
if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize())
|
||||
return false;
|
||||
return Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) <= thresholds.getMaximumInsertSize();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public int getnReads() {
|
||||
|
|
|
|||
|
|
@ -54,44 +54,38 @@ import java.util.Set;
|
|||
|
||||
class ThresHolder {
|
||||
public static final String AVG_INTERVAL_DP_KEY = "AVG_INTERVAL_DP";
|
||||
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
|
||||
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 50, 0.5, 0.5, 0.2, 0.2, 0.5);
|
||||
|
||||
private final int minimumBaseQuality;
|
||||
private final int minimumMappingQuality;
|
||||
|
||||
private final int minimumCoverage;
|
||||
private final int maximumCoverage;
|
||||
private final int minimumMedianDepth;
|
||||
|
||||
private final int maximumInsertSize;
|
||||
|
||||
private final double votePercentageThreshold;
|
||||
private final double lowMedianDepthThreshold;
|
||||
private final double badMateStatusThreshold;
|
||||
private final double coverageStatusThreshold;
|
||||
private final double excessiveCoverageThreshold;
|
||||
private final double qualityStatusThreshold;
|
||||
|
||||
public ThresHolder(int minimumBaseQuality,
|
||||
int minimumMappingQuality,
|
||||
int minimumCoverage,
|
||||
int maximumCoverage,
|
||||
int minimumMedianDepth,
|
||||
int maximumInsertSize,
|
||||
double votePercentageThreshold,
|
||||
double lowMedianDepthThreshold,
|
||||
double badMateStatusThreshold,
|
||||
double coverageStatusThreshold,
|
||||
double excessiveCoverageThreshold,
|
||||
double qualityStatusThreshold) {
|
||||
public ThresHolder(final int minimumBaseQuality,
|
||||
final int minimumMappingQuality,
|
||||
final int minimumCoverage,
|
||||
final int maximumCoverage,
|
||||
final int maximumInsertSize,
|
||||
final double votePercentageThreshold,
|
||||
final double badMateStatusThreshold,
|
||||
final double coverageStatusThreshold,
|
||||
final double excessiveCoverageThreshold,
|
||||
final double qualityStatusThreshold) {
|
||||
this.minimumBaseQuality = minimumBaseQuality;
|
||||
this.minimumMappingQuality = minimumMappingQuality;
|
||||
this.minimumCoverage = minimumCoverage;
|
||||
this.maximumCoverage = maximumCoverage;
|
||||
this.minimumMedianDepth = minimumMedianDepth;
|
||||
this.maximumInsertSize = maximumInsertSize;
|
||||
this.votePercentageThreshold = votePercentageThreshold;
|
||||
this.lowMedianDepthThreshold = lowMedianDepthThreshold;
|
||||
this.badMateStatusThreshold = badMateStatusThreshold;
|
||||
this.coverageStatusThreshold = coverageStatusThreshold;
|
||||
this.excessiveCoverageThreshold = excessiveCoverageThreshold;
|
||||
|
|
@ -106,10 +100,6 @@ class ThresHolder {
|
|||
return maximumCoverage;
|
||||
}
|
||||
|
||||
public int getMinimumMedianDepth() {
|
||||
return minimumMedianDepth;
|
||||
}
|
||||
|
||||
public int getMaximumInsertSize() {
|
||||
return maximumInsertSize;
|
||||
}
|
||||
|
|
@ -118,10 +108,6 @@ class ThresHolder {
|
|||
return votePercentageThreshold;
|
||||
}
|
||||
|
||||
public double getLowMedianDepthThreshold() {
|
||||
return lowMedianDepthThreshold;
|
||||
}
|
||||
|
||||
public double getBadMateStatusThreshold() {
|
||||
return badMateStatusThreshold;
|
||||
}
|
||||
|
|
@ -156,7 +142,6 @@ class ThresHolder {
|
|||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
// todo -- find the appropriate VCF constants
|
||||
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
|
||||
headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||
|
|
|
|||
|
|
@ -66,11 +66,11 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testSingleSample() {
|
||||
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "3d558ec8828c269774ee45e5df086a5f");
|
||||
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "742c13fc092b42f9ff71fc3fff4a95cc");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMultiSample() {
|
||||
DTTest("testMultiSample ", "-I " + multiSample, "d40cf1f1daf68f2740cd411e2cf361fc");
|
||||
DTTest("testMultiSample ", "-I " + multiSample, "7083cc720a2caa02fb0fa8f49f94a826");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ public class LocusStatisticsUnitTest /*extends BaseTest*/ {
|
|||
@Test(dataProvider = "StatusTestValues")
|
||||
public void testCallableStatuses(int coverage, int rawCoverage, CallableStatus status) {
|
||||
// The min Coverage threshold is 10, the max is 100
|
||||
ThresHolder thresholds = new ThresHolder(20, 20, 10, 100, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
|
||||
ThresHolder thresholds = new ThresHolder(20, 20, 10, 100, 50, 0.5, 0.5, 0.2, 0.2, 0.5);
|
||||
Set<CallableStatus> statuses = new LocusStatistics(coverage, rawCoverage).callableStatuses(thresholds);
|
||||
// Check to make sure the status provides matches the actual
|
||||
Assert.assertTrue((status == null) ? statuses.isEmpty() : (statuses.contains(status) && statuses.size() == 1));
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ public class SampleStatisticsUnitTest/* extends BaseTest */ {
|
|||
GATKSAMRecord noPair = ArtificialSAMUtils.createArtificialRead(header, "test", 0, 100, 50);
|
||||
GATKSAMRecord good = ArtificialSAMUtils.createPair(header, "test", 30, 100, 150, true, false).get(0);
|
||||
GATKSAMRecord bigInsertSize = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, false).get(0);
|
||||
GATKSAMRecord inverted = ArtificialSAMUtils.createPair(header, "test", 30, 151, 150, true, false).get(0);
|
||||
// GATKSAMRecord inverted = ArtificialSAMUtils.createPair(header, "test", 30, 151, 150, true, false).get(0);
|
||||
GATKSAMRecord sameOrientation = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, true).get(0);
|
||||
|
||||
GATKSAMRecord pairNotMapped = ArtificialSAMUtils.createPair(header, "test", 30, 100, 140, true, false).get(1);
|
||||
|
|
@ -106,7 +106,7 @@ public class SampleStatisticsUnitTest/* extends BaseTest */ {
|
|||
new Object[]{noPair, false},
|
||||
new Object[]{good, true},
|
||||
new Object[]{bigInsertSize, false},
|
||||
new Object[]{inverted, false},
|
||||
// new Object[]{inverted, false},
|
||||
new Object[]{sameOrientation, false},
|
||||
new Object[]{pairNotMapped, false}
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue