Fixes for DiagnoseTargets to be VCF/BCF2 spec compliant

-- Don't use DP for average interval depth but rather AVG_INTERVAL_DP, which is a float now, not an int
-- Don't add the PASS filter value to genotypes, as any genotype filter value (including PASS) is actually considered a failing filter in the GATK.  Genotype filters should be empty for PASSing genotypes
This commit is contained in:
Mark DePristo 2012-06-27 17:33:37 -04:00
parent e8288c78d7
commit 93426a44b1
2 changed files with 12 additions and 9 deletions

View File

@ -266,13 +266,13 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
alleles.add(refAllele);
alleles.add(SYMBOLIC_ALLELE);
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles);
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds))));
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds), true)));
attributes.put(VCFConstants.END_KEY, interval.getStop());
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
attributes.put(ThresHolder.AVG_INTERVAL_DP_KEY, stats.averageCoverage());
vcb = vcb.attributes(attributes);
if (debug) {
@ -282,7 +282,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
final GenotypeBuilder gb = new GenotypeBuilder(sample);
SampleStatistics sampleStat = stats.getSample(sample);
gb.DP((int)sampleStat.averageCoverage());
gb.attribute(ThresHolder.AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage());
gb.attribute("Q1", sampleStat.getQuantileDepth(0.25));
gb.attribute("MED", sampleStat.getQuantileDepth(0.50));
gb.attribute("Q3", sampleStat.getQuantileDepth(0.75));
@ -290,7 +290,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
if (debug) {
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
}
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds), false));
genotypes.add(gb.make());
}
@ -307,11 +307,12 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
* @param statuses the set of statuses to be converted
* @return a matching set of strings
*/
private List<String> statusesToStrings(Set<CallableStatus> statuses) {
private List<String> statusesToStrings(Set<CallableStatus> statuses, final boolean includePASS) {
List<String> output = new ArrayList<String>(statuses.size());
for (CallableStatus status : statuses)
output.add(status.name());
if ( includePASS || status != CallableStatus.PASS ) // adding pass => results in a filter for genotypes
output.add(status.name());
return output;
}

View File

@ -31,6 +31,7 @@ import java.util.HashSet;
import java.util.Set;
class ThresHolder {
public static final String AVG_INTERVAL_DP_KEY = "AVG_INTERVAL_DP";
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
private final int minimumBaseQuality;
@ -129,12 +130,13 @@ class ThresHolder {
// INFO fields for overall data
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
headerLines.add(new VCFInfoHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(new VCFInfoHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
// FORMAT fields for each genotype
// todo -- find the appropriate VCF constants
headerLines.add(new VCFFormatHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));