Fixes for DiagnoseTargets to be VCF/BCF2 spec compliant
-- Don't use DP for the average interval depth; use AVG_INTERVAL_DP instead, which is now a float, not an int -- Don't add the PASS filter value to genotypes, as the GATK actually considers this to be failing filters. Genotype filters should be empty for PASSing sites
This commit is contained in:
parent
e8288c78d7
commit
93426a44b1
|
|
@ -266,13 +266,13 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
|
||||
alleles.add(refAllele);
|
||||
alleles.add(SYMBOLIC_ALLELE);
|
||||
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
|
||||
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles);
|
||||
|
||||
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
|
||||
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds))));
|
||||
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds), true)));
|
||||
|
||||
attributes.put(VCFConstants.END_KEY, interval.getStop());
|
||||
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
|
||||
attributes.put(ThresHolder.AVG_INTERVAL_DP_KEY, stats.averageCoverage());
|
||||
|
||||
vcb = vcb.attributes(attributes);
|
||||
if (debug) {
|
||||
|
|
@ -282,7 +282,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
|
||||
SampleStatistics sampleStat = stats.getSample(sample);
|
||||
gb.DP((int)sampleStat.averageCoverage());
|
||||
gb.attribute(ThresHolder.AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage());
|
||||
gb.attribute("Q1", sampleStat.getQuantileDepth(0.25));
|
||||
gb.attribute("MED", sampleStat.getQuantileDepth(0.50));
|
||||
gb.attribute("Q3", sampleStat.getQuantileDepth(0.75));
|
||||
|
|
@ -290,7 +290,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
if (debug) {
|
||||
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
|
||||
}
|
||||
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds), false));
|
||||
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
|
@ -307,11 +307,12 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
* @param statuses the set of statuses to be converted
|
||||
* @return a matching set of strings
|
||||
*/
|
||||
private List<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
private List<String> statusesToStrings(Set<CallableStatus> statuses, final boolean includePASS) {
|
||||
List<String> output = new ArrayList<String>(statuses.size());
|
||||
|
||||
for (CallableStatus status : statuses)
|
||||
output.add(status.name());
|
||||
if ( includePASS || status != CallableStatus.PASS ) // adding pass => results in a filter for genotypes
|
||||
output.add(status.name());
|
||||
|
||||
return output;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import java.util.HashSet;
|
|||
import java.util.Set;
|
||||
|
||||
class ThresHolder {
|
||||
public static final String AVG_INTERVAL_DP_KEY = "AVG_INTERVAL_DP";
|
||||
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
|
||||
|
||||
private final int minimumBaseQuality;
|
||||
|
|
@ -129,12 +130,13 @@ class ThresHolder {
|
|||
|
||||
// INFO fields for overall data
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
||||
headerLines.add(new VCFInfoHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFInfoHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
// todo -- find the appropriate VCF constants
|
||||
headerLines.add(new VCFFormatHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
|
||||
headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
||||
|
|
|
|||
Loading…
Reference in New Issue