Fixes for DiagnoseTargets to be VCF/BCF2 spec compliant
-- Don't use DP for the average interval depth; use AVG_INTERVAL_DP instead, which is now a float, not an int.
-- Don't add the PASS filter value to genotypes, as this is actually considered failing filters in the GATK. Genotype filters should be empty for PASSing sites.
This commit is contained in:
parent
e8288c78d7
commit
93426a44b1
|
|
@ -266,13 +266,13 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||||
|
|
||||||
alleles.add(refAllele);
|
alleles.add(refAllele);
|
||||||
alleles.add(SYMBOLIC_ALLELE);
|
alleles.add(SYMBOLIC_ALLELE);
|
||||||
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
|
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles);
|
||||||
|
|
||||||
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
|
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
|
||||||
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds))));
|
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds), true)));
|
||||||
|
|
||||||
attributes.put(VCFConstants.END_KEY, interval.getStop());
|
attributes.put(VCFConstants.END_KEY, interval.getStop());
|
||||||
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
|
attributes.put(ThresHolder.AVG_INTERVAL_DP_KEY, stats.averageCoverage());
|
||||||
|
|
||||||
vcb = vcb.attributes(attributes);
|
vcb = vcb.attributes(attributes);
|
||||||
if (debug) {
|
if (debug) {
|
||||||
|
|
@ -282,7 +282,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||||
|
|
||||||
SampleStatistics sampleStat = stats.getSample(sample);
|
SampleStatistics sampleStat = stats.getSample(sample);
|
||||||
gb.DP((int)sampleStat.averageCoverage());
|
gb.attribute(ThresHolder.AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage());
|
||||||
gb.attribute("Q1", sampleStat.getQuantileDepth(0.25));
|
gb.attribute("Q1", sampleStat.getQuantileDepth(0.25));
|
||||||
gb.attribute("MED", sampleStat.getQuantileDepth(0.50));
|
gb.attribute("MED", sampleStat.getQuantileDepth(0.50));
|
||||||
gb.attribute("Q3", sampleStat.getQuantileDepth(0.75));
|
gb.attribute("Q3", sampleStat.getQuantileDepth(0.75));
|
||||||
|
|
@ -290,7 +290,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||||
if (debug) {
|
if (debug) {
|
||||||
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
|
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
|
||||||
}
|
}
|
||||||
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds), false));
|
||||||
|
|
||||||
genotypes.add(gb.make());
|
genotypes.add(gb.make());
|
||||||
}
|
}
|
||||||
|
|
@ -307,10 +307,11 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||||
* @param statuses the set of statuses to be converted
|
* @param statuses the set of statuses to be converted
|
||||||
* @return a matching set of strings
|
* @return a matching set of strings
|
||||||
*/
|
*/
|
||||||
private List<String> statusesToStrings(Set<CallableStatus> statuses) {
|
private List<String> statusesToStrings(Set<CallableStatus> statuses, final boolean includePASS) {
|
||||||
List<String> output = new ArrayList<String>(statuses.size());
|
List<String> output = new ArrayList<String>(statuses.size());
|
||||||
|
|
||||||
for (CallableStatus status : statuses)
|
for (CallableStatus status : statuses)
|
||||||
|
if ( includePASS || status != CallableStatus.PASS ) // adding pass => results in a filter for genotypes
|
||||||
output.add(status.name());
|
output.add(status.name());
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
class ThresHolder {
|
class ThresHolder {
|
||||||
|
public static final String AVG_INTERVAL_DP_KEY = "AVG_INTERVAL_DP";
|
||||||
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
|
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
|
||||||
|
|
||||||
private final int minimumBaseQuality;
|
private final int minimumBaseQuality;
|
||||||
|
|
@ -129,12 +130,13 @@ class ThresHolder {
|
||||||
|
|
||||||
// INFO fields for overall data
|
// INFO fields for overall data
|
||||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
||||||
headerLines.add(new VCFInfoHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
headerLines.add(new VCFInfoHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||||
|
|
||||||
// FORMAT fields for each genotype
|
// FORMAT fields for each genotype
|
||||||
// todo -- find the appropriate VCF constants
|
// todo -- find the appropriate VCF constants
|
||||||
headerLines.add(new VCFFormatHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
|
||||||
|
headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||||
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
||||||
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue