Fixed bugs in DiagnoseTargets
DT would not report bad mates! that has been fixed Signed-off-by: Mauricio Carneiro <carneiro@broadinstitute.org>
This commit is contained in:
parent
538cdf9210
commit
bdf5945dcc
|
|
@ -250,6 +250,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||||
private void outputStatsToVCF(IntervalStatistics stats, Allele refAllele) {
|
private void outputStatsToVCF(IntervalStatistics stats, Allele refAllele) {
|
||||||
GenomeLoc interval = stats.getInterval();
|
GenomeLoc interval = stats.getInterval();
|
||||||
|
|
||||||
|
|
||||||
List<Allele> alleles = new ArrayList<Allele>();
|
List<Allele> alleles = new ArrayList<Allele>();
|
||||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||||
|
|
@ -265,7 +266,9 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||||
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
|
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
|
||||||
|
|
||||||
vcb = vcb.attributes(attributes);
|
vcb = vcb.attributes(attributes);
|
||||||
|
if (debug) {
|
||||||
|
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
|
||||||
|
}
|
||||||
for (String sample : samples) {
|
for (String sample : samples) {
|
||||||
Map<String, Object> infos = new HashMap<String, Object>();
|
Map<String, Object> infos = new HashMap<String, Object>();
|
||||||
SampleStatistics sampleStat = stats.getSample(sample);
|
SampleStatistics sampleStat = stats.getSample(sample);
|
||||||
|
|
@ -277,14 +280,14 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||||
Set<String> filters = new HashSet<String>();
|
Set<String> filters = new HashSet<String>();
|
||||||
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||||
|
|
||||||
|
if (debug) {
|
||||||
|
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
|
||||||
|
}
|
||||||
|
|
||||||
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
|
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
|
||||||
}
|
}
|
||||||
vcb = vcb.genotypes(genotypes);
|
vcb = vcb.genotypes(genotypes);
|
||||||
|
|
||||||
if (debug) {
|
|
||||||
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
|
|
||||||
}
|
|
||||||
|
|
||||||
vcfWriter.add(vcb.make());
|
vcfWriter.add(vcb.make());
|
||||||
|
|
||||||
|
|
@ -306,6 +309,6 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private IntervalStatistics createIntervalStatistic(GenomeLoc interval) {
|
private IntervalStatistics createIntervalStatistic(GenomeLoc interval) {
|
||||||
return new IntervalStatistics(samples, interval /*, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality*/);
|
return new IntervalStatistics(samples, interval);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -104,19 +104,19 @@ class SampleStatistics {
|
||||||
|
|
||||||
double intervalSize = interval.size();
|
double intervalSize = interval.size();
|
||||||
|
|
||||||
if ((nBadMates / nReads) > thresholds.getBadMateStatusThreshold())
|
if (((double) nBadMates / nReads) >= thresholds.getBadMateStatusThreshold())
|
||||||
output.add(CallableStatus.BAD_MATE);
|
output.add(CallableStatus.BAD_MATE);
|
||||||
|
|
||||||
if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) > thresholds.getCoverageStatusThreshold())
|
if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) >= thresholds.getCoverageStatusThreshold())
|
||||||
output.add(CallableStatus.COVERAGE_GAPS);
|
output.add(CallableStatus.COVERAGE_GAPS);
|
||||||
|
|
||||||
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) > thresholds.getCoverageStatusThreshold())
|
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) >= thresholds.getCoverageStatusThreshold())
|
||||||
output.add(CallableStatus.LOW_COVERAGE);
|
output.add(CallableStatus.LOW_COVERAGE);
|
||||||
|
|
||||||
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) > thresholds.getExcessiveCoverageThreshold())
|
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) >= thresholds.getExcessiveCoverageThreshold())
|
||||||
output.add(CallableStatus.EXCESSIVE_COVERAGE);
|
output.add(CallableStatus.EXCESSIVE_COVERAGE);
|
||||||
|
|
||||||
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) > thresholds.getQualityStatusThreshold())
|
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) >= thresholds.getQualityStatusThreshold())
|
||||||
output.add(CallableStatus.POOR_QUALITY);
|
output.add(CallableStatus.POOR_QUALITY);
|
||||||
|
|
||||||
if (totals.get(CallableStatus.REF_N) > 0)
|
if (totals.get(CallableStatus.REF_N) > 0)
|
||||||
|
|
@ -161,7 +161,7 @@ class SampleStatistics {
|
||||||
// Was this read already processed?
|
// Was this read already processed?
|
||||||
if (read.getTemporaryAttribute("checkedBadMate") == null) {
|
if (read.getTemporaryAttribute("checkedBadMate") == null) {
|
||||||
nReads++;
|
nReads++;
|
||||||
if (hasValidMate(read, thresholds))
|
if (!hasValidMate(read, thresholds))
|
||||||
nBadMates++;
|
nBadMates++;
|
||||||
read.setTemporaryAttribute("checkedBadMate", true);
|
read.setTemporaryAttribute("checkedBadMate", true);
|
||||||
}
|
}
|
||||||
|
|
@ -254,7 +254,7 @@ class SampleStatistics {
|
||||||
* reasonable insert size?
|
* reasonable insert size?
|
||||||
* inverted?
|
* inverted?
|
||||||
* same orientation?
|
* same orientation?
|
||||||
* todo - same contig?
|
* same contig?
|
||||||
* is pair mapped?
|
* is pair mapped?
|
||||||
* todo - is forced mate?
|
* todo - is forced mate?
|
||||||
*
|
*
|
||||||
|
|
@ -264,6 +264,10 @@ class SampleStatistics {
|
||||||
if (!read.getReadPairedFlag())
|
if (!read.getReadPairedFlag())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// different contigs
|
||||||
|
if (read.getMateReferenceIndex() != read.getReferenceIndex())
|
||||||
|
return false;
|
||||||
|
|
||||||
// unmapped
|
// unmapped
|
||||||
if (read.getMateUnmappedFlag() || read.getReadUnmappedFlag())
|
if (read.getMateUnmappedFlag() || read.getReadUnmappedFlag())
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -277,10 +281,19 @@ class SampleStatistics {
|
||||||
read.getAlignmentStart() < read.getMateAlignmentStart())
|
read.getAlignmentStart() < read.getMateAlignmentStart())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// TODO note: IGV uses a different alorithm for insert size, there should be a common util class that does this for you
|
||||||
// mates are too far apart
|
// mates are too far apart
|
||||||
if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize())
|
if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getnReads() {
|
||||||
|
return nReads;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getnBadMates() {
|
||||||
|
return nBadMates;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -129,12 +129,12 @@ class ThresHolder {
|
||||||
|
|
||||||
// INFO fields for overall data
|
// INFO fields for overall data
|
||||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
||||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
|
headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||||
|
|
||||||
// FORMAT fields for each genotype
|
// FORMAT fields for each genotype
|
||||||
// todo -- find the appropriate VCF constants
|
// todo -- find the appropriate VCF constants
|
||||||
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
|
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||||
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
||||||
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue