DiagnoseTargets with working Q1,Median,Q3
- Merged Roger's metrics with Mauricio's optimizations
- Added Stats for DiagnoseTargets
- now has functions to find the median depth, and upper/lower quartile
- the REF_N callable status is implemented
- The walker now runs efficiently
- Diagnose Targets accepts overlapping intervals
- Diagnose Targets now checks for bad mates
- The read mates are checked in a memory efficient manner
- The statistics thresholds have been consolidated and moved outside of the statistics classes and into the walker.
- Fixed some bugs
- Removed rod binding
Added more Unit tests
- Test callable statuses on the locus level
- Test bad mates
- Changed NO_COVERAGE -> COVERAGE_GAPS to avoid confusion
Signed-off-by: Mauricio Carneiro <carneiro@broadinstitute.org>
This commit is contained in:
parent
50031b63c5
commit
b8b139841d
|
|
@ -31,38 +31,29 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
|||
* @since 2/1/12
|
||||
*/
|
||||
public enum CallableStatus {
|
||||
/**
|
||||
* the reference base was an N, which is not considered callable by the GATK
|
||||
*/
|
||||
// todo -- implement this status
|
||||
|
||||
REF_N("the reference base was an N, which is not considered callable the GATK"),
|
||||
/**
|
||||
* the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE
|
||||
*/
|
||||
|
||||
PASS("the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE"),
|
||||
/**
|
||||
* absolutely no reads were seen at this locus, regardless of the filtering parameters
|
||||
*/
|
||||
NO_COVERAGE("absolutely no reads were seen at this locus, regardless of the filtering parameters"),
|
||||
/**
|
||||
* there were less than min. depth bases at the locus, after applying filters
|
||||
*/
|
||||
|
||||
COVERAGE_GAPS("absolutely no coverage was observed at a locus, regardless of the filtering parameters"),
|
||||
|
||||
LOW_COVERAGE("there were less than min. depth bases at the locus, after applying filters"),
|
||||
/**
|
||||
* more than -maxDepth reads at the locus, indicating some sort of mapping problem
|
||||
*/
|
||||
|
||||
EXCESSIVE_COVERAGE("more than -maxDepth read at the locus, indicating some sort of mapping problem"),
|
||||
/**
|
||||
* more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads
|
||||
*/
|
||||
|
||||
POOR_QUALITY("more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads"),
|
||||
|
||||
BAD_MATE(""),
|
||||
BAD_MATE("the reads are not properly mated, suggesting mapping errors"),
|
||||
|
||||
INCONSISTENT_COVERAGE("");
|
||||
NO_READS("there are no reads contained in the interval"),
|
||||
|
||||
//
|
||||
// Interval-level statuses
|
||||
//
|
||||
LOW_MEDIAN_DEPTH("interval has insufficient median depth across samples");
|
||||
|
||||
public String description;
|
||||
public final String description;
|
||||
|
||||
private CallableStatus(String description) {
|
||||
this.description = description;
|
||||
|
|
|
|||
|
|
@ -25,89 +25,126 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Short one line description of the walker.
|
||||
* Analyzes coverage distribution and validates read mates for a given interval and sample.
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Long description of the walker]
|
||||
* Used to diagnose regions with bad coverage, mapping, or read mating. Analyzes each sample independently in addition
|
||||
* to interval wide analysis.
|
||||
* </p>
|
||||
* <p/>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Description of the Input]
|
||||
* <ul>
|
||||
* <li>A reference file</li>
|
||||
* <li>one or more input BAMs</li>
|
||||
* <li>One or more intervals</li>
|
||||
* </ul>
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Description of the Output]
|
||||
* A modified VCF detailing each interval by sample
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T [walker name]
|
||||
* -T DiagnoseTargets \
|
||||
* -R reference.fasta \
|
||||
* -o output.vcf \
|
||||
* -I sample1.bam \
|
||||
* -I sample2.bam \
|
||||
* -I sample3.bam \
|
||||
* -L intervals.interval_list
|
||||
* </pre>
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/1/12
|
||||
* @author Mauricio Carneiro, Roger Zurawicki
|
||||
* @since 5/8/12
|
||||
*/
|
||||
@By(value = DataSource.READS)
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
public class DiagnoseTargets extends LocusWalker<Long, Long> implements AnnotatorCompatibleWalker {
|
||||
@Input(fullName = "interval_track", shortName = "int", doc = "", required = true)
|
||||
private IntervalBinding<Feature> intervalTrack = null;
|
||||
public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||
|
||||
@Output(doc = "File to which variants should be written", required = true)
|
||||
private VariantContextWriter vcfWriter = null;
|
||||
|
||||
@Argument(fullName = "minimum_base_quality", shortName = "mbq", doc = "", required = false)
|
||||
@Argument(fullName = "minimum_base_quality", shortName = "BQ", doc = "The minimum Base Quality that is considered for calls", required = false)
|
||||
private int minimumBaseQuality = 20;
|
||||
|
||||
@Argument(fullName = "minimum_mapping_quality", shortName = "mmq", doc = "", required = false)
|
||||
@Argument(fullName = "minimum_mapping_quality", shortName = "MQ", doc = "The minimum read mapping quality considered for calls", required = false)
|
||||
private int minimumMappingQuality = 20;
|
||||
|
||||
@Argument(fullName = "minimum_coverage", shortName = "mincov", doc = "", required = false)
|
||||
@Argument(fullName = "minimum_coverage", shortName = "min", doc = "The minimum allowable coverage, used for calling LOW_COVERAGE", required = false)
|
||||
private int minimumCoverage = 5;
|
||||
|
||||
@Argument(fullName = "maximum_coverage", shortName = "maxcov", doc = "", required = false)
|
||||
@Argument(fullName = "maximum_coverage", shortName = "max", doc = "The maximum allowable coverage, used for calling EXCESSIVE_COVERAGE", required = false)
|
||||
private int maximumCoverage = 700;
|
||||
|
||||
@Argument(fullName = "minimum_median_depth", shortName = "med", doc = "The minimum allowable median coverage, used for calling LOW_MEDIAN_DEPTH", required = false)
|
||||
private int minMedianDepth = 20;
|
||||
|
||||
@Argument(fullName = "maximum_insert_size", shortName = "ins", doc = "The maximum allowed distance between a read and its mate", required = false)
|
||||
private int maxInsertSize = 50;
|
||||
|
||||
@Argument(fullName = "voting_status_threshold", shortName = "stV", doc = "The needed percentage of samples containing a call for the interval to adopt the call ", required = false)
|
||||
private double votePercentage = 0.50;
|
||||
|
||||
@Argument(fullName = "low_median_depth_status_threshold", shortName = "stMED", doc = "The percentage of the loci needed for calling LOW_MEDIAN_DEPTH", required = false)
|
||||
private double lowMedianDepthPercentage = 0.20;
|
||||
|
||||
@Argument(fullName = "bad_mate_status_threshold", shortName = "stBM", doc = "The percentage of the loci needed for calling BAD_MATE", required = false)
|
||||
private double badMateStatusThreshold = 0.50;
|
||||
|
||||
@Argument(fullName = "coverage_status_threshold", shortName = "stC", doc = "The percentage of the loci needed for calling LOW_COVERAGE and COVERAGE_GAPS", required = false)
|
||||
private double coverageStatusThreshold = 0.20;
|
||||
|
||||
@Argument(fullName = "excessive_coverage_status_threshold", shortName = "stXC", doc = "The percentage of the loci needed for calling EXCESSIVE_COVERAGE", required = false)
|
||||
private double excessiveCoverageThreshold = 0.20;
|
||||
|
||||
@Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The percentage of the loci needed for calling POOR_QUALITY", required = false)
|
||||
private double qualityStatusThreshold = 0.50;
|
||||
|
||||
@Argument(fullName = "print_debug_log", shortName = "dl", doc = "Used only for debugging the walker. Prints extra info to screen", required = false)
|
||||
private boolean debug = false;
|
||||
|
||||
private HashMap<GenomeLoc, IntervalStatistics> intervalMap = null; // interval => statistics
|
||||
private PeekableIterator<GenomeLoc> intervalListIterator; // an iterator to go over all the intervals provided as we traverse the genome
|
||||
private Set<String> samples = null; // all the samples being processed
|
||||
|
||||
private final Allele SYMBOLIC_ALLELE = Allele.create("<DT>", false); // avoid creating the symbolic allele multiple times
|
||||
private ThresHolder thresholds = null;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
super.initialize();
|
||||
|
||||
if (intervalTrack == null)
|
||||
throw new UserException("This tool currently only works if you provide an interval track");
|
||||
if (getToolkit().getIntervals() == null)
|
||||
throw new UserException("This tool currently only works if you provide one or more interval");
|
||||
|
||||
thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage, minMedianDepth, maxInsertSize, votePercentage, lowMedianDepthPercentage, badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold, qualityStatusThreshold);
|
||||
|
||||
intervalMap = new HashMap<GenomeLoc, IntervalStatistics>();
|
||||
intervalListIterator = new PeekableIterator<GenomeLoc>(intervalTrack.getIntervals(getToolkit()).listIterator());
|
||||
intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator());
|
||||
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); // get all of the unique sample names for the VCF Header
|
||||
vcfWriter.writeHeader(new VCFHeader(getHeaderInfo(), samples)); // initialize the VCF header
|
||||
|
|
@ -121,7 +158,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
|
|||
addNewOverlappingIntervals(refLocus); // add all new intervals that may overlap this reference locus
|
||||
|
||||
for (IntervalStatistics intervalStatistics : intervalMap.values())
|
||||
intervalStatistics.addLocus(context); // Add current locus to stats
|
||||
intervalStatistics.addLocus(context, ref, thresholds); // Add current locus to stats
|
||||
|
||||
return 1L;
|
||||
}
|
||||
|
|
@ -151,45 +188,40 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
|
|||
@Override
|
||||
public void onTraversalDone(Long result) {
|
||||
for (GenomeLoc interval : intervalMap.keySet())
|
||||
processIntervalStats(intervalMap.get(interval), Allele.create("A"));
|
||||
outputStatsToVCF(intervalMap.get(interval), Allele.create("A", true));
|
||||
}
|
||||
|
||||
@Override
|
||||
public RodBinding<VariantContext> getSnpEffRodBinding() {return null;}
|
||||
private GenomeLoc getIntervalMapSpan() {
|
||||
GenomeLoc loc = null;
|
||||
for (GenomeLoc interval : intervalMap.keySet()) {
|
||||
if (loc == null) {
|
||||
loc = interval;
|
||||
} else
|
||||
loc = interval.union(loc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public RodBinding<VariantContext> getDbsnpRodBinding() {return null;}
|
||||
|
||||
@Override
|
||||
public List<RodBinding<VariantContext>> getCompRodBindings() {return null;}
|
||||
|
||||
@Override
|
||||
public List<RodBinding<VariantContext>> getResourceRodBindings() {return null;}
|
||||
|
||||
@Override
|
||||
public boolean alwaysAppendDbsnpId() {return false;}
|
||||
return loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all intervals that are behind the current reference locus from the intervalMap
|
||||
*
|
||||
* @param refLocus the current reference locus
|
||||
* @param refBase the reference allele
|
||||
* @param refBase the reference allele
|
||||
*/
|
||||
private void removePastIntervals(GenomeLoc refLocus, byte refBase) {
|
||||
List<GenomeLoc> toRemove = new LinkedList<GenomeLoc>();
|
||||
for (GenomeLoc interval : intervalMap.keySet())
|
||||
if (interval.isBefore(refLocus)) {
|
||||
processIntervalStats(intervalMap.get(interval), Allele.create(refBase, true));
|
||||
toRemove.add(interval);
|
||||
// if the span of every stashed interval lies before the current locus, output them all and remove them
|
||||
if (getIntervalMapSpan() != null && getIntervalMapSpan().isBefore(refLocus)) {
|
||||
for (GenomeLoc interval : intervalMap.keySet()) {
|
||||
outputStatsToVCF(intervalMap.get(interval), Allele.create(refBase, true));
|
||||
intervalMap.remove(interval);
|
||||
}
|
||||
|
||||
for (GenomeLoc interval : toRemove)
|
||||
intervalMap.remove(interval);
|
||||
}
|
||||
|
||||
GenomeLoc interval = intervalListIterator.peek(); // clean up all intervals that we might have skipped because there was no data
|
||||
while(interval != null && interval.isBefore(refLocus)) {
|
||||
while (interval != null && interval.isBefore(refLocus)) {
|
||||
interval = intervalListIterator.next();
|
||||
processIntervalStats(createIntervalStatistic(interval), Allele.create(refBase, true));
|
||||
outputStatsToVCF(createIntervalStatistic(interval), Allele.create(refBase, true));
|
||||
interval = intervalListIterator.peek();
|
||||
}
|
||||
}
|
||||
|
|
@ -202,7 +234,6 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
|
|||
private void addNewOverlappingIntervals(GenomeLoc refLocus) {
|
||||
GenomeLoc interval = intervalListIterator.peek();
|
||||
while (interval != null && !interval.isPast(refLocus)) {
|
||||
System.out.println("LOCUS : " + refLocus + " -- " + interval);
|
||||
intervalMap.put(interval, createIntervalStatistic(interval));
|
||||
intervalListIterator.next(); // discard the interval (we've already added it to the map)
|
||||
interval = intervalListIterator.peek();
|
||||
|
|
@ -210,14 +241,14 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
|
|||
}
|
||||
|
||||
/**
|
||||
* Takes the interval, finds it in the stash, prints it to the VCF, and removes it
|
||||
* Takes the interval, finds it in the stash, prints it to the VCF
|
||||
*
|
||||
* @param stats The statistics of the interval
|
||||
* @param stats The statistics of the interval
|
||||
* @param refAllele the reference allele
|
||||
*/
|
||||
private void processIntervalStats(IntervalStatistics stats, Allele refAllele) {
|
||||
private void outputStatsToVCF(IntervalStatistics stats, Allele refAllele) {
|
||||
GenomeLoc interval = stats.getInterval();
|
||||
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
|
|
@ -227,7 +258,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
|
|||
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
|
||||
|
||||
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
|
||||
vcb.filters(statusesToStrings(stats.callableStatuses()));
|
||||
vcb.filters(statusesToStrings(stats.callableStatuses(thresholds)));
|
||||
|
||||
attributes.put(VCFConstants.END_KEY, interval.getStop());
|
||||
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
|
||||
|
|
@ -236,16 +267,24 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
|
|||
|
||||
for (String sample : samples) {
|
||||
Map<String, Object> infos = new HashMap<String, Object>();
|
||||
infos.put(VCFConstants.DEPTH_KEY, stats.getSample(sample).averageCoverage());
|
||||
SampleStatistics sampleStat = stats.getSample(sample);
|
||||
infos.put(VCFConstants.DEPTH_KEY, sampleStat.averageCoverage());
|
||||
infos.put("Q1", sampleStat.getQuantileDepth(0.25));
|
||||
infos.put("MED", sampleStat.getQuantileDepth(0.50));
|
||||
infos.put("Q3", sampleStat.getQuantileDepth(0.75));
|
||||
|
||||
Set<String> filters = new HashSet<String>();
|
||||
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses()));
|
||||
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||
|
||||
|
||||
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
|
||||
}
|
||||
vcb = vcb.genotypes(genotypes);
|
||||
|
||||
if (debug) {
|
||||
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
|
||||
}
|
||||
|
||||
vcfWriter.add(vcb.make());
|
||||
|
||||
}
|
||||
|
|
@ -264,7 +303,12 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
|
|||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
// todo -- find the appropriate VCF constants
|
||||
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
||||
|
||||
|
||||
// FILTER fields
|
||||
for (CallableStatus stat : CallableStatus.values())
|
||||
|
|
@ -273,8 +317,13 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
|
|||
return headerLines;
|
||||
}
|
||||
|
||||
|
||||
private static Set<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
/**
|
||||
* Function that processes a set of statuses into strings
|
||||
*
|
||||
* @param statuses the set of statuses to be converted
|
||||
* @return a matching set of strings
|
||||
*/
|
||||
private Set<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
Set<String> output = new HashSet<String>(statuses.size());
|
||||
|
||||
for (CallableStatus status : statuses)
|
||||
|
|
@ -284,6 +333,6 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
|
|||
}
|
||||
|
||||
private IntervalStatistics createIntervalStatistic(GenomeLoc interval) {
|
||||
return new IntervalStatistics(samples, interval, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality);
|
||||
return new IntervalStatistics(samples, interval /*, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality*/);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -34,19 +35,24 @@ import java.util.HashSet;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public class IntervalStatistics {
|
||||
class IntervalStatistics {
|
||||
|
||||
private final Map<String, SampleStatistics> samples;
|
||||
private final GenomeLoc interval;
|
||||
private boolean hasNref = false;
|
||||
|
||||
private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
|
||||
|
||||
|
||||
public IntervalStatistics(Set<String> samples, GenomeLoc interval, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
|
||||
/*
|
||||
private double minMedianDepth = 20.0;
|
||||
private double badMedianDepthPercentage = 0.20;
|
||||
private double votePercentage = 0.50;
|
||||
*/
|
||||
public IntervalStatistics(Set<String> samples, GenomeLoc interval/*, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality*/) {
|
||||
this.interval = interval;
|
||||
this.samples = new HashMap<String, SampleStatistics>(samples.size());
|
||||
for (String sample : samples)
|
||||
this.samples.put(sample, new SampleStatistics(interval, minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality));
|
||||
this.samples.put(sample, new SampleStatistics(interval /*, minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality*/));
|
||||
}
|
||||
|
||||
public SampleStatistics getSample(String sample) {
|
||||
|
|
@ -57,9 +63,19 @@ public class IntervalStatistics {
|
|||
return interval;
|
||||
}
|
||||
|
||||
public void addLocus(AlignmentContext context) {
|
||||
/**
|
||||
* The function to populate data into the Statistics from the walker.
|
||||
* This takes the input and manages passing the data to the SampleStatistics and LocusStatistics
|
||||
*
|
||||
* @param context The alignment context given from the walker
|
||||
* @param ref the reference context given from the walker
|
||||
* @param thresholds the object holding the statistical thresholds for making calls
|
||||
*/
|
||||
public void addLocus(AlignmentContext context, ReferenceContext ref, ThresHolder thresholds) {
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
|
||||
//System.out.println(ref.getLocus().toString());
|
||||
|
||||
Map<String, ReadBackedPileup> samplePileups = pileup.getPileupsForSamples(samples.keySet());
|
||||
|
||||
for (Map.Entry<String, ReadBackedPileup> entry : samplePileups.entrySet()) {
|
||||
|
|
@ -67,15 +83,16 @@ public class IntervalStatistics {
|
|||
ReadBackedPileup samplePileup = entry.getValue();
|
||||
SampleStatistics sampleStatistics = samples.get(sample);
|
||||
|
||||
if (sampleStatistics == null)
|
||||
if (sampleStatistics == null)
|
||||
throw new ReviewedStingException(String.format("Trying to add locus statistics to a sample (%s) that doesn't exist in the Interval.", sample));
|
||||
|
||||
sampleStatistics.addLocus(context.getLocation(), samplePileup);
|
||||
|
||||
sampleStatistics.addLocus(context.getLocation(), samplePileup, thresholds);
|
||||
}
|
||||
|
||||
if (!hasNref && ref.getBase() == 'N')
|
||||
hasNref = true;
|
||||
}
|
||||
|
||||
|
||||
public double averageCoverage() {
|
||||
if (preComputedTotalCoverage < 0)
|
||||
calculateTotalCoverage();
|
||||
|
|
@ -90,15 +107,43 @@ public class IntervalStatistics {
|
|||
|
||||
/**
|
||||
* Return the Callable statuses for the interval as a whole
|
||||
* todo -- add a voting system for sample flags and add interval specific statuses
|
||||
* todo -- add missingness filter
|
||||
*
|
||||
* @param thresholds the object holding the statistical thresholds for making calls
|
||||
* @return the callable status(es) for the whole interval
|
||||
*/
|
||||
public Set<CallableStatus> callableStatuses() {
|
||||
public Set<CallableStatus> callableStatuses(ThresHolder thresholds) {
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
// Initialize the Map
|
||||
Map<CallableStatus, Integer> votes = new HashMap<CallableStatus, Integer>();
|
||||
for (CallableStatus status : CallableStatus.values())
|
||||
votes.put(status, 0);
|
||||
|
||||
// tally up the votes
|
||||
for (SampleStatistics sample : samples.values())
|
||||
output.addAll(sample.getCallableStatuses());
|
||||
for (CallableStatus status : sample.getCallableStatuses(thresholds))
|
||||
votes.put(status, votes.get(status) + 1);
|
||||
|
||||
// output every status (other than PASS) whose vote tally is above the threshold
|
||||
for (CallableStatus status : votes.keySet()) {
|
||||
if (votes.get(status) > (samples.size() * thresholds.getVotePercentageThreshold()) && !(status.equals(CallableStatus.PASS)))
|
||||
output.add(status);
|
||||
}
|
||||
|
||||
|
||||
if (hasNref)
|
||||
output.add(CallableStatus.REF_N);
|
||||
|
||||
// count the samples whose median depth is below the minimum median depth
|
||||
int nLowMedianDepth = 0;
|
||||
for (SampleStatistics sample : samples.values()) {
|
||||
if (sample.getQuantileDepth(0.5) < thresholds.getMinimumMedianDepth())
|
||||
nLowMedianDepth++;
|
||||
}
|
||||
|
||||
if (nLowMedianDepth > (samples.size() * thresholds.getLowMedianDepthThreshold()))
|
||||
output.add(CallableStatus.LOW_MEDIAN_DEPTH);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,9 +27,9 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
|||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class LocusStatistics {
|
||||
final int coverage;
|
||||
final int rawCoverage;
|
||||
class LocusStatistics {
|
||||
private final int coverage;
|
||||
private final int rawCoverage;
|
||||
|
||||
public LocusStatistics() {
|
||||
this.coverage = 0;
|
||||
|
|
@ -52,21 +52,20 @@ public class LocusStatistics {
|
|||
/**
|
||||
* Generates all applicable statuses from the coverages in this locus
|
||||
*
|
||||
* @param minimumCoverageThreshold the minimum threshold for determining low coverage/poor quality
|
||||
* @param maximumCoverageThreshold the maximum threshold for determining excessive coverage
|
||||
* @param thresholds the object holding the statistical thresholds for making calls
|
||||
* @return a set of all statuses that apply
|
||||
*/
|
||||
public Set<CallableStatus> callableStatuses(int minimumCoverageThreshold, int maximumCoverageThreshold) {
|
||||
public Set<CallableStatus> callableStatuses(ThresHolder thresholds) {
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
// if too much coverage
|
||||
if (getCoverage() > maximumCoverageThreshold)
|
||||
if (getCoverage() > thresholds.getMaximumCoverage())
|
||||
output.add(CallableStatus.EXCESSIVE_COVERAGE);
|
||||
|
||||
// if not enough coverage
|
||||
if (getCoverage() < minimumCoverageThreshold) {
|
||||
if (getCoverage() < thresholds.getMinimumCoverage()) {
|
||||
// was there a lot of low Qual coverage?
|
||||
if (getRawCoverage() >= minimumCoverageThreshold)
|
||||
if (getRawCoverage() >= thresholds.getMinimumCoverage())
|
||||
output.add(CallableStatus.POOR_QUALITY);
|
||||
// no?
|
||||
else {
|
||||
|
|
@ -74,7 +73,7 @@ public class LocusStatistics {
|
|||
if (getRawCoverage() > 0)
|
||||
output.add(CallableStatus.LOW_COVERAGE);
|
||||
else
|
||||
output.add(CallableStatus.NO_COVERAGE);
|
||||
output.add(CallableStatus.COVERAGE_GAPS);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,41 +27,36 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Short one line description of the walker.
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/1/12
|
||||
* The statistics calculator for a specific sample given the interval
|
||||
*/
|
||||
class SampleStatistics {
|
||||
private final GenomeLoc interval;
|
||||
private final ArrayList<LocusStatistics> loci;
|
||||
|
||||
private final int minimumCoverageThreshold;
|
||||
private final int maximumCoverageThreshold;
|
||||
private final int minimumMappingQuality;
|
||||
private final int minimumBaseQuality;
|
||||
private int[] preSortedDepths = null;
|
||||
private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
|
||||
|
||||
private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
|
||||
private int nReads = -1;
|
||||
private int nBadMates = -1;
|
||||
|
||||
private SampleStatistics(GenomeLoc interval, ArrayList<LocusStatistics> loci, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
|
||||
private SampleStatistics(GenomeLoc interval, ArrayList<LocusStatistics> loci) {
|
||||
this.interval = interval;
|
||||
this.loci = loci;
|
||||
this.minimumCoverageThreshold = minimumCoverageThreshold;
|
||||
this.maximumCoverageThreshold = maximumCoverageThreshold;
|
||||
this.minimumMappingQuality = minimumMappingQuality;
|
||||
this.minimumBaseQuality = minimumBaseQuality;
|
||||
nReads = 0;
|
||||
nBadMates = 0;
|
||||
}
|
||||
|
||||
public SampleStatistics(GenomeLoc interval, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
|
||||
this(interval, new ArrayList<LocusStatistics>(interval.size()), minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality);
|
||||
public SampleStatistics(GenomeLoc interval) {
|
||||
this(interval, new ArrayList<LocusStatistics>(interval.size()));
|
||||
|
||||
// Initialize every locus (this way we don't have to worry about non-existent loci in the object)
|
||||
for (int i = 0; i < interval.size(); i++)
|
||||
this.loci.add(i, new LocusStatistics());
|
||||
this.loci.add(new LocusStatistics());
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -78,51 +73,56 @@ class SampleStatistics {
|
|||
}
|
||||
|
||||
/**
|
||||
* Calculates the callable statuses of the entire interval
|
||||
* Calculates the callable statuses of the entire sample
|
||||
*
|
||||
* @return the callable statuses of the entire interval
|
||||
* @param thresholds the object holding the statistical thresholds for making calls
|
||||
* @return the callable statuses of the entire sample
|
||||
*/
|
||||
public Set<CallableStatus> getCallableStatuses() {
|
||||
public Set<CallableStatus> getCallableStatuses(ThresHolder thresholds) {
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
Map<CallableStatus, Integer> totals = new HashMap<CallableStatus, Integer>(CallableStatus.values().length);
|
||||
// We check if reads are present to prevent div / 0 exceptions
|
||||
if (nReads == 0) {
|
||||
output.add(CallableStatus.NO_READS);
|
||||
return output;
|
||||
}
|
||||
|
||||
Map<CallableStatus, Double> totals = new HashMap<CallableStatus, Double>(CallableStatus.values().length);
|
||||
|
||||
// initialize map
|
||||
for (CallableStatus status : CallableStatus.values())
|
||||
totals.put(status, 0);
|
||||
totals.put(status, 0.0);
|
||||
|
||||
// sum up all the callable statuses for each locus
|
||||
for (int i = 0; i < interval.size(); i++) {
|
||||
for (CallableStatus status : callableStatus(i)) {
|
||||
int count = totals.get(status);
|
||||
for (CallableStatus status : callableStatus(i, thresholds)) {
|
||||
double count = totals.get(status);
|
||||
|
||||
totals.put(status, count + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
// double to avoid type casting
|
||||
double intervalSize = interval.size();
|
||||
|
||||
double coverageStatusThreshold = 0.20;
|
||||
if ((totals.get(CallableStatus.NO_COVERAGE) / intervalSize) > coverageStatusThreshold)
|
||||
output.add(CallableStatus.NO_COVERAGE);
|
||||
if ((nBadMates / nReads) > thresholds.getBadMateStatusThreshold())
|
||||
output.add(CallableStatus.BAD_MATE);
|
||||
|
||||
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) > coverageStatusThreshold)
|
||||
if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) > thresholds.getCoverageStatusThreshold())
|
||||
output.add(CallableStatus.COVERAGE_GAPS);
|
||||
|
||||
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) > thresholds.getCoverageStatusThreshold())
|
||||
output.add(CallableStatus.LOW_COVERAGE);
|
||||
|
||||
double excessiveCoverageThreshold = 0.20;
|
||||
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) > excessiveCoverageThreshold)
|
||||
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) > thresholds.getExcessiveCoverageThreshold())
|
||||
output.add(CallableStatus.EXCESSIVE_COVERAGE);
|
||||
|
||||
double qualityStatusThreshold = 0.50;
|
||||
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) > qualityStatusThreshold)
|
||||
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) > thresholds.getQualityStatusThreshold())
|
||||
output.add(CallableStatus.POOR_QUALITY);
|
||||
|
||||
if (totals.get(CallableStatus.REF_N) > 0)
|
||||
output.add(CallableStatus.REF_N);
|
||||
|
||||
|
||||
if (output.isEmpty()) {
|
||||
output.add(CallableStatus.PASS);
|
||||
}
|
||||
|
|
@ -132,12 +132,13 @@ class SampleStatistics {
|
|||
/**
|
||||
* Adds a locus to the interval wide stats
|
||||
*
|
||||
* @param locus The locus given as a GenomeLoc
|
||||
* @param pileup The pileup of that locus
|
||||
* @param locus The locus given as a GenomeLoc
|
||||
* @param pileup The pileup of that locus, this exclusively contains the sample
|
||||
* @param thresholds the class contains the statistical threshold for making calls
|
||||
*/
|
||||
public void addLocus(GenomeLoc locus, ReadBackedPileup pileup) {
|
||||
public void addLocus(GenomeLoc locus, ReadBackedPileup pileup, ThresHolder thresholds) {
|
||||
if (!interval.containsP(locus))
|
||||
throw new ReviewedStingException(String.format("Locus %s is not part of the Interval", locus));
|
||||
throw new ReviewedStingException(String.format("Locus %s is not part of the Interval %s", locus, interval));
|
||||
|
||||
// a null pileup means there is nothing to add
|
||||
if (pileup != null) {
|
||||
|
|
@ -145,31 +146,141 @@ class SampleStatistics {
|
|||
int locusIndex = locus.getStart() - interval.getStart();
|
||||
|
||||
int rawCoverage = pileup.depthOfCoverage();
|
||||
int coverage = pileup.getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality).depthOfCoverage();
|
||||
int coverage = pileup.getBaseAndMappingFilteredPileup(thresholds.getMinimumBaseQuality(), thresholds.getMinimumMappingQuality()).depthOfCoverage();
|
||||
|
||||
LocusStatistics locusData = new LocusStatistics(coverage, rawCoverage);
|
||||
|
||||
loci.add(locusIndex, locusData);
|
||||
loci.set(locusIndex, locusData);
|
||||
|
||||
for (GATKSAMRecord read : pileup.getReads())
|
||||
processRead(read, thresholds);
|
||||
}
|
||||
}
|
||||
|
||||
private void processRead(GATKSAMRecord read, ThresHolder thresholds) {
|
||||
// Was this read already processed?
|
||||
if (read.getTemporaryAttribute("checkedBadMate") == null) {
|
||||
nReads++;
|
||||
if (hasValidMate(read, thresholds))
|
||||
nBadMates++;
|
||||
read.setTemporaryAttribute("checkedBadMate", true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the callable status of this locus without taking the reference base into account.
|
||||
* returns the callable status of a given locus without taking the reference base into account.
|
||||
*
|
||||
* @param locusIndex location in the genome to inquire (only one locus)
|
||||
* @param thresholds the class contains the statistical threshold for making calls
|
||||
* @return the callable status of a locus
|
||||
*/
|
||||
private Set<CallableStatus> callableStatus(int locusIndex) {
|
||||
private Set<CallableStatus> callableStatus(int locusIndex, ThresHolder thresholds) {
|
||||
LocusStatistics locus = loci.get(locusIndex);
|
||||
|
||||
return locus.callableStatuses(minimumCoverageThreshold, maximumCoverageThreshold);
|
||||
return locus.callableStatuses(thresholds);
|
||||
}
|
||||
|
||||
|
||||
private void calculateTotalCoverage() {
|
||||
preComputedTotalCoverage = 0;
|
||||
for (LocusStatistics locus : loci)
|
||||
preComputedTotalCoverage += locus.getCoverage();
|
||||
}
|
||||
|
||||
public double getQuantileDepth(double percentage) {
|
||||
if (preSortedDepths == null)
|
||||
getDepthsAsSortedArray();
|
||||
|
||||
return getQuartile(preSortedDepths, percentage);
|
||||
}
|
||||
|
||||
static double getQuartile(int[] data, double percentage) {
|
||||
int size = data.length;
|
||||
if (size == 1)
|
||||
return (double) data[0];
|
||||
|
||||
if (percentage == 0.5) {
|
||||
return getMedian(data);
|
||||
}
|
||||
|
||||
double position = (size - 1.0) / 2;
|
||||
if (percentage == 0.25) {
|
||||
// if the position is a whole number
|
||||
return getMedian(Arrays.copyOfRange(data, 0, (int) position + 1));
|
||||
|
||||
}
|
||||
if (percentage == 0.75) {
|
||||
if (position % 1 == 0) {
|
||||
return getMedian(Arrays.copyOfRange(data, (int) position, size));
|
||||
} else {
|
||||
return getMedian(Arrays.copyOfRange(data, (int) position + 1, size));
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Assumes data is sorted
|
||||
private static double getMedian(int[] data) {
|
||||
double size = (double) data.length;
|
||||
if (size == 1)
|
||||
return (double) data[0];
|
||||
|
||||
double position = (size - 1.0) / 2;
|
||||
|
||||
if (position % 1 == 0)
|
||||
return (double) data[(int) position];
|
||||
|
||||
else {
|
||||
double high = (double) data[(int) Math.ceil(position)];
|
||||
double low = (double) data[(int) Math.floor(position)];
|
||||
|
||||
return (high + low) / 2;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void getDepthsAsSortedArray() {
|
||||
preSortedDepths = new int[loci.size()];
|
||||
|
||||
for (int i = 0; i < loci.size(); i++)
|
||||
preSortedDepths[i] = loci.get(i).getCoverage();
|
||||
|
||||
Arrays.sort(preSortedDepths);
|
||||
}
|
||||
|
||||
boolean hasValidMate(GATKSAMRecord read, ThresHolder thresholds) {
|
||||
/** Check the following
|
||||
* Does it have a pair?
|
||||
* reasonable insert size?
|
||||
* inverted?
|
||||
* same orientation?
|
||||
* todo - same contig?
|
||||
* is pair mapped?
|
||||
* todo - is forced mate?
|
||||
*
|
||||
*/
|
||||
|
||||
// has NO pair
|
||||
if (!read.getReadPairedFlag())
|
||||
return false;
|
||||
|
||||
// unmapped
|
||||
if (read.getMateUnmappedFlag() || read.getReadUnmappedFlag())
|
||||
return false;
|
||||
|
||||
// same orientation
|
||||
if (read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag())
|
||||
return false;
|
||||
|
||||
// inverted
|
||||
if (read.getReadNegativeStrandFlag() ==
|
||||
read.getAlignmentStart() < read.getMateAlignmentStart())
|
||||
return false;
|
||||
|
||||
// mates are too far apart
|
||||
if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
/**
 * Immutable holder for every threshold used by the DiagnoseTargets statistics classes, so that the
 * values are configured in one place and handed to the statistics code as a single argument.
 */
class ThresHolder {
    public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);

    // read/base filters applied to a pileup before its coverage is counted
    private final int minimumBaseQuality;
    private final int minimumMappingQuality;

    // coverage and depth limits
    private final int minimumCoverage;
    private final int maximumCoverage;
    private final int minimumMedianDepth;

    // largest accepted distance between the alignment starts of two mates
    private final int maximumInsertSize;

    // fractions compared against per-interval / per-sample ratios
    private final double votePercentageThreshold;
    private final double lowMedianDepthThreshold;
    private final double badMateStatusThreshold;
    private final double coverageStatusThreshold;
    private final double excessiveCoverageThreshold;
    private final double qualityStatusThreshold;

    /**
     * Creates a holder with every threshold given explicitly, in the same order as the fields.
     *
     * @param minimumBaseQuality         minimum base quality used when filtering a pileup
     * @param minimumMappingQuality      minimum mapping quality used when filtering a pileup
     * @param minimumCoverage            minimum coverage threshold of a locus
     * @param maximumCoverage            maximum coverage threshold of a locus
     * @param minimumMedianDepth         minimum median depth (consumer not visible here)
     * @param maximumInsertSize          maximum distance between the alignment starts of two mates
     * @param votePercentageThreshold    vote percentage threshold (consumer not visible here)
     * @param lowMedianDepthThreshold    low median depth threshold (consumer not visible here)
     * @param badMateStatusThreshold     ratio of bad mates to reads above which BAD_MATE is reported
     * @param coverageStatusThreshold    fraction of loci above which COVERAGE_GAPS / LOW_COVERAGE is reported
     * @param excessiveCoverageThreshold fraction of loci above which EXCESSIVE_COVERAGE is reported
     * @param qualityStatusThreshold     fraction of loci above which POOR_QUALITY is reported
     */
    public ThresHolder(final int minimumBaseQuality,
                       final int minimumMappingQuality,
                       final int minimumCoverage,
                       final int maximumCoverage,
                       final int minimumMedianDepth,
                       final int maximumInsertSize,
                       final double votePercentageThreshold,
                       final double lowMedianDepthThreshold,
                       final double badMateStatusThreshold,
                       final double coverageStatusThreshold,
                       final double excessiveCoverageThreshold,
                       final double qualityStatusThreshold) {
        // integer limits
        this.minimumBaseQuality = minimumBaseQuality;
        this.minimumMappingQuality = minimumMappingQuality;
        this.minimumCoverage = minimumCoverage;
        this.maximumCoverage = maximumCoverage;
        this.minimumMedianDepth = minimumMedianDepth;
        this.maximumInsertSize = maximumInsertSize;

        // fractional limits
        this.votePercentageThreshold = votePercentageThreshold;
        this.lowMedianDepthThreshold = lowMedianDepthThreshold;
        this.badMateStatusThreshold = badMateStatusThreshold;
        this.coverageStatusThreshold = coverageStatusThreshold;
        this.excessiveCoverageThreshold = excessiveCoverageThreshold;
        this.qualityStatusThreshold = qualityStatusThreshold;
    }

    /** @return the minimum base quality */
    public int getMinimumBaseQuality() {
        return this.minimumBaseQuality;
    }

    /** @return the minimum mapping quality */
    public int getMinimumMappingQuality() {
        return this.minimumMappingQuality;
    }

    /** @return the minimum coverage */
    public int getMinimumCoverage() {
        return this.minimumCoverage;
    }

    /** @return the maximum coverage */
    public int getMaximumCoverage() {
        return this.maximumCoverage;
    }

    /** @return the minimum median depth */
    public int getMinimumMedianDepth() {
        return this.minimumMedianDepth;
    }

    /** @return the maximum insert size */
    public int getMaximumInsertSize() {
        return this.maximumInsertSize;
    }

    /** @return the vote percentage threshold */
    public double getVotePercentageThreshold() {
        return this.votePercentageThreshold;
    }

    /** @return the low median depth threshold */
    public double getLowMedianDepthThreshold() {
        return this.lowMedianDepthThreshold;
    }

    /** @return the bad mate status threshold */
    public double getBadMateStatusThreshold() {
        return this.badMateStatusThreshold;
    }

    /** @return the coverage status threshold */
    public double getCoverageStatusThreshold() {
        return this.coverageStatusThreshold;
    }

    /** @return the excessive coverage threshold */
    public double getExcessiveCoverageThreshold() {
        return this.excessiveCoverageThreshold;
    }

    /** @return the quality status threshold */
    public double getQualityStatusThreshold() {
        return this.qualityStatusThreshold;
    }
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -696,26 +696,32 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
@Override
|
||||
public Map<String, ReadBackedPileup> getPileupsForSamples(Collection<String> sampleNames) {
|
||||
Map<String, ReadBackedPileup> result = new HashMap<String, ReadBackedPileup>();
|
||||
Map<String, UnifiedPileupElementTracker<PE>> trackerMap = new HashMap<String, UnifiedPileupElementTracker<PE>>();
|
||||
|
||||
for (String sample : sampleNames) { // initialize pileups for each sample
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
trackerMap.put(sample, filteredTracker);
|
||||
}
|
||||
|
||||
for (PE p : pileupElementTracker) { // go through all pileup elements only once and add them to the respective sample's pileup
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if (read.getReadGroup() != null) {
|
||||
String sample = read.getReadGroup().getSample();
|
||||
UnifiedPileupElementTracker<PE> tracker = trackerMap.get(sample);
|
||||
if (tracker != null) // we only add the pileup the requested samples. Completely ignore the rest
|
||||
tracker.add(p);
|
||||
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||
for (String sample : sampleNames) {
|
||||
PileupElementTracker<PE> filteredElements = tracker.getElements(sample);
|
||||
if (filteredElements != null)
|
||||
result.put(sample, createNewPileup(loc, filteredElements));
|
||||
}
|
||||
} else {
|
||||
Map<String, UnifiedPileupElementTracker<PE>> trackerMap = new HashMap<String, UnifiedPileupElementTracker<PE>>();
|
||||
|
||||
for (String sample : sampleNames) { // initialize pileups for each sample
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
trackerMap.put(sample, filteredTracker);
|
||||
}
|
||||
for (PE p : pileupElementTracker) { // go through all pileup elements only once and add them to the respective sample's pileup
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if (read.getReadGroup() != null) {
|
||||
String sample = read.getReadGroup().getSample();
|
||||
UnifiedPileupElementTracker<PE> tracker = trackerMap.get(sample);
|
||||
if (tracker != null) // we only add the pileup the requested samples. Completely ignore the rest
|
||||
tracker.add(p);
|
||||
}
|
||||
}
|
||||
for (Map.Entry<String, UnifiedPileupElementTracker<PE>> entry : trackerMap.entrySet()) // create the RBP for each sample
|
||||
result.put(entry.getKey(), createNewPileup(loc, entry.getValue()));
|
||||
}
|
||||
|
||||
for (Map.Entry<String, UnifiedPileupElementTracker<PE>> entry : trackerMap.entrySet()) // create the RBP for each sample
|
||||
result.put(entry.getKey(), createNewPileup(loc, entry.getValue()));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
public class LocusStatisticsUnitTest /*extends BaseTest*/ {
|
||||
|
||||
@Test(dataProvider = "StatusTestValues")
|
||||
public void testCallableStatuses(int coverage, int rawCoverage, CallableStatus status) {
|
||||
// The min Coverage threshold is 10, the max is 100
|
||||
ThresHolder thresholds = new ThresHolder(20, 20, 10, 100, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
|
||||
Set<CallableStatus> statuses = new LocusStatistics(coverage, rawCoverage).callableStatuses(thresholds);
|
||||
// Check to make sure the status provides matches the actual
|
||||
Assert.assertTrue((status == null) ? statuses.isEmpty() : (statuses.contains(status) && statuses.size() == 1));
|
||||
|
||||
}
|
||||
|
||||
@DataProvider(name = "StatusTestValues")
|
||||
public Object[][] getStatusTestValues() {
|
||||
return new Object[][]{
|
||||
new Object[]{100, 100, null},
|
||||
new Object[]{100, 101, null},
|
||||
new Object[]{101, 101, CallableStatus.EXCESSIVE_COVERAGE},
|
||||
new Object[]{10, 101, null},
|
||||
new Object[]{9, 101, CallableStatus.POOR_QUALITY},
|
||||
new Object[]{9, 10, CallableStatus.POOR_QUALITY},
|
||||
new Object[]{9, 9, CallableStatus.LOW_COVERAGE},
|
||||
new Object[]{0, 0, CallableStatus.COVERAGE_GAPS},
|
||||
new Object[]{0, 9, CallableStatus.LOW_COVERAGE},
|
||||
new Object[]{0, 101, CallableStatus.POOR_QUALITY},
|
||||
new Object[]{10, Integer.MAX_VALUE, null},
|
||||
new Object[]{Integer.MAX_VALUE, Integer.MAX_VALUE, CallableStatus.EXCESSIVE_COVERAGE},
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class SampleStatisticsUnitTest/* extends BaseTest */ {
|
||||
|
||||
@DataProvider(name = "QuartileValues")
|
||||
public Object[][] getQuantileValues() {
|
||||
|
||||
int[] a1 = {5};
|
||||
int[] a2 = {1, 2};
|
||||
int[] a5 = {10, 20, 30, 40, 50};
|
||||
int[] a10 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
||||
|
||||
|
||||
return new Object[][]{
|
||||
new Object[]{a1, 0.5, 5},
|
||||
new Object[]{a1, 0, 5},
|
||||
new Object[]{a1, 1, 5},
|
||||
new Object[]{a2, 0.5, 1.5},
|
||||
new Object[]{a2, 0.25, 1},
|
||||
new Object[]{a2, 0.75, 2},
|
||||
new Object[]{a5, 0.5, 30},
|
||||
new Object[]{a5, 0.25, 20},
|
||||
new Object[]{a5, 0.75, 40},
|
||||
new Object[]{a5, 0, -1},
|
||||
new Object[]{a10, 0.5, 5.5},
|
||||
new Object[]{a10, 0.25, 3},
|
||||
new Object[]{a10, 0.75, 8}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "QuartileValues")
|
||||
public void testGetQuartile(int[] dataList, double percentage, double expected) {
|
||||
Assert.assertEquals(SampleStatistics.getQuartile(dataList, percentage), expected);
|
||||
|
||||
}
|
||||
|
||||
@DataProvider(name = "ReadsAndMates")
|
||||
public Object[][] getReadAndMates() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
|
||||
GATKSAMRecord noPair = ArtificialSAMUtils.createArtificialRead(header, "test", 0, 100, 50);
|
||||
GATKSAMRecord good = ArtificialSAMUtils.createPair(header, "test", 30, 100, 150, true, false).get(0);
|
||||
GATKSAMRecord bigInsertSize = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, false).get(0);
|
||||
GATKSAMRecord inverted = ArtificialSAMUtils.createPair(header, "test", 30, 151, 150, true, false).get(0);
|
||||
GATKSAMRecord sameOrientation = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, true).get(0);
|
||||
|
||||
GATKSAMRecord pairNotMapped = ArtificialSAMUtils.createPair(header, "test", 30, 100, 140, true, false).get(1);
|
||||
pairNotMapped.setMateUnmappedFlag(true);
|
||||
|
||||
// finish test
|
||||
return new Object[][]{
|
||||
new Object[]{noPair, false},
|
||||
new Object[]{good, true},
|
||||
new Object[]{bigInsertSize, false},
|
||||
new Object[]{inverted, false},
|
||||
new Object[]{sameOrientation, false},
|
||||
new Object[]{pairNotMapped, false}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ReadsAndMates")
|
||||
public void testHasValidMate(GATKSAMRecord read, boolean expected) {
|
||||
//50 is out maximum insert size
|
||||
Assert.assertEquals(new SampleStatistics(GenomeLoc.UNMAPPED).hasValidMate(read, ThresHolder.DEFAULTS), expected);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue