DiagnoseTargets with working Q1,Median,Q3

- Merged Roger's metrics with Mauricio's optimizations
 - Added Stats for DiagnoseTargets
     - now has functions to find the median depth, and upper/lower quartile
     - the REF_N callable status is implemented
 - The walker now runs efficiently
 - Diagnose Targets accepts overlapping intervals
 - Diagnose Targets now checks for bad mates
 - The read mates are checked in a memory efficient manner
 - The statistics thresholds have been consolidated and moved outside of the statistics classes and into the walker.
 - Fixed some bugs
 - Removed rod binding

Added more Unit tests

 - Test callable statuses on the locus level
 - Test bad mates

 - Changed NO_COVERAGE -> COVERAGE_GAPS to avoid confusion

Signed-off-by: Mauricio Carneiro <carneiro@broadinstitute.org>
This commit is contained in:
Roger Zurawicki 2012-04-12 18:54:29 -04:00 committed by Mauricio Carneiro
parent 50031b63c5
commit b8b139841d
9 changed files with 651 additions and 169 deletions

View File

@ -31,38 +31,29 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
* @since 2/1/12
*/
public enum CallableStatus {
/**
* the reference base was an N, which is not considered callable by the GATK
*/
// todo -- implement this status
REF_N("the reference base was an N, which is not considered callable the GATK"),
/**
* the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE
*/
PASS("the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE"),
/**
* absolutely no reads were seen at this locus, regardless of the filtering parameters
*/
NO_COVERAGE("absolutely no reads were seen at this locus, regardless of the filtering parameters"),
/**
* there were less than min. depth bases at the locus, after applying filters
*/
COVERAGE_GAPS("absolutely no coverage was observed at a locus, regardless of the filtering parameters"),
LOW_COVERAGE("there were less than min. depth bases at the locus, after applying filters"),
/**
* more than -maxDepth read at the locus, indicating some sort of mapping problem
*/
EXCESSIVE_COVERAGE("more than -maxDepth read at the locus, indicating some sort of mapping problem"),
/**
* more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads
*/
POOR_QUALITY("more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads"),
BAD_MATE(""),
BAD_MATE("the reads are not properly mated, suggesting mapping errors"),
INCONSISTENT_COVERAGE("");
NO_READS("there are no reads contained in the interval"),
//
// Interval-level statuses
//
LOW_MEDIAN_DEPTH("interval has insufficient median depth across samples");
public String description;
public final String description;
private CallableStatus(String description) {
this.description = description;

View File

@ -25,89 +25,126 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import net.sf.picard.util.PeekableIterator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import java.util.*;
/**
* Short one line description of the walker.
* Analyzes coverage distribution and validates read mates for a given interval and sample.
* <p/>
* <p>
* [Long description of the walker]
* Used to diagnose regions with bad coverage, mapping, or read mating. Analyzes each sample independently in addition
* to interval wide analysis.
* </p>
* <p/>
* <p/>
* <h2>Input</h2>
* <p>
* [Description of the Input]
* <ul>
* <li>A reference file</li>
* <li>one or more input BAMs</li>
* <li>One or more intervals</li>
* </ul>
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* [Description of the Output]
* A modified VCF detailing each interval by sample
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T [walker name]
* -T DiagnoseTargets \
* -R reference.fasta \
* -o output.vcf \
* -I sample1.bam \
* -I sample2.bam \
* -I sample3.bam \
* -L intervals.interval_list
* </pre>
*
* @author Mauricio Carneiro
* @since 2/1/12
* @author Mauricio Carneiro, Roger Zurawicki
* @since 5/8/12
*/
@By(value = DataSource.READS)
@PartitionBy(PartitionType.INTERVAL)
public class DiagnoseTargets extends LocusWalker<Long, Long> implements AnnotatorCompatibleWalker {
@Input(fullName = "interval_track", shortName = "int", doc = "", required = true)
private IntervalBinding<Feature> intervalTrack = null;
public class DiagnoseTargets extends LocusWalker<Long, Long> {
@Output(doc = "File to which variants should be written", required = true)
private VariantContextWriter vcfWriter = null;
@Argument(fullName = "minimum_base_quality", shortName = "mbq", doc = "", required = false)
@Argument(fullName = "minimum_base_quality", shortName = "BQ", doc = "The minimum Base Quality that is considered for calls", required = false)
private int minimumBaseQuality = 20;
@Argument(fullName = "minimum_mapping_quality", shortName = "mmq", doc = "", required = false)
@Argument(fullName = "minimum_mapping_quality", shortName = "MQ", doc = "The minimum read mapping quality considered for calls", required = false)
private int minimumMappingQuality = 20;
@Argument(fullName = "minimum_coverage", shortName = "mincov", doc = "", required = false)
@Argument(fullName = "minimum_coverage", shortName = "min", doc = "The minimum allowable coverage, used for calling LOW_COVERAGE", required = false)
private int minimumCoverage = 5;
@Argument(fullName = "maximum_coverage", shortName = "maxcov", doc = "", required = false)
@Argument(fullName = "maximum_coverage", shortName = "max", doc = "The maximum allowable coverage, used for calling EXCESSIVE_COVERAGE", required = false)
private int maximumCoverage = 700;
@Argument(fullName = "minimum_median_depth", shortName = "med", doc = "The minimum allowable median coverage, used for calling LOW_MEDIAN_DEPTH", required = false)
private int minMedianDepth = 20;
@Argument(fullName = "maximum_insert_size", shortName = "ins", doc = "The maximum allowed distance between a read and its mate", required = false)
private int maxInsertSize = 50;
@Argument(fullName = "voting_status_threshold", shortName = "stV", doc = "The needed percentage of samples containing a call for the interval to adopt the call ", required = false)
private double votePercentage = 0.50;
@Argument(fullName = "low_median_depth_status_threshold", shortName = "stMED", doc = "The percentage of the loci needed for calling LOW_MEDIAN_DEPTH", required = false)
private double lowMedianDepthPercentage = 0.20;
@Argument(fullName = "bad_mate_status_threshold", shortName = "stBM", doc = "The percentage of the loci needed for calling BAD_MATE", required = false)
private double badMateStatusThreshold = 0.50;
@Argument(fullName = "coverage_status_threshold", shortName = "stC", doc = "The percentage of the loci needed for calling LOW_COVERAGE and COVERAGE_GAPS", required = false)
private double coverageStatusThreshold = 0.20;
@Argument(fullName = "excessive_coverage_status_threshold", shortName = "stXC", doc = "The percentage of the loci needed for calling EXCESSIVE_COVERAGE", required = false)
private double excessiveCoverageThreshold = 0.20;
@Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The percentage of the loci needed for calling POOR_QUALITY", required = false)
private double qualityStatusThreshold = 0.50;
@Argument(fullName = "print_debug_log", shortName = "dl", doc = "Used only for debugging the walker. Prints extra info to screen", required = false)
private boolean debug = false;
private HashMap<GenomeLoc, IntervalStatistics> intervalMap = null; // interval => statistics
private PeekableIterator<GenomeLoc> intervalListIterator; // an iterator to go over all the intervals provided as we traverse the genome
private Set<String> samples = null; // all the samples being processed
private final Allele SYMBOLIC_ALLELE = Allele.create("<DT>", false); // avoid creating the symbolic allele multiple times
private ThresHolder thresholds = null;
@Override
public void initialize() {
super.initialize();
if (intervalTrack == null)
throw new UserException("This tool currently only works if you provide an interval track");
if (getToolkit().getIntervals() == null)
throw new UserException("This tool currently only works if you provide one or more interval");
thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage, minMedianDepth, maxInsertSize, votePercentage, lowMedianDepthPercentage, badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold, qualityStatusThreshold);
intervalMap = new HashMap<GenomeLoc, IntervalStatistics>();
intervalListIterator = new PeekableIterator<GenomeLoc>(intervalTrack.getIntervals(getToolkit()).listIterator());
intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator());
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); // get all of the unique sample names for the VCF Header
vcfWriter.writeHeader(new VCFHeader(getHeaderInfo(), samples)); // initialize the VCF header
@ -121,7 +158,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
addNewOverlappingIntervals(refLocus); // add all new intervals that may overlap this reference locus
for (IntervalStatistics intervalStatistics : intervalMap.values())
intervalStatistics.addLocus(context); // Add current locus to stats
intervalStatistics.addLocus(context, ref, thresholds); // Add current locus to stats
return 1L;
}
@ -151,45 +188,40 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
@Override
public void onTraversalDone(Long result) {
for (GenomeLoc interval : intervalMap.keySet())
processIntervalStats(intervalMap.get(interval), Allele.create("A"));
outputStatsToVCF(intervalMap.get(interval), Allele.create("A", true));
}
@Override
public RodBinding<VariantContext> getSnpEffRodBinding() {return null;}
private GenomeLoc getIntervalMapSpan() {
GenomeLoc loc = null;
for (GenomeLoc interval : intervalMap.keySet()) {
if (loc == null) {
loc = interval;
} else
loc = interval.union(loc);
}
@Override
public RodBinding<VariantContext> getDbsnpRodBinding() {return null;}
@Override
public List<RodBinding<VariantContext>> getCompRodBindings() {return null;}
@Override
public List<RodBinding<VariantContext>> getResourceRodBindings() {return null;}
@Override
public boolean alwaysAppendDbsnpId() {return false;}
return loc;
}
/**
* Removes all intervals that are behind the current reference locus from the intervalMap
*
* @param refLocus the current reference locus
* @param refBase the reference allele
* @param refBase the reference allele
*/
private void removePastIntervals(GenomeLoc refLocus, byte refBase) {
List<GenomeLoc> toRemove = new LinkedList<GenomeLoc>();
for (GenomeLoc interval : intervalMap.keySet())
if (interval.isBefore(refLocus)) {
processIntervalStats(intervalMap.get(interval), Allele.create(refBase, true));
toRemove.add(interval);
// if all intervals are safe
if (getIntervalMapSpan() != null && getIntervalMapSpan().isBefore(refLocus)) {
for (GenomeLoc interval : intervalMap.keySet()) {
outputStatsToVCF(intervalMap.get(interval), Allele.create(refBase, true));
intervalMap.remove(interval);
}
for (GenomeLoc interval : toRemove)
intervalMap.remove(interval);
}
GenomeLoc interval = intervalListIterator.peek(); // clean up all intervals that we might have skipped because there was no data
while(interval != null && interval.isBefore(refLocus)) {
while (interval != null && interval.isBefore(refLocus)) {
interval = intervalListIterator.next();
processIntervalStats(createIntervalStatistic(interval), Allele.create(refBase, true));
outputStatsToVCF(createIntervalStatistic(interval), Allele.create(refBase, true));
interval = intervalListIterator.peek();
}
}
@ -202,7 +234,6 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
private void addNewOverlappingIntervals(GenomeLoc refLocus) {
GenomeLoc interval = intervalListIterator.peek();
while (interval != null && !interval.isPast(refLocus)) {
System.out.println("LOCUS : " + refLocus + " -- " + interval);
intervalMap.put(interval, createIntervalStatistic(interval));
intervalListIterator.next(); // discard the interval (we've already added it to the map)
interval = intervalListIterator.peek();
@ -210,14 +241,14 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
}
/**
* Takes the interval, finds it in the stash, prints it to the VCF, and removes it
* Takes the interval, finds it in the stash, prints it to the VCF
*
* @param stats The statistics of the interval
* @param stats The statistics of the interval
* @param refAllele the reference allele
*/
private void processIntervalStats(IntervalStatistics stats, Allele refAllele) {
private void outputStatsToVCF(IntervalStatistics stats, Allele refAllele) {
GenomeLoc interval = stats.getInterval();
List<Allele> alleles = new ArrayList<Allele>();
Map<String, Object> attributes = new HashMap<String, Object>();
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
@ -227,7 +258,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
vcb.filters(statusesToStrings(stats.callableStatuses()));
vcb.filters(statusesToStrings(stats.callableStatuses(thresholds)));
attributes.put(VCFConstants.END_KEY, interval.getStop());
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
@ -236,16 +267,24 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
for (String sample : samples) {
Map<String, Object> infos = new HashMap<String, Object>();
infos.put(VCFConstants.DEPTH_KEY, stats.getSample(sample).averageCoverage());
SampleStatistics sampleStat = stats.getSample(sample);
infos.put(VCFConstants.DEPTH_KEY, sampleStat.averageCoverage());
infos.put("Q1", sampleStat.getQuantileDepth(0.25));
infos.put("MED", sampleStat.getQuantileDepth(0.50));
infos.put("Q3", sampleStat.getQuantileDepth(0.75));
Set<String> filters = new HashSet<String>();
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses()));
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
}
vcb = vcb.genotypes(genotypes);
if (debug) {
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
}
vcfWriter.add(vcb.make());
}
@ -264,7 +303,12 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
// FORMAT fields for each genotype
// todo -- find the appropriate VCF constants
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
// FILTER fields
for (CallableStatus stat : CallableStatus.values())
@ -273,8 +317,13 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
return headerLines;
}
private static Set<String> statusesToStrings(Set<CallableStatus> statuses) {
/**
* Function that process a set of statuses into strings
*
* @param statuses the set of statuses to be converted
* @return a matching set of strings
*/
private Set<String> statusesToStrings(Set<CallableStatus> statuses) {
Set<String> output = new HashSet<String>(statuses.size());
for (CallableStatus status : statuses)
@ -284,6 +333,6 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
}
private IntervalStatistics createIntervalStatistic(GenomeLoc interval) {
return new IntervalStatistics(samples, interval, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality);
return new IntervalStatistics(samples, interval /*, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality*/);
}
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -34,19 +35,24 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
public class IntervalStatistics {
class IntervalStatistics {
private final Map<String, SampleStatistics> samples;
private final GenomeLoc interval;
private boolean hasNref = false;
private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
public IntervalStatistics(Set<String> samples, GenomeLoc interval, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
/*
private double minMedianDepth = 20.0;
private double badMedianDepthPercentage = 0.20;
private double votePercentage = 0.50;
*/
public IntervalStatistics(Set<String> samples, GenomeLoc interval/*, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality*/) {
this.interval = interval;
this.samples = new HashMap<String, SampleStatistics>(samples.size());
for (String sample : samples)
this.samples.put(sample, new SampleStatistics(interval, minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality));
this.samples.put(sample, new SampleStatistics(interval /*, minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality*/));
}
public SampleStatistics getSample(String sample) {
@ -57,9 +63,19 @@ public class IntervalStatistics {
return interval;
}
public void addLocus(AlignmentContext context) {
/**
* The function to populate data into the Statistics from the walker.
* This takes the input and manages passing the data to the SampleStatistics and Locus Statistics
*
* @param context The alignment context given from the walker
* @param ref the reference context given from the walker
* @param thresholds the class contains the statistical threshold for making calls
*/
public void addLocus(AlignmentContext context, ReferenceContext ref, ThresHolder thresholds) {
ReadBackedPileup pileup = context.getBasePileup();
//System.out.println(ref.getLocus().toString());
Map<String, ReadBackedPileup> samplePileups = pileup.getPileupsForSamples(samples.keySet());
for (Map.Entry<String, ReadBackedPileup> entry : samplePileups.entrySet()) {
@ -67,15 +83,16 @@ public class IntervalStatistics {
ReadBackedPileup samplePileup = entry.getValue();
SampleStatistics sampleStatistics = samples.get(sample);
if (sampleStatistics == null)
if (sampleStatistics == null)
throw new ReviewedStingException(String.format("Trying to add locus statistics to a sample (%s) that doesn't exist in the Interval.", sample));
sampleStatistics.addLocus(context.getLocation(), samplePileup);
sampleStatistics.addLocus(context.getLocation(), samplePileup, thresholds);
}
if (!hasNref && ref.getBase() == 'N')
hasNref = true;
}
public double averageCoverage() {
if (preComputedTotalCoverage < 0)
calculateTotalCoverage();
@ -90,15 +107,43 @@ public class IntervalStatistics {
/**
* Return the Callable statuses for the interval as a whole
* todo -- add a voting system for sample flags and add interval specific statuses
* todo -- add missingness filter
*
* @param thresholds the class contains the statistical threshold for making calls
* @return the callable status(es) for the whole interval
*/
public Set<CallableStatus> callableStatuses() {
public Set<CallableStatus> callableStatuses(ThresHolder thresholds) {
Set<CallableStatus> output = new HashSet<CallableStatus>();
// Initialize the Map
Map<CallableStatus, Integer> votes = new HashMap<CallableStatus, Integer>();
for (CallableStatus status : CallableStatus.values())
votes.put(status, 0);
// tally up the votes
for (SampleStatistics sample : samples.values())
output.addAll(sample.getCallableStatuses());
for (CallableStatus status : sample.getCallableStatuses(thresholds))
votes.put(status, votes.get(status) + 1);
// output the statuses whose tally is above the voting threshold
for (CallableStatus status : votes.keySet()) {
if (votes.get(status) > (samples.size() * thresholds.getVotePercentageThreshold()) && !(status.equals(CallableStatus.PASS)))
output.add(status);
}
if (hasNref)
output.add(CallableStatus.REF_N);
// get median DP of each sample
int nLowMedianDepth = 0;
for (SampleStatistics sample : samples.values()) {
if (sample.getQuantileDepth(0.5) < thresholds.getMinimumMedianDepth())
nLowMedianDepth++;
}
if (nLowMedianDepth > (samples.size() * thresholds.getLowMedianDepthThreshold()))
output.add(CallableStatus.LOW_MEDIAN_DEPTH);
return output;
}

View File

@ -27,9 +27,9 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import java.util.HashSet;
import java.util.Set;
public class LocusStatistics {
final int coverage;
final int rawCoverage;
class LocusStatistics {
private final int coverage;
private final int rawCoverage;
public LocusStatistics() {
this.coverage = 0;
@ -52,21 +52,20 @@ public class LocusStatistics {
/**
* Generates all applicable statuses from the coverages in this locus
*
* @param minimumCoverageThreshold the minimum threshold for determining low coverage/poor quality
* @param maximumCoverageThreshold the maximum threshold for determining excessive coverage
* @param thresholds the class contains the statistical threshold for making calls
* @return a set of all statuses that apply
*/
public Set<CallableStatus> callableStatuses(int minimumCoverageThreshold, int maximumCoverageThreshold) {
public Set<CallableStatus> callableStatuses(ThresHolder thresholds) {
Set<CallableStatus> output = new HashSet<CallableStatus>();
// if too much coverage
if (getCoverage() > maximumCoverageThreshold)
if (getCoverage() > thresholds.getMaximumCoverage())
output.add(CallableStatus.EXCESSIVE_COVERAGE);
// if not enough coverage
if (getCoverage() < minimumCoverageThreshold) {
if (getCoverage() < thresholds.getMinimumCoverage()) {
// was there a lot of low Qual coverage?
if (getRawCoverage() >= minimumCoverageThreshold)
if (getRawCoverage() >= thresholds.getMinimumCoverage())
output.add(CallableStatus.POOR_QUALITY);
// no?
else {
@ -74,7 +73,7 @@ public class LocusStatistics {
if (getRawCoverage() > 0)
output.add(CallableStatus.LOW_COVERAGE);
else
output.add(CallableStatus.NO_COVERAGE);
output.add(CallableStatus.COVERAGE_GAPS);
}
}

View File

@ -27,41 +27,36 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
/**
* Short one line description of the walker.
*
* @author Mauricio Carneiro
* @since 2/1/12
* The statistics calculator for a specific sample given the interval
*/
class SampleStatistics {
private final GenomeLoc interval;
private final ArrayList<LocusStatistics> loci;
private final int minimumCoverageThreshold;
private final int maximumCoverageThreshold;
private final int minimumMappingQuality;
private final int minimumBaseQuality;
private int[] preSortedDepths = null;
private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
private int nReads = -1;
private int nBadMates = -1;
private SampleStatistics(GenomeLoc interval, ArrayList<LocusStatistics> loci, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
private SampleStatistics(GenomeLoc interval, ArrayList<LocusStatistics> loci) {
this.interval = interval;
this.loci = loci;
this.minimumCoverageThreshold = minimumCoverageThreshold;
this.maximumCoverageThreshold = maximumCoverageThreshold;
this.minimumMappingQuality = minimumMappingQuality;
this.minimumBaseQuality = minimumBaseQuality;
nReads = 0;
nBadMates = 0;
}
public SampleStatistics(GenomeLoc interval, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
this(interval, new ArrayList<LocusStatistics>(interval.size()), minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality);
public SampleStatistics(GenomeLoc interval) {
this(interval, new ArrayList<LocusStatistics>(interval.size()));
// Initialize every locus (this way we don't have to worry about non-existent loci in the object)
for (int i = 0; i < interval.size(); i++)
this.loci.add(i, new LocusStatistics());
this.loci.add(new LocusStatistics());
}
@ -78,51 +73,56 @@ class SampleStatistics {
}
/**
* Calculates the callable statuses of the entire interval
* Calculates the callable statuses of the entire sample
*
* @return the callable statuses of the entire interval
* @param thresholds the class contains the statistical threshold for making calls
* @return the callable statuses of the entire sample
*/
public Set<CallableStatus> getCallableStatuses() {
public Set<CallableStatus> getCallableStatuses(ThresHolder thresholds) {
Set<CallableStatus> output = new HashSet<CallableStatus>();
Map<CallableStatus, Integer> totals = new HashMap<CallableStatus, Integer>(CallableStatus.values().length);
// We check if reads are present to prevent div / 0 exceptions
if (nReads == 0) {
output.add(CallableStatus.NO_READS);
return output;
}
Map<CallableStatus, Double> totals = new HashMap<CallableStatus, Double>(CallableStatus.values().length);
// initialize map
for (CallableStatus status : CallableStatus.values())
totals.put(status, 0);
totals.put(status, 0.0);
// sum up all the callable statuses for each locus
for (int i = 0; i < interval.size(); i++) {
for (CallableStatus status : callableStatus(i)) {
int count = totals.get(status);
for (CallableStatus status : callableStatus(i, thresholds)) {
double count = totals.get(status);
totals.put(status, count + 1);
}
}
Set<CallableStatus> output = new HashSet<CallableStatus>();
// double to avoid type casting
double intervalSize = interval.size();
double coverageStatusThreshold = 0.20;
if ((totals.get(CallableStatus.NO_COVERAGE) / intervalSize) > coverageStatusThreshold)
output.add(CallableStatus.NO_COVERAGE);
if ((nBadMates / nReads) > thresholds.getBadMateStatusThreshold())
output.add(CallableStatus.BAD_MATE);
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) > coverageStatusThreshold)
if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) > thresholds.getCoverageStatusThreshold())
output.add(CallableStatus.COVERAGE_GAPS);
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) > thresholds.getCoverageStatusThreshold())
output.add(CallableStatus.LOW_COVERAGE);
double excessiveCoverageThreshold = 0.20;
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) > excessiveCoverageThreshold)
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) > thresholds.getExcessiveCoverageThreshold())
output.add(CallableStatus.EXCESSIVE_COVERAGE);
double qualityStatusThreshold = 0.50;
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) > qualityStatusThreshold)
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) > thresholds.getQualityStatusThreshold())
output.add(CallableStatus.POOR_QUALITY);
if (totals.get(CallableStatus.REF_N) > 0)
output.add(CallableStatus.REF_N);
if (output.isEmpty()) {
output.add(CallableStatus.PASS);
}
@ -132,12 +132,13 @@ class SampleStatistics {
/**
* Adds a locus to the interval wide stats
*
* @param locus The locus given as a GenomeLoc
* @param pileup The pileup of that locus
* @param locus The locus given as a GenomeLoc
* @param pileup The pileup of that locus, this exclusively contains the sample
* @param thresholds the class contains the statistical threshold for making calls
*/
public void addLocus(GenomeLoc locus, ReadBackedPileup pileup) {
public void addLocus(GenomeLoc locus, ReadBackedPileup pileup, ThresHolder thresholds) {
if (!interval.containsP(locus))
throw new ReviewedStingException(String.format("Locus %s is not part of the Interval", locus));
throw new ReviewedStingException(String.format("Locus %s is not part of the Interval %s", locus, interval));
// a null pileup means there is nothing to add
if (pileup != null) {
@ -145,31 +146,141 @@ class SampleStatistics {
int locusIndex = locus.getStart() - interval.getStart();
int rawCoverage = pileup.depthOfCoverage();
int coverage = pileup.getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality).depthOfCoverage();
int coverage = pileup.getBaseAndMappingFilteredPileup(thresholds.getMinimumBaseQuality(), thresholds.getMinimumMappingQuality()).depthOfCoverage();
LocusStatistics locusData = new LocusStatistics(coverage, rawCoverage);
loci.add(locusIndex, locusData);
loci.set(locusIndex, locusData);
for (GATKSAMRecord read : pileup.getReads())
processRead(read, thresholds);
}
}
/**
 * Registers a read in the read/bad-mate tallies exactly once.
 *
 * Pileups from neighboring loci can contain the same read, so each read is
 * stamped with a temporary attribute after its first visit and skipped on
 * subsequent ones.
 *
 * @param read       the read to examine
 * @param thresholds the class that contains the statistical thresholds for making calls
 */
private void processRead(GATKSAMRecord read, ThresHolder thresholds) {
    // Was this read already processed?
    if (read.getTemporaryAttribute("checkedBadMate") == null) {
        nReads++;
        // BUG FIX: the original incremented nBadMates when the mate WAS valid,
        // inverting the BAD_MATE statistic. Count only reads with an invalid mate.
        if (!hasValidMate(read, thresholds))
            nBadMates++;
        read.setTemporaryAttribute("checkedBadMate", true);
    }
}
/**
* returns the callable status of this locus without taking the reference base into account.
* returns the callable status of a given locus without taking the reference base into account.
*
* @param locusIndex location in the genome to inquire (only one locus)
* @param thresholds the class contains the statistical threshold for making calls
* @return the callable status of a locus
*/
private Set<CallableStatus> callableStatus(int locusIndex) {
private Set<CallableStatus> callableStatus(int locusIndex, ThresHolder thresholds) {
LocusStatistics locus = loci.get(locusIndex);
return locus.callableStatuses(minimumCoverageThreshold, maximumCoverageThreshold);
return locus.callableStatuses(thresholds);
}
/**
 * Caches the sum of the (filtered) coverage across every locus of the interval
 * in {@code preComputedTotalCoverage}.
 */
private void calculateTotalCoverage() {
    preComputedTotalCoverage = 0;
    for (int i = 0; i < loci.size(); i++)
        preComputedTotalCoverage += loci.get(i).getCoverage();
}
/**
 * Returns the requested quantile (0.25, 0.5 or 0.75) of the per-locus depths.
 *
 * The sorted depth array is built lazily on the first call and reused afterwards.
 * NOTE(review): the cache is never invalidated here — presumably callers only query
 * quantiles after the interval is fully populated; confirm against the walker.
 *
 * @param percentage the quantile to compute (see {@link #getQuartile})
 * @return the quantile depth, or -1 for unsupported percentages
 */
public double getQuantileDepth(double percentage) {
    if (preSortedDepths == null) {
        getDepthsAsSortedArray();
    }
    return getQuartile(preSortedDepths, percentage);
}
/**
 * Computes the median (0.5), lower quartile (0.25) or upper quartile (0.75)
 * of a sorted array of depths, using the median-of-halves convention.
 *
 * @param data       the depths, sorted ascending
 * @param percentage which quartile to compute; only 0.25, 0.5 and 0.75 are supported
 * @return the requested quartile; a single-element array is its own quartile for
 *         any percentage; -1 for any other unsupported percentage
 */
static double getQuartile(int[] data, double percentage) {
    final int n = data.length;

    // A single observation is its own quartile, whatever was asked for.
    if (n == 1)
        return data[0];

    if (percentage == 0.5)
        return getMedian(data);

    // Index of the (possibly fractional) middle element.
    final double mid = (n - 1.0) / 2;

    if (percentage == 0.25) {
        // Lower quartile: median of the lower half (middle element included when n is odd).
        return getMedian(Arrays.copyOfRange(data, 0, (int) mid + 1));
    }

    if (percentage == 0.75) {
        // Upper quartile: median of the upper half (middle element included when n is odd).
        final int from = (mid % 1 == 0) ? (int) mid : (int) mid + 1;
        return getMedian(Arrays.copyOfRange(data, from, n));
    }

    // Only the three standard quartiles are supported; everything else is flagged with -1.
    return -1;
}
/**
 * Returns the median of a sorted array: the middle element for odd lengths,
 * the mean of the two middle elements for even lengths.
 * Assumes data is sorted.
 */
private static double getMedian(int[] data) {
    final int n = data.length;
    if (n == 1)
        return data[0];

    final int lowerMid = (n - 1) / 2;   // floor of the middle position
    if (n % 2 == 1)
        return data[lowerMid];          // odd length: exact middle element

    // Even length: average the two middle values (as doubles, to avoid int overflow).
    final double low = data[lowerMid];
    final double high = data[lowerMid + 1];
    return (low + high) / 2;
}
/**
 * Snapshots the filtered coverage of every locus into {@code preSortedDepths}
 * and sorts it ascending, so quantiles can be read off by index.
 */
private void getDepthsAsSortedArray() {
    preSortedDepths = new int[loci.size()];
    int i = 0;
    for (LocusStatistics locus : loci)
        preSortedDepths[i++] = locus.getCoverage();
    Arrays.sort(preSortedDepths);
}
/**
 * Decides whether a read has a valid mate. A mate is valid when the read is
 * paired, both ends are mapped, the ends are on opposite strands, the pair is
 * not inverted (the forward read must come before the reverse read), and the
 * two alignment starts are within the configured maximum insert size.
 *
 * todo - also require the mates to be on the same contig
 * todo - handle forced mates
 *
 * The checks are short-circuited in this exact order because the mate-flag
 * accessors are only meaningful once the paired / mapped flags have been verified.
 *
 * @param read       the read whose mate is inspected
 * @param thresholds supplies the maximum allowed insert size
 * @return true if the mate passes all checks above
 */
boolean hasValidMate(GATKSAMRecord read, ThresHolder thresholds) {
    return read.getReadPairedFlag()                                                          // must have a pair
            && !(read.getMateUnmappedFlag() || read.getReadUnmappedFlag())                   // both ends mapped
            && read.getReadNegativeStrandFlag() != read.getMateNegativeStrandFlag()          // opposite strands
            && read.getReadNegativeStrandFlag() != (read.getAlignmentStart() < read.getMateAlignmentStart()) // not inverted
            && Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) <= thresholds.getMaximumInsertSize(); // mates close enough
}
}

View File

@ -0,0 +1,119 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
/**
 * Immutable holder for all statistical thresholds used by the DiagnoseTargets
 * walker; consolidates what used to live inside the individual statistics classes.
 */
class ThresHolder {

    /** Default thresholds used when the walker supplies none. */
    public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);

    // Per-base filters
    private final int minimumBaseQuality;        // bases below this quality are ignored
    private final int minimumMappingQuality;     // reads below this mapping quality are ignored

    // Per-locus coverage bounds
    private final int minimumCoverage;           // below this: LOW_COVERAGE
    private final int maximumCoverage;           // above this: EXCESSIVE_COVERAGE

    // Interval-level limits
    private final int minimumMedianDepth;
    private final int maximumInsertSize;         // mates farther apart than this are "bad"

    // Fractional voting / status thresholds
    private final double votePercentageThreshold;
    private final double lowMedianDepthThreshold;
    private final double badMateStatusThreshold;
    private final double coverageStatusThreshold;
    private final double excessiveCoverageThreshold;
    private final double qualityStatusThreshold;

    /**
     * Builds a threshold holder; all values are fixed at construction time.
     */
    public ThresHolder(int minimumBaseQuality,
                       int minimumMappingQuality,
                       int minimumCoverage,
                       int maximumCoverage,
                       int minimumMedianDepth,
                       int maximumInsertSize,
                       double votePercentageThreshold,
                       double lowMedianDepthThreshold,
                       double badMateStatusThreshold,
                       double coverageStatusThreshold,
                       double excessiveCoverageThreshold,
                       double qualityStatusThreshold) {
        this.minimumBaseQuality = minimumBaseQuality;
        this.minimumMappingQuality = minimumMappingQuality;
        this.minimumCoverage = minimumCoverage;
        this.maximumCoverage = maximumCoverage;
        this.minimumMedianDepth = minimumMedianDepth;
        this.maximumInsertSize = maximumInsertSize;
        this.votePercentageThreshold = votePercentageThreshold;
        this.lowMedianDepthThreshold = lowMedianDepthThreshold;
        this.badMateStatusThreshold = badMateStatusThreshold;
        this.coverageStatusThreshold = coverageStatusThreshold;
        this.excessiveCoverageThreshold = excessiveCoverageThreshold;
        this.qualityStatusThreshold = qualityStatusThreshold;
    }

    public int getMinimumBaseQuality() { return minimumBaseQuality; }

    public int getMinimumMappingQuality() { return minimumMappingQuality; }

    public int getMinimumCoverage() { return minimumCoverage; }

    public int getMaximumCoverage() { return maximumCoverage; }

    public int getMinimumMedianDepth() { return minimumMedianDepth; }

    public int getMaximumInsertSize() { return maximumInsertSize; }

    public double getVotePercentageThreshold() { return votePercentageThreshold; }

    public double getLowMedianDepthThreshold() { return lowMedianDepthThreshold; }

    public double getBadMateStatusThreshold() { return badMateStatusThreshold; }

    public double getCoverageStatusThreshold() { return coverageStatusThreshold; }

    public double getExcessiveCoverageThreshold() { return excessiveCoverageThreshold; }

    public double getQualityStatusThreshold() { return qualityStatusThreshold; }
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010, The Broad Institute
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -696,26 +696,32 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
@Override
public Map<String, ReadBackedPileup> getPileupsForSamples(Collection<String> sampleNames) {
Map<String, ReadBackedPileup> result = new HashMap<String, ReadBackedPileup>();
Map<String, UnifiedPileupElementTracker<PE>> trackerMap = new HashMap<String, UnifiedPileupElementTracker<PE>>();
for (String sample : sampleNames) { // initialize pileups for each sample
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
trackerMap.put(sample, filteredTracker);
}
for (PE p : pileupElementTracker) { // go through all pileup elements only once and add them to the respective sample's pileup
GATKSAMRecord read = p.getRead();
if (read.getReadGroup() != null) {
String sample = read.getReadGroup().getSample();
UnifiedPileupElementTracker<PE> tracker = trackerMap.get(sample);
if (tracker != null) // we only add the pileup the requested samples. Completely ignore the rest
tracker.add(p);
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
for (String sample : sampleNames) {
PileupElementTracker<PE> filteredElements = tracker.getElements(sample);
if (filteredElements != null)
result.put(sample, createNewPileup(loc, filteredElements));
}
} else {
Map<String, UnifiedPileupElementTracker<PE>> trackerMap = new HashMap<String, UnifiedPileupElementTracker<PE>>();
for (String sample : sampleNames) { // initialize pileups for each sample
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
trackerMap.put(sample, filteredTracker);
}
for (PE p : pileupElementTracker) { // go through all pileup elements only once and add them to the respective sample's pileup
GATKSAMRecord read = p.getRead();
if (read.getReadGroup() != null) {
String sample = read.getReadGroup().getSample();
UnifiedPileupElementTracker<PE> tracker = trackerMap.get(sample);
if (tracker != null) // we only add the pileup the requested samples. Completely ignore the rest
tracker.add(p);
}
}
for (Map.Entry<String, UnifiedPileupElementTracker<PE>> entry : trackerMap.entrySet()) // create the RBP for each sample
result.put(entry.getKey(), createNewPileup(loc, entry.getValue()));
}
for (Map.Entry<String, UnifiedPileupElementTracker<PE>> entry : trackerMap.entrySet()) // create the RBP for each sample
result.put(entry.getKey(), createNewPileup(loc, entry.getValue()));
return result;
}

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Set;
public class LocusStatisticsUnitTest /*extends BaseTest*/ {

    /** Thresholds shared by all cases: minimum coverage 10, maximum coverage 100. */
    private static final ThresHolder THRESHOLDS = new ThresHolder(20, 20, 10, 100, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);

    /**
     * Verifies that a locus with the given filtered/raw coverage reports exactly
     * the expected callable status (or no status at all when {@code status} is null).
     */
    @Test(dataProvider = "StatusTestValues")
    public void testCallableStatuses(int coverage, int rawCoverage, CallableStatus status) {
        Set<CallableStatus> statuses = new LocusStatistics(coverage, rawCoverage).callableStatuses(THRESHOLDS);
        if (status == null)
            Assert.assertTrue(statuses.isEmpty());
        else
            Assert.assertTrue(statuses.contains(status) && statuses.size() == 1);
    }

    /** Cases are (filtered coverage, raw coverage, expected single status or null). */
    @DataProvider(name = "StatusTestValues")
    public Object[][] getStatusTestValues() {
        return new Object[][]{
                {100, 100, null},
                {100, 101, null},
                {101, 101, CallableStatus.EXCESSIVE_COVERAGE},
                {10, 101, null},
                {9, 101, CallableStatus.POOR_QUALITY},
                {9, 10, CallableStatus.POOR_QUALITY},
                {9, 9, CallableStatus.LOW_COVERAGE},
                {0, 0, CallableStatus.COVERAGE_GAPS},
                {0, 9, CallableStatus.LOW_COVERAGE},
                {0, 101, CallableStatus.POOR_QUALITY},
                {10, Integer.MAX_VALUE, null},
                {Integer.MAX_VALUE, Integer.MAX_VALUE, CallableStatus.EXCESSIVE_COVERAGE},
        };
    }
}

View File

@ -0,0 +1,99 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
public class SampleStatisticsUnitTest/* extends BaseTest */ {

    /** Sorted depth arrays of length 1, 2, 5 and 10 with their expected quartiles. */
    @DataProvider(name = "QuartileValues")
    public Object[][] getQuantileValues() {
        int[] single = {5};
        int[] pair = {1, 2};
        int[] five = {10, 20, 30, 40, 50};
        int[] ten = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
        return new Object[][]{
                {single, 0.5, 5},
                {single, 0, 5},
                {single, 1, 5},
                {pair, 0.5, 1.5},
                {pair, 0.25, 1},
                {pair, 0.75, 2},
                {five, 0.5, 30},
                {five, 0.25, 20},
                {five, 0.75, 40},
                {five, 0, -1},          // only Q1/median/Q3 are supported for n > 1
                {ten, 0.5, 5.5},
                {ten, 0.25, 3},
                {ten, 0.75, 8}
        };
    }

    @Test(dataProvider = "QuartileValues")
    public void testGetQuartile(int[] depths, double percentage, double expected) {
        Assert.assertEquals(SampleStatistics.getQuartile(depths, percentage), expected);
    }

    /** Builds one read per mate-validity scenario together with the expected verdict. */
    @DataProvider(name = "ReadsAndMates")
    public Object[][] getReadAndMates() {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);

        GATKSAMRecord noPair = ArtificialSAMUtils.createArtificialRead(header, "test", 0, 100, 50);
        GATKSAMRecord good = ArtificialSAMUtils.createPair(header, "test", 30, 100, 150, true, false).get(0);
        GATKSAMRecord bigInsertSize = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, false).get(0);
        GATKSAMRecord inverted = ArtificialSAMUtils.createPair(header, "test", 30, 151, 150, true, false).get(0);
        GATKSAMRecord sameOrientation = ArtificialSAMUtils.createPair(header, "test", 30, 100, 151, true, true).get(0);
        GATKSAMRecord pairNotMapped = ArtificialSAMUtils.createPair(header, "test", 30, 100, 140, true, false).get(1);
        pairNotMapped.setMateUnmappedFlag(true);

        // TODO(review): original noted "finish test" -- e.g. same-contig and forced-mate cases
        return new Object[][]{
                {noPair, false},
                {good, true},
                {bigInsertSize, false},
                {inverted, false},
                {sameOrientation, false},
                {pairNotMapped, false}
        };
    }

    @Test(dataProvider = "ReadsAndMates")
    public void testHasValidMate(GATKSAMRecord read, boolean expected) {
        // 50 is our maximum insert size (see ThresHolder.DEFAULTS)
        Assert.assertEquals(new SampleStatistics(GenomeLoc.UNMAPPED).hasValidMate(read, ThresHolder.DEFAULTS), expected);
    }
}