Added integration tests for DiagnoseTargets

Signed-off-by: Mauricio Carneiro <carneiro@broadinstitute.org>
This commit is contained in:
Roger Zurawicki 2012-06-15 12:11:13 -04:00 committed by Mauricio Carneiro
parent 12d1c594df
commit 7eb3e4da41
3 changed files with 80 additions and 15 deletions

View File

@ -126,7 +126,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
@Argument(fullName = "print_debug_log", shortName = "dl", doc = "Used only for debugging the walker. Prints extra info to screen", required = false)
private boolean debug = false;
private HashMap<GenomeLoc, IntervalStatistics> intervalMap = null; // interval => statistics
private HashMap<GenomeLoc, IntervalStatistics> intervalMap = null; // maps each interval => statistics
private PeekableIterator<GenomeLoc> intervalListIterator; // an iterator to go over all the intervals provided as we traverse the genome
private Set<String> samples = null; // all the samples being processed
private final Allele SYMBOLIC_ALLELE = Allele.create("<DT>", false); // avoid creating the symbolic allele multiple times
@ -137,7 +137,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
super.initialize();
if (getToolkit().getIntervals() == null)
throw new UserException("This tool currently only works if you provide one or more interval");
throw new UserException("This tool only works if you provide one or more intervals. ( Use the -L argument )");
thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage, minMedianDepth, maxInsertSize, votePercentage, lowMedianDepthPercentage, badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold, qualityStatusThreshold);
@ -201,6 +201,20 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
return loc;
}
/**
 * Builds a single location out of every interval in the intervalMap that lies before the given position.
 *
 * Each key of the intervalMap for which isBefore(pos) holds is folded into the result with union().
 *
 * @param pos the position that intervals are compared against
 * @return the union of all intervals in the intervalMap that are before pos, or null if there are none
 */
private GenomeLoc getFinishedIntervalSpan(GenomeLoc pos) {
    GenomeLoc span = null;
    for (GenomeLoc candidate : intervalMap.keySet()) {
        // intervals that are not strictly before pos do not contribute to the span
        if (!candidate.isBefore(pos))
            continue;
        span = (span == null) ? candidate : candidate.union(span);
    }
    return span;
}
/**
* Removes all intervals that are behind the current reference locus from the intervalMap
*
@ -208,19 +222,17 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
* @param refBase the reference allele
*/
private void removePastIntervals(GenomeLoc refLocus, byte refBase) {
// if all intervals are safe
if (getIntervalMapSpan() != null && getIntervalMapSpan().isBefore(refLocus)) {
for (GenomeLoc interval : intervalMap.keySet()) {
outputStatsToVCF(intervalMap.get(interval), Allele.create(refBase, true));
intervalMap.remove(interval);
}
}
// if there are statistics to output, check that we can output them in order
if (getFinishedIntervalSpan(refLocus) != null &&
getIntervalMapSpan().getStart() == getFinishedIntervalSpan(refLocus).getStart()) {
for (GenomeLoc interval : intervalMap.keySet()) {
if (interval.isBefore(refLocus)) {
outputStatsToVCF(intervalMap.get(interval), Allele.create(refBase, true));
intervalMap.remove(interval);
}
}
GenomeLoc interval = intervalListIterator.peek(); // clean up all intervals that we might have skipped because there was no data
while (interval != null && interval.isBefore(refLocus)) {
interval = intervalListIterator.next();
outputStatsToVCF(createIntervalStatistic(interval), Allele.create(refBase, true));
interval = intervalListIterator.peek();
}
}

View File

@ -280,7 +280,7 @@ class SampleStatistics {
read.getAlignmentStart() < read.getMateAlignmentStart())
return false;
// TODO note: IGV uses a different alorithm for insert size, there should be a common util class that does this for you
// TODO note: IGV uses a different algorithm for insert size, there should be a common util class that does this for you
// mates are too far apart
if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize())
return false;

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
 * Integration tests for the DiagnoseTargets walker.
 *
 * Every test runs the walker with the same reference (REF) and interval list (L) and hands the
 * resulting command line, together with one expected md5 string, to a WalkerTestSpec.
 */
public class DiagnoseTargetsIntegrationTest extends WalkerTest {
    // reference passed to the walker through -R
    static final String REF = b37KGReference;

    // input BAM files, resolved relative to the validation data location
    final String singleSample = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
    final String multiSample = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";

    // interval list passed to the walker through -L
    final String L = validationDataLocation + "DT-itest.interval_list";

    /**
     * Assembles the DiagnoseTargets command line and executes it as a walker test.
     *
     * @param label     the name handed to executeTest for this run
     * @param extraArgs arguments appended after the common "-T/-R/-L/-o" prefix
     * @param expected  the md5 string given to the WalkerTestSpec
     */
    private void runDiagnoseTargets(String label, String extraArgs, String expected) {
        // the "%s" after -o is deliberately left in the command line; it is passed on to WalkerTestSpec as-is
        final String commandLine = "-T DiagnoseTargets -R " + REF + " -L " + L + " -o %s " + extraArgs;
        executeTest(label, new WalkerTestSpec(commandLine, Arrays.asList(expected)));
    }

    /** Runs the walker on the single-sample BAM with "-max 75". */
    @Test(enabled = true)
    public void testSingleSample() {
        final String args = "-I " + singleSample + " -max 75";
        runDiagnoseTargets("testSingleSample ", args, "2df47009571fe83ead779c94be97fe96");
    }

    /** Runs the walker on the multi-sample BAM with default arguments. */
    @Test(enabled = true)
    public void testMultiSample() {
        final String args = "-I " + multiSample;
        runDiagnoseTargets("testMultiSample ", args, "6f0c070b9671e1d007ce6374c3183014");
    }
}