From 7eb3e4da4164f6d7077a88978d3abe48fb828584 Mon Sep 17 00:00:00 2001 From: Roger Zurawicki Date: Fri, 15 Jun 2012 12:11:13 -0400 Subject: [PATCH] Added integration Tests for DiagnoseTargets Signed-off-by: Mauricio Carneiro --- .../diagnostics/targets/DiagnoseTargets.java | 40 +++++++++----- .../diagnostics/targets/SampleStatistics.java | 2 +- .../DiagnoseTargetsIntegrationTest.java | 53 +++++++++++++++++++ 3 files changed, 80 insertions(+), 15 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java index 3b149ef22..cba38d0de 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java @@ -126,7 +126,7 @@ public class DiagnoseTargets extends LocusWalker { @Argument(fullName = "print_debug_log", shortName = "dl", doc = "Used only for debugging the walker. Prints extra info to screen", required = false) private boolean debug = false; - private HashMap intervalMap = null; // interval => statistics + private HashMap intervalMap = null; // maps each interval => statistics private PeekableIterator intervalListIterator; // an iterator to go over all the intervals provided as we traverse the genome private Set samples = null; // all the samples being processed private final Allele SYMBOLIC_ALLELE = Allele.create("
", false); // avoid creating the symbolic allele multiple times @@ -137,7 +137,7 @@ public class DiagnoseTargets extends LocusWalker { super.initialize(); if (getToolkit().getIntervals() == null) - throw new UserException("This tool currently only works if you provide one or more interval"); + throw new UserException("This tool only works if you provide one or more intervals. ( Use the -L argument )"); thresholds = new ThresHolder(minimumBaseQuality, minimumMappingQuality, minimumCoverage, maximumCoverage, minMedianDepth, maxInsertSize, votePercentage, lowMedianDepthPercentage, badMateStatusThreshold, coverageStatusThreshold, excessiveCoverageThreshold, qualityStatusThreshold); @@ -201,6 +201,20 @@ public class DiagnoseTargets extends LocusWalker { return loc; } + private GenomeLoc getFinishedIntervalSpan(GenomeLoc pos) { + GenomeLoc loc = null; + for (GenomeLoc interval : intervalMap.keySet()) { + if (interval.isBefore(pos)) { + if (loc == null) + loc = interval; + else + loc = interval.union(loc); + } + } + + return loc; + } + /** * Removes all intervals that are behind the current reference locus from the intervalMap * @@ -208,19 +222,17 @@ public class DiagnoseTargets extends LocusWalker { * @param refBase the reference allele */ private void removePastIntervals(GenomeLoc refLocus, byte refBase) { - // if all intervals are safe - if (getIntervalMapSpan() != null && getIntervalMapSpan().isBefore(refLocus)) { - for (GenomeLoc interval : intervalMap.keySet()) { - outputStatsToVCF(intervalMap.get(interval), Allele.create(refBase, true)); - intervalMap.remove(interval); - } - } + // if there are statistics to output/ check to see that we can output them in order + if (getFinishedIntervalSpan(refLocus) != null && + getIntervalMapSpan().getStart() == getFinishedIntervalSpan(refLocus).getStart()) { + + for (GenomeLoc interval : intervalMap.keySet()) { + if (interval.isBefore(refLocus)) { + outputStatsToVCF(intervalMap.get(interval), Allele.create(refBase, true)); + intervalMap.remove(interval); + } + } - GenomeLoc interval = intervalListIterator.peek(); // clean up all intervals that we might have skipped because there was no data - while (interval != null && interval.isBefore(refLocus)) { - interval = intervalListIterator.next(); - outputStatsToVCF(createIntervalStatistic(interval), Allele.create(refBase, true)); - interval = intervalListIterator.peek(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java index 03bceb662..0fc2d8929 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java @@ -280,7 +280,7 @@ class SampleStatistics { read.getAlignmentStart() < read.getMateAlignmentStart()) return false; - // TODO note: IGV uses a different alorithm for insert size, there should be a common util class that does this for you + // TODO note: IGV uses a different algorithm for insert size, there should be a common util class that does this for you // mates are too far apart if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize()) return false; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java new file mode 100644 index 000000000..355071e73 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class DiagnoseTargetsIntegrationTest extends WalkerTest { + final static String REF = b37KGReference; + final String singleSample = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam"; + final String multiSample = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam"; + final String L = validationDataLocation + "DT-itest.interval_list"; + + private void DTTest(String testName, String args, String md5) { + String base = String.format("-T DiagnoseTargets -R %s -L %s", REF, L) + " -o %s "; + WalkerTestSpec spec = new WalkerTestSpec(base + args, Arrays.asList(md5)); + executeTest(testName, spec); + } + + @Test(enabled = true) + public void testSingleSample() { + DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "2df47009571fe83ead779c94be97fe96"); + } + + @Test(enabled = true) + public void testMultiSample() { + DTTest("testMultiSample ", "-I " + multiSample, "6f0c070b9671e1d007ce6374c3183014"); + } +}