From f313e14e4ef2c3f933505bb16527313ce09e618c Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 12 Jul 2011 08:50:58 -0400 Subject: [PATCH 01/14] Now deletes the dump directory on ant clean Moving diffengine tests from private to public --- build.xml | 1 + .../diffengine/DiffEngineUnitTest.java | 229 ++++++++++++++++ .../walkers/diffengine/DiffNodeUnitTest.java | 249 ++++++++++++++++++ .../diffengine/DiffableReaderUnitTest.java | 143 ++++++++++ .../diffengine/DifferenceUnitTest.java | 95 +++++++ 5 files changed, 717 insertions(+) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java diff --git a/build.xml b/build.xml index 80627fae0..068c69316 100644 --- a/build.xml +++ b/build.xml @@ -981,6 +981,7 @@ + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java new file mode 100644 index 000000000..cd6c3598a --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * 
The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DiffEngineUnitTest extends BaseTest { + DiffEngine engine; + + @BeforeClass(enabled = true) + public void createDiffEngine() { + engine = new DiffEngine(); + } + + // -------------------------------------------------------------------------------- + // + // Difference testing routines + // + // -------------------------------------------------------------------------------- + + private class DifferenceTest extends TestDataProvider { + public DiffElement tree1, tree2; + public List differences; + + private DifferenceTest(String tree1, String tree2) { + this(tree1, tree2, Collections.emptyList()); + } + + private DifferenceTest(String tree1, String tree2, String difference) { + this(tree1, tree2, Arrays.asList(difference)); + } + + private DifferenceTest(String tree1, String tree2, List differences) { + super(DifferenceTest.class); + this.tree1 = DiffNode.fromString(tree1); + this.tree2 = DiffNode.fromString(tree2); + this.differences = 
differences; + } + + public String toString() { + return String.format("tree1=%s tree2=%s diff=%s", + tree1.toOneLineString(), tree2.toOneLineString(), differences); + } + } + + @DataProvider(name = "trees") + public Object[][] createTrees() { + new DifferenceTest("A=X", "A=X"); + new DifferenceTest("A=X", "A=Y", "A:X!=Y"); + new DifferenceTest("A=X", "B=X", Arrays.asList("A:X!=MISSING", "B:MISSING!=X")); + new DifferenceTest("A=(X=1)", "B=(X=1)", Arrays.asList("A:(X=1)!=MISSING", "B:MISSING!=(X=1)")); + new DifferenceTest("A=(X=1)", "A=(X=1)"); + new DifferenceTest("A=(X=1 Y=2)", "A=(X=1 Y=2)"); + new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=3))"); + new DifferenceTest("A=(X=1)", "A=(X=2)", "A.X:1!=2"); + new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=4))", "A.B.Z:3!=4"); + new DifferenceTest("A=(X=1)", "A=(X=1 Y=2)", "A.Y:MISSING!=2"); + new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2)", "A.B:(Z=3)!=MISSING"); + return DifferenceTest.getTests(DifferenceTest.class); + } + + @Test(enabled = true, dataProvider = "trees") + public void testDiffs(DifferenceTest test) { + logger.warn("Test tree1: " + test.tree1.toOneLineString()); + logger.warn("Test tree2: " + test.tree2.toOneLineString()); + + List diffs = engine.diff(test.tree1, test.tree2); + logger.warn("Test expected diff : " + test.differences); + logger.warn("Observed diffs : " + diffs); + } + + // -------------------------------------------------------------------------------- + // + // Low-level routines for summarizing differences + // + // -------------------------------------------------------------------------------- + + @Test(enabled = true) + public void testLongestCommonPostfix() { + testLongestCommonPostfixHelper("A", "A", 1); + testLongestCommonPostfixHelper("A", "B", 0); + testLongestCommonPostfixHelper("A.B", "A.B", 2); + testLongestCommonPostfixHelper("A.B.C", "A.B.C", 3); + testLongestCommonPostfixHelper("A.B.C", "X.B.C", 2); + 
testLongestCommonPostfixHelper("A.B.C", "X.Y.C", 1); + testLongestCommonPostfixHelper("A.B.C", "X.Y.Z", 0); + testLongestCommonPostfixHelper("A.B.C", "A.X.C", 1); + testLongestCommonPostfixHelper("A.B.C", "A.X.Z", 0); + testLongestCommonPostfixHelper("A.B.C", "A.B.Z", 0); + } + + public void testLongestCommonPostfixHelper(String p1, String p2, int expected) { + String[] parts1 = p1.split("\\."); + String[] parts2 = p2.split("\\."); + int obs = DiffEngine.longestCommonPostfix(parts1, parts2); + Assert.assertEquals(obs, expected, "p1=" + p1 + " p2=" + p2 + " failed"); + } + + @Test(enabled = true, dependsOnMethods = "testLongestCommonPostfix") + public void testSummarizePath() { + testSummarizePathHelper("A", "A", "A"); + testSummarizePathHelper("A", "B", "*"); + testSummarizePathHelper("A.B", "A.B", "A.B"); + testSummarizePathHelper("A.B", "X.B", "*.B"); + testSummarizePathHelper("A.B", "X.Y", "*.*"); + testSummarizePathHelper("A.B.C", "A.B.C", "A.B.C"); + testSummarizePathHelper("A.B.C", "X.B.C", "*.B.C"); + testSummarizePathHelper("A.B.C", "X.Y.C", "*.*.C"); + testSummarizePathHelper("A.B.C", "X.Y.Z", "*.*.*"); + testSummarizePathHelper("A.B.C", "A.X.C", "*.*.C"); + testSummarizePathHelper("A.B.C", "A.X.Z", "*.*.*"); + testSummarizePathHelper("A.B.C", "A.B.Z", "*.*.*"); + } + + public void testSummarizePathHelper(String p1, String p2, String expected) { + String[] parts1 = DiffEngine.diffNameToPath(p1); + String[] parts2 = DiffEngine.diffNameToPath(p2); + int obs = DiffEngine.longestCommonPostfix(parts1, parts2); + String path = DiffEngine.summarizedPath(parts2, obs); + Assert.assertEquals(path, expected, "p1=" + p1 + " p2=" + p2 + " failed"); + } + + // -------------------------------------------------------------------------------- + // + // High-level difference summary + // + // -------------------------------------------------------------------------------- + + private class SummarizeDifferenceTest extends TestDataProvider { + List diffs = new ArrayList(); + 
List expecteds = new ArrayList(); + + public SummarizeDifferenceTest() { super(SummarizeDifferenceTest.class); } + + public SummarizeDifferenceTest addDiff(String... diffsToAdd) { + diffs.addAll(Arrays.asList(diffsToAdd)); + return this; + } + + public SummarizeDifferenceTest addSummary(String... expectedSummary) { + expecteds.addAll(Arrays.asList(expectedSummary)); + return this; + } + + public String toString() { + return String.format("diffs=%s => expected=%s", diffs, expecteds); + } + + public void test() { + List diffPaths = new ArrayList(diffs.size()); + for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); } + + List sumDiffs = engine.summarizedDifferencesOfPaths(diffPaths); + + Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs); + + for ( int i = 0; i < sumDiffs.size(); i++ ) { + DiffEngine.SummarizedDifference sumDiff = sumDiffs.get(i); + String expected = expecteds.get(i); + String[] pathCount = expected.split(":"); + String path = pathCount[0]; + int count = Integer.valueOf(pathCount[1]); + Assert.assertEquals(sumDiff.getPath(), path, "Unexpected path at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs); + Assert.assertEquals(sumDiff.getCount(), count, "Unexpected counts at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs); + } + } + } + + @DataProvider(name = "summaries") + public Object[][] createSummaries() { + new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2"); + new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1"); + new SummarizeDifferenceTest().addDiff("A", "A", "A").addSummary("A:3"); + new SummarizeDifferenceTest().addDiff("A", "A", "A", "B").addSummary("A:3", "B:1"); + new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B").addSummary("A:3", "B:2"); + new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B", "C").addSummary("A:3", "B:2", "C:1"); + new SummarizeDifferenceTest().addDiff("A.X", 
"A.X").addSummary("A.X:2"); + new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X").addSummary("*.X:3", "A.X:2", "B.X:1"); + new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X", "B.X").addSummary("*.X:4", "A.X:2", "B.X:2"); + new SummarizeDifferenceTest().addDiff("A.B.C", "X.B.C").addSummary("*.B.C:2", "A.B.C:1", "X.B.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "X.Y.C", "X.Y.C").addSummary("*.*.C:3", "X.Y.C:2", "A.B.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "X.Y.C").addSummary("*.*.C:3", "A.B.C:1", "A.X.C:1", "X.Y.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C").addSummary("*.*.C:3", "*.X.C:2", "A.B.C:1", "A.X.C:1", "B.X.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C", "B.X.C").addSummary("*.*.C:4", "*.X.C:3", "B.X.C:2", "A.B.C:1", "A.X.C:1"); + + return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class); + } + + + @Test(enabled = true, dependsOnMethods = "testSummarizePath", dataProvider = "summaries") + public void testSummarizeDifferences(SummarizeDifferenceTest test) { + test.test(); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java new file mode 100644 index 000000000..534416d29 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to 
the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DiffNodeUnitTest extends BaseTest { + // Data is: + // MY_ROOT + // fields: A=A, B=B + // nodes: C, D + // C: fields: E=E, nodes: none + // D: fields: F=F, G=G, nodes: none + static DiffNode MY_ROOT = DiffNode.rooted("MY_ROOT"); + static DiffValue Value_A = new DiffValue("A", MY_ROOT, "A"); + static DiffValue Value_B = new DiffValue("B", MY_ROOT, "B"); + static DiffNode NODE_C = DiffNode.empty("C", MY_ROOT); + static DiffNode NODE_D = DiffNode.empty("D", MY_ROOT); + static DiffValue Value_E = new DiffValue("E", NODE_C, "E"); + static DiffValue Value_F = new DiffValue("F", NODE_D, "F"); + static DiffValue Value_G = new DiffValue("G", NODE_D, "G"); + + static { + MY_ROOT.add(Value_A); + MY_ROOT.add(Value_B); + MY_ROOT.add(NODE_C); + MY_ROOT.add(NODE_D); + NODE_C.add(Value_E); + NODE_D.add(Value_F); + NODE_D.add(Value_G); + } + + + // -------------------------------------------------------------------------------- + // + // Element testing 
routines + // + // -------------------------------------------------------------------------------- + + private class ElementTest extends TestDataProvider { + public DiffElement elt; + public String name; + public String fullName; + public DiffElement parent; + + private ElementTest(DiffValue elt, DiffValue parent, String name, String fullName) { + this(elt.getBinding(), parent.getBinding(), name, fullName); + } + + private ElementTest(DiffElement elt, DiffElement parent, String name, String fullName) { + super(ElementTest.class); + this.elt = elt; + this.name = name; + this.fullName = fullName; + this.parent = parent; + } + + public String toString() { + return String.format("ElementTest elt=%s name=%s fullName=%s parent=%s", + elt.toOneLineString(), name, fullName, parent.getName()); + } + } + + @DataProvider(name = "elementdata") + public Object[][] createElementData() { + new ElementTest(MY_ROOT.getBinding(), DiffElement.ROOT, "MY_ROOT", "MY_ROOT"); + new ElementTest(NODE_C, MY_ROOT, "C", "MY_ROOT.C"); + new ElementTest(NODE_D, MY_ROOT, "D", "MY_ROOT.D"); + new ElementTest(Value_A, MY_ROOT, "A", "MY_ROOT.A"); + new ElementTest(Value_B, MY_ROOT, "B", "MY_ROOT.B"); + new ElementTest(Value_E, NODE_C, "E", "MY_ROOT.C.E"); + new ElementTest(Value_F, NODE_D, "F", "MY_ROOT.D.F"); + new ElementTest(Value_G, NODE_D, "G", "MY_ROOT.D.G"); + return TestDataProvider.getTests(ElementTest.class); + } + + @Test(enabled = true, dataProvider = "elementdata") + public void testElementMethods(ElementTest test) { + Assert.assertNotNull(test.elt.getName()); + Assert.assertNotNull(test.elt.getParent()); + Assert.assertEquals(test.elt.getName(), test.name); + Assert.assertEquals(test.elt.getParent(), test.parent); + Assert.assertEquals(test.elt.fullyQualifiedName(), test.fullName); + } + + // -------------------------------------------------------------------------------- + // + // DiffValue testing routines + // + // 
-------------------------------------------------------------------------------- + + private class LeafTest extends TestDataProvider { + public DiffValue diffvalue; + public Object value; + + private LeafTest(DiffValue diffvalue, Object value) { + super(LeafTest.class); + this.diffvalue = diffvalue; + this.value = value; + } + + public String toString() { + return String.format("LeafTest diffvalue=%s value=%s", diffvalue.toOneLineString(), value); + } + } + + @DataProvider(name = "leafdata") + public Object[][] createLeafData() { + new LeafTest(Value_A, "A"); + new LeafTest(Value_B, "B"); + new LeafTest(Value_E, "E"); + new LeafTest(Value_F, "F"); + new LeafTest(Value_G, "G"); + return TestDataProvider.getTests(LeafTest.class); + } + + @Test(enabled = true, dataProvider = "leafdata") + public void testLeafMethods(LeafTest test) { + Assert.assertNotNull(test.diffvalue.getValue()); + Assert.assertEquals(test.diffvalue.getValue(), test.value); + } + + // -------------------------------------------------------------------------------- + // + // Node testing routines + // + // -------------------------------------------------------------------------------- + + private class NodeTest extends TestDataProvider { + public DiffNode node; + public Set fields; + public Set subnodes; + public Set allNames; + + private NodeTest(DiffNode node, List fields, List subnodes) { + super(NodeTest.class); + this.node = node; + this.fields = new HashSet(fields); + this.subnodes = new HashSet(subnodes); + this.allNames = new HashSet(fields); + allNames.addAll(subnodes); + } + + public String toString() { + return String.format("NodeTest node=%s fields=%s subnodes=%s", + node.toOneLineString(), fields, subnodes); + } + } + + @DataProvider(name = "nodedata") + public Object[][] createData1() { + new NodeTest(MY_ROOT, Arrays.asList("A", "B"), Arrays.asList("C", "D")); + new NodeTest(NODE_C, Arrays.asList("E"), Collections.emptyList()); + new NodeTest(NODE_D, Arrays.asList("F", "G"), 
Collections.emptyList()); + return TestDataProvider.getTests(NodeTest.class); + } + + @Test(enabled = true, dataProvider = "nodedata") + public void testNodeAccessors(NodeTest test) { + Assert.assertNotNull(test.node.getElements()); + + for ( String name : test.allNames ) { + DiffElement elt = test.node.getElement(name); + Assert.assertNotNull(elt, "Failed to find field " + elt + " in " + test.node); + Assert.assertEquals(elt.getName(), name); + Assert.assertEquals(elt.getValue().isAtomic(), test.fields.contains(name), "Failed atomic/compound expectation: " + test.node); + } + } + + // NOTE: add routines are being implicitly tested by the creation of the data structures + + @Test(enabled = true, dataProvider = "nodedata") + public void testCounts(NodeTest test) { + Assert.assertEquals(test.node.getElements().size(), test.allNames.size()); + Assert.assertEquals(test.node.getElementNames(), test.allNames); + } + + // -------------------------------------------------------------------------------- + // + // fromString testing routines + // + // -------------------------------------------------------------------------------- + + private class FromStringTest extends TestDataProvider { + public String string; + public DiffElement expected; + + private FromStringTest(String string, DiffElement expected) { + super(FromStringTest.class); + this.string = string; + this.expected = expected; + } + + public String toString() { + return String.format("FromStringTest string=%s expected=%s", string, expected.toOneLineString()); + } + } + + @DataProvider(name = "fromstringdata") + public Object[][] createFromData() { + new FromStringTest("A=A", Value_A.getBinding()); + new FromStringTest("B=B", Value_B.getBinding()); + new FromStringTest("C=(E=E)", NODE_C.getBinding()); + new FromStringTest("D=(F=F G=G)", NODE_D.getBinding()); + return TestDataProvider.getTests(FromStringTest.class); + } + + @Test(enabled = true, dataProvider = "fromstringdata") + public void 
parseFromString(FromStringTest test) { + logger.warn("Testing from string: " + test.string); + DiffElement elt = DiffNode.fromString(test.string); + Assert.assertEquals(elt.toOneLineString(), test.expected.toOneLineString()); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java new file mode 100644 index 000000000..5738b643f --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. 
+ + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.*; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DiffableReaderUnitTest extends BaseTest { + DiffEngine engine; + + File vcfFile = new File(testDir + "diffTestMaster.vcf"); + File bamFile = new File(testDir + "exampleBAM.bam"); + + @BeforeClass(enabled = true) + public void createDiffEngine() { + engine = new DiffEngine(); + } + + @Test(enabled = true) + public void testPluggableDiffableReaders() { + logger.warn("testPluggableDiffableReaders"); + Map readers = engine.getReaders(); + Assert.assertNotNull(readers); + Assert.assertTrue(readers.size() > 0); + Assert.assertNotNull(readers.get("VCF")); + for ( Map.Entry e : engine.getReaders().entrySet() ) { + logger.warn("Found diffable reader: " + e.getKey()); + Assert.assertEquals(e.getValue().getName(), e.getKey()); + Assert.assertEquals(e.getValue(), engine.getReader(e.getKey())); + } + } + + private static void testLeaf(DiffNode rec, String field, Object expected) { + DiffElement value = rec.getElement(field); + Assert.assertNotNull(value, "Expected to see leaf named " + field + " in rec " + rec); + Assert.assertEquals(value.getValue().getValue(), expected, "Expected to leaf named " + field + " to have value " + expected + " in rec " + rec); + } + + @Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders") + public void testVCF1() { + logger.warn("testVCF1"); + DiffableReader vcfReader = engine.getReader("VCF"); + Assert.assertTrue(vcfReader.canRead(vcfFile)); + Assert.assertFalse(vcfReader.canRead(bamFile)); + + DiffElement diff = vcfReader.readFromFile(vcfFile); + Assert.assertNotNull(diff); + + Assert.assertEquals(diff.getName(), vcfFile.getName()); + 
Assert.assertSame(diff.getParent(), DiffElement.ROOT); + + DiffNode node = diff.getValueAsNode(); + Assert.assertEquals(node.getElements().size(), 9); + + // chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 + DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode(); + testLeaf(rec1, "CHROM", "chr1"); + testLeaf(rec1, "POS", 2646); + testLeaf(rec1, "ID", "rs62635284"); + testLeaf(rec1, "REF", Allele.create("G", true)); + testLeaf(rec1, "ALT", new HashSet(Arrays.asList(Allele.create("A")))); + testLeaf(rec1, "QUAL", 0.15); + testLeaf(rec1, "FILTER", Collections.emptySet()); + testLeaf(rec1, "AC", "2"); + testLeaf(rec1, "AF", "1.00"); + testLeaf(rec1, "AN", "2"); + } + + @Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders") + public void testBAM() { + logger.warn("testBAM"); + DiffableReader bamReader = engine.getReader("BAM"); + Assert.assertTrue(bamReader.canRead(bamFile)); + Assert.assertFalse(bamReader.canRead(vcfFile)); + + DiffElement diff = bamReader.readFromFile(bamFile); + Assert.assertNotNull(diff); + + Assert.assertEquals(diff.getName(), bamFile.getName()); + Assert.assertSame(diff.getParent(), DiffElement.ROOT); + + DiffNode node = diff.getValueAsNode(); + Assert.assertEquals(node.getElements().size(), 33); + + // 30PPJAAXX090125:1:42:512:1817#0 99 chr1 200 0 76M = + // 255 -130 ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC + // BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3: + // PG:Z:0 RG:Z:exampleBAM.bam SM:Z:exampleBAM.bam + + DiffNode rec1 = node.getElement("30PPJAAXX090125:1:42:512:1817#0_1").getValueAsNode(); + testLeaf(rec1, "NAME", "30PPJAAXX090125:1:42:512:1817#0"); + testLeaf(rec1, "FLAGS", 99); + testLeaf(rec1, "RNAME", "chr1"); + testLeaf(rec1, "POS", 200); + testLeaf(rec1, "MAPQ", 0); + testLeaf(rec1, "CIGAR", "76M"); + testLeaf(rec1, "RNEXT", "chr1"); + testLeaf(rec1, "PNEXT", 255); + testLeaf(rec1, 
"TLEN", -130); + testLeaf(rec1, "SEQ", "ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC"); + testLeaf(rec1, "QUAL", "BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3:"); + testLeaf(rec1, "PG", "0"); + testLeaf(rec1, "RG", "exampleBAM.bam"); + testLeaf(rec1, "SM", "exampleBAM.bam"); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java new file mode 100644 index 000000000..da272ec30 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DifferenceUnitTest extends BaseTest { + // -------------------------------------------------------------------------------- + // + // testing routines + // + // -------------------------------------------------------------------------------- + + private class DifferenceTest extends TestDataProvider { + public DiffElement tree1, tree2; + public String difference; + + private DifferenceTest(String tree1, String tree2, String difference) { + this(DiffNode.fromString(tree1), DiffNode.fromString(tree2), difference); + } + + private DifferenceTest(DiffElement tree1, DiffElement tree2, String difference) { + super(DifferenceTest.class); + this.tree1 = tree1; + this.tree2 = tree2; + this.difference = difference; + } + + public String toString() { + return String.format("tree1=%s tree2=%s diff=%s", + tree1 == null ? "null" : tree1.toOneLineString(), + tree2 == null ? "null" : tree2.toOneLineString(), + difference); + } + } + + @DataProvider(name = "data") + public Object[][] createTrees() { + new DifferenceTest("A=X", "A=Y", "A:X!=Y"); + new DifferenceTest("A=Y", "A=X", "A:Y!=X"); + new DifferenceTest(DiffNode.fromString("A=X"), null, "A:X!=MISSING"); + new DifferenceTest(null, DiffNode.fromString("A=X"), "A:MISSING!=X"); + return DifferenceTest.getTests(DifferenceTest.class); + } + + @Test(enabled = true, dataProvider = "data") + public void testDiffToString(DifferenceTest test) { + logger.warn("Test tree1: " + (test.tree1 == null ? 
"null" : test.tree1.toOneLineString())); + logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString())); + logger.warn("Test expected diff : " + test.difference); + Difference diff = new Difference(test.tree1, test.tree2); + logger.warn("Observed diffs : " + diff); + Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference ); + + } +} \ No newline at end of file From 8056a3fe89046d942c4b656ff8138283e0235769 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 12 Jul 2011 08:52:31 -0400 Subject: [PATCH 02/14] getElement() now uses O(1) get from hash instead of linear O(n) search. Enables us to read large files easily. --- .../sting/gatk/walkers/diffengine/DiffNode.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java index 0720e18c0..3e1be8609 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java @@ -107,11 +107,13 @@ public class DiffNode extends DiffValue { return getElements(false); } + /** + * Returns the element bound to name, or null if no such binding exists + * @param name + * @return + */ public DiffElement getElement(String name) { - for ( DiffElement elt : getElements() ) - if ( elt.getName().equals(name) ) - return elt; - return null; + return getElementMap().get(name); } /** From 05212aea62b2f78f7a739257bac86fd0b16d2c5b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 12 Jul 2011 08:53:19 -0400 Subject: [PATCH 03/14] reader now takes an argument for the maximum number of elements to read from the file. 
--- .../walkers/diffengine/BAMDiffableReader.java | 5 ++--- .../gatk/walkers/diffengine/DiffEngine.java | 7 ++++++- .../walkers/diffengine/DiffObjectsWalker.java | 17 ++++++++++------- .../gatk/walkers/diffengine/DiffableReader.java | 2 +- .../walkers/diffengine/VCFDiffableReader.java | 10 ++++++++-- .../diffengine/DiffableReaderUnitTest.java | 4 ++-- 6 files changed, 29 insertions(+), 16 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java index f7a395d9d..a5ebf27bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java @@ -51,12 +51,11 @@ import java.util.zip.GZIPInputStream; * Class implementing diffnode reader for VCF */ public class BAMDiffableReader implements DiffableReader { - private final static int MAX_RECORDS_TO_READ = 1000; @Override public String getName() { return "BAM"; } @Override - public DiffElement readFromFile(File file) { + public DiffElement readFromFile(File file, int maxElementsToRead) { final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); @@ -65,7 +64,7 @@ public class BAMDiffableReader implements DiffableReader { int count = 0; while ( iterator.hasNext() ) { - if ( count++ > MAX_RECORDS_TO_READ ) + if ( count++ > maxElementsToRead && maxElementsToRead != -1) break; final SAMRecord record = iterator.next(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index ba2713bff..54a7a464d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -385,12 +385,17 @@ public class DiffEngine { return findReaderForFile(file) != null; } + public DiffElement createDiffableFromFile(File file) { + return createDiffableFromFile(file, -1); + } + + public DiffElement createDiffableFromFile(File file, int maxElementsToRead) { DiffableReader reader = findReaderForFile(file); if ( reader == null ) throw new UserException("Unsupported file type: " + file); else - return reader.readFromFile(file); + return reader.readFromFile(file, maxElementsToRead); } public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index a08108db2..fe411b195 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -48,11 +48,14 @@ public class DiffObjectsWalker extends RodWalker { @Output(doc="File to which results should be written",required=true) protected PrintStream out; - @Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false) - int MAX_RECORDS = 0; + @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) + int MAX_OBJECTS_TO_READ = -1; - @Argument(fullName="maxCount1Records", shortName="M1", doc="Max. number of records occuring exactly once in the file to process", required=false) - int MAX_COUNT1_RECORDS = 0; + @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false) + int MAX_DIFFS = 0; + + @Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. 
number of diffs occuring exactly once in the file to process", required=false) + int MAX_COUNT1_DIFFS = 0; @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false) int minCountForDiff = 1; @@ -91,9 +94,9 @@ public class DiffObjectsWalker extends RodWalker { @Override public void onTraversalDone(Integer sum) { out.printf("Reading master file %s%n", masterFile); - DiffElement master = diffEngine.createDiffableFromFile(masterFile); + DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ); out.printf("Reading test file %s%n", testFile); - DiffElement test = diffEngine.createDiffableFromFile(testFile); + DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ); // out.printf("Master diff objects%n"); // out.println(master.toString()); @@ -107,7 +110,7 @@ public class DiffObjectsWalker extends RodWalker { out.printf("DIFF: %s%n", diff.toString()); } - DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_RECORDS, MAX_COUNT1_RECORDS, minCountForDiff); + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff); diffEngine.reportSummarizedDifferences(diffs, params); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java index 84c2eed10..af5771c55 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java @@ -43,7 +43,7 @@ public interface DiffableReader { @Ensures("result != null") @Requires("file != null") - public DiffElement readFromFile(File file); + public DiffElement readFromFile(File file, int maxElementsToRead); @Requires("file != null") public 
boolean canRead(File file); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 743178538..06d14366f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -51,15 +51,21 @@ public class VCFDiffableReader implements DiffableReader { public String getName() { return "VCF"; } @Override - public DiffElement readFromFile(File file) { + public DiffElement readFromFile(File file, int maxElementsToRead) { DiffNode root = DiffNode.rooted(file.getName()); try { LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); VCFCodec vcfCodec = new VCFCodec(); - VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader); + + // must be read as state is stored in reader itself + vcfCodec.readHeader(lineReader); String line = lineReader.readLine(); + int count = 0; while ( line != null ) { + if ( count++ > maxElementsToRead && maxElementsToRead != -1) + break; + VariantContext vc = (VariantContext)vcfCodec.decode(line); String name = vc.getChr() + ":" + vc.getStart(); DiffNode vcRoot = DiffNode.empty(name, root); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java index 5738b643f..baa2f0383 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -80,7 +80,7 @@ public class DiffableReaderUnitTest extends BaseTest { Assert.assertTrue(vcfReader.canRead(vcfFile)); Assert.assertFalse(vcfReader.canRead(bamFile)); - DiffElement diff = vcfReader.readFromFile(vcfFile); + DiffElement diff = 
vcfReader.readFromFile(vcfFile, -1); Assert.assertNotNull(diff); Assert.assertEquals(diff.getName(), vcfFile.getName()); @@ -110,7 +110,7 @@ public class DiffableReaderUnitTest extends BaseTest { Assert.assertTrue(bamReader.canRead(bamFile)); Assert.assertFalse(bamReader.canRead(vcfFile)); - DiffElement diff = bamReader.readFromFile(bamFile); + DiffElement diff = bamReader.readFromFile(bamFile, -1); Assert.assertNotNull(diff); Assert.assertEquals(diff.getName(), bamFile.getName()); From cfe43e3971327ff26ef9087e31b4294d4a98d99c Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 12 Jul 2011 13:43:46 -0400 Subject: [PATCH 04/14] Bug fix for Genotype given alleles: if we are in INDEL mode ignore SNPs and MNPs instead of emitting an empty site with alleles but no annotations --- .../genotyper/UnifiedGenotyperEngine.java | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 4c9080884..6fc972b5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -634,17 +634,27 @@ public class UnifiedGenotyperEngine { if (vcInput == null) return null; - if (vcInput.isSNP() && ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)) - return GenotypeLikelihoodsCalculationModel.Model.SNP; + // todo - no support to genotype MNP's yet + if (vcInput.isMNP()) + return null; + + if (vcInput.isSNP()) { + if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)) + return GenotypeLikelihoodsCalculationModel.Model.SNP; + else + // ignore SNP's if user chose INDEL mode + 
return null; + } else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)) return GenotypeLikelihoodsCalculationModel.Model.INDEL; - } else { + } + else { // todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP) return GenotypeLikelihoodsCalculationModel.Model.SNP; else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) return GenotypeLikelihoodsCalculationModel.Model.INDEL; - } + } } return null; } From 73735863b0fbff0e7dc5ee789f2e075ead13f7fa Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 12 Jul 2011 13:55:21 -0400 Subject: [PATCH 05/14] Fix for the case of requesting genotype for a sample that doesn't exist in a VariantContext --- .../sting/utils/variantcontext/VariantContext.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 5787b591f..da80a3431 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -867,7 +867,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati for ( String name : sampleNames ) { if ( map.containsKey(name) ) throw new IllegalArgumentException("Duplicate names detected in requested samples " + sampleNames); - map.put(name, getGenotype(name)); + final Genotype g = getGenotype(name); + if ( g != null ) { + map.put(name, g); + } } return map; From a2597e7f00824b37174a648da7c648938f5c4886 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 12 Jul 2011 
14:11:53 -0400 Subject: [PATCH 06/14] This commit incorporates several different changes that each pretty much break all the VCF-based integration tests, so I bunched them all together. We now officially emit VCF4.1 files (woo hoo), which means that the VCF headers are now all different (header version is 4.1 plus counts for some of the annotations are 'A' or 'G'). Also, I've added a Read Filter for reads with MQ=255 ('unavailable' in the SAM spec) and have applied this to the UG and the RMS MQ annotation. --- .../MappingQualityUnavailableReadFilter.java | 43 +++++++++++++++++ ...java => MappingQualityZeroReadFilter.java} | 5 +- .../annotator/AlleleBalanceBySample.java | 2 +- .../walkers/annotator/ChromosomeCounts.java | 4 +- .../annotator/MappingQualityRankSumTest.java | 7 ++- .../gatk/walkers/annotator/NBaseCount.java | 2 +- .../walkers/annotator/RMSMappingQuality.java | 7 ++- .../gatk/walkers/annotator/RankSumTest.java | 5 +- .../walkers/genotyper/UnifiedGenotyper.java | 5 +- .../indels/RealignerTargetCreator.java | 4 +- .../indels/SomaticIndelDetectorWalker.java | 2 +- .../phasing/ReadBackedPhasingWalker.java | 4 +- .../recalibration/CountCovariatesWalker.java | 4 +- .../sting/utils/QualityUtils.java | 4 ++ .../utils/codecs/vcf/StandardVCFWriter.java | 6 +-- .../VariantAnnotatorIntegrationTest.java | 28 +++++------ .../GenomicAnnotatorIntegrationTest.java | 6 +-- .../walkers/beagle/BeagleIntegrationTest.java | 6 +-- .../VariantFiltrationIntegrationTest.java | 22 ++++----- .../UnifiedGenotyperIntegrationTest.java | 48 +++++++++---------- .../ReadBackedPhasingIntegrationTest.java | 12 ++--- ...ntRecalibrationWalkersIntegrationTest.java | 2 +- .../CombineVariantsIntegrationTest.java | 34 ++++++------- .../LiftoverVariantsIntegrationTest.java | 6 +-- .../SelectVariantsIntegrationTest.java | 8 ++-- .../VCFStreamingIntegrationTest.java | 2 +- .../VariantsToVCFIntegrationTest.java | 8 ++-- .../VariantContextIntegrationTest.java | 2 +- 28 files changed, 169 
insertions(+), 119 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java rename public/java/src/org/broadinstitute/sting/gatk/filters/{ZeroMappingQualityReadFilter.java => MappingQualityZeroReadFilter.java} (90%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java new file mode 100644 index 000000000..cecbedda8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.picard.util.QualityUtil; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.QualityUtils; + +/** + * Filter out mapping quality zero reads. + * + * @author ebanks + * @version 0.1 + */ + +public class MappingQualityUnavailableReadFilter extends ReadFilter { + public boolean filterOut(SAMRecord rec) { + return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE); + } +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java similarity index 90% rename from public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java index 7e6fc5e82..e49d4117c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java @@ -24,17 +24,16 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** - * Filter out zero mapping quality reads. + * Filter out mapping quality zero reads. 
* * @author hanna * @version 0.1 */ -public class ZeroMappingQualityReadFilter extends ReadFilter { +public class MappingQualityZeroReadFilter extends ReadFilter { public boolean filterOut(SAMRecord rec) { return (rec.getMappingQuality() == 0); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index 0be737897..51d290763 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -62,5 +62,5 @@ public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAn public List getKeyNames() { return Arrays.asList("AB"); } - public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), -1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); } + public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index ed10d2072..f3ec2b1df 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -42,8 +42,8 @@ import java.util.*; public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation { private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; - private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.UNBOUNDED, 
VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), - new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), + private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), + new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 11f86b972..8260a5a81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -21,7 +22,7 @@ public class MappingQualityRankSumTest extends RankSumTest { protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) { for ( final PileupElement p : pileup ) { - if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 
and 255 are special mapping qualities used as a code by aligners + if ( isUsableBase(p) ) { if ( p.getBase() == ref ) { refQuals.add((double)p.getMappingQual()); } else if ( p.getBase() == alt ) { @@ -34,7 +35,7 @@ public class MappingQualityRankSumTest extends RankSumTest { // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ? HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); for (final PileupElement p: pileup) { - if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() < 254) { + if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) { // retrieve likelihood information corresponding to this read LinkedHashMap el = indelLikelihoodMap.get(p); // by design, first element in LinkedHashMap was ref allele @@ -54,8 +55,6 @@ public class MappingQualityRankSumTest extends RankSumTest { refQuals.add((double)p.getMappingQual()); else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH) altQuals.add((double)p.getMappingQual()); - - } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index ba3e2cc8b..3b64abfff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -47,5 +47,5 @@ public class NBaseCount implements InfoFieldAnnotation { public List getKeyNames() { return Arrays.asList("PercentNBaseSolid"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 4, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 1, 
VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 6e80c7555..1ef7ccd0b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -38,8 +39,10 @@ public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotatio pileup = context.getBasePileup(); if (pileup != null) { - for (PileupElement p : pileup ) - qualities[index++] = p.getRead().getMappingQuality(); + for (PileupElement p : pileup ) { + if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE ) + qualities[index++] = p.getMappingQual(); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 1a967293f..f00abd6a1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -106,6 +106,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnota protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals); protected static boolean isUsableBase( final PileupElement p ) { - return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed 
quality score here + return !( p.isDeletion() || + p.getMappingQual() == 0 || + p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || + ((int)p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE ); // need the unBAQed quality score here } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index fe0084a19..fc8a5819a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.filters.BadMateFilter; @@ -47,7 +48,7 @@ import java.io.PrintStream; * multi-sample data. The user can choose from several different incorporated calculation models. 
*/ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) -@ReadFilters( {BadMateFilter.class} ) +@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} ) @Reference(window=@Window(start=-200,stop=200)) @By(DataSource.REFERENCE) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) @@ -175,7 +176,7 @@ public class UnifiedGenotyper extends LocusWalker { // @Output // PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index e59b29502..4833a6cad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; @@ -58,7 +58,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr @Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) @By(DataSource.READS) -@ReadFilters({ZeroMappingQualityReadFilter.class}) +@ReadFilters({MappingQualityZeroReadFilter.class}) // Filter out all reads with zero mapping quality public class ReadBackedPhasingWalker extends RodWalker { diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index ee504b6e7..6673bec92 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; @@ -75,7 +75,7 @@ import java.util.Map; @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file -@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality +@ReadFilters( {MappingQualityZeroReadFilter.class} ) // Filter out all reads with zero mapping quality @Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta @PartitionBy(PartitionType.LOCUS) public class CountCovariatesWalker extends LocusWalker implements TreeReducible { diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 23054e95f..fad2320fc 100755 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -9,9 +9,13 @@ import net.sf.samtools.SAMUtils; * @author Kiran Garimella */ public class QualityUtils { + public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE; public final static double MIN_REASONABLE_ERROR = 0.0001; public final static byte MAX_REASONABLE_Q_SCORE = 40; + public final static byte MIN_USABLE_Q_SCORE = 6; + + public final static int MAPPING_QUALITY_UNAVAILABLE = 255; /** * Private constructor. No instantiating this class! diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index f4996b487..a8bf74707 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -123,12 +123,10 @@ public class StandardVCFWriter implements VCFWriter { try { // the file format field needs to be written first - mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n"); + mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString() + "\n"); for ( VCFHeaderLine line : mHeader.getMetaData() ) { - if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) || - line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) || - line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) ) + if ( VCFHeaderVersion.isFormatString(line.getKey()) ) continue; // are the records filtered (so we know what to put in the FILTER column of passing records) ? 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 6ba6926c6..e6300e6c9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -15,7 +15,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347")); + Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test file has annotations, not asking for annotations, #1", spec); } @@ -23,7 +23,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("1de8e943fbf55246ebd19efa32f22a58")); + Arrays.asList("964f1016ec9a3c55333f62dd834c14d6")); executeTest("test file has annotations, not asking for annotations, #2", spec); } @@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("93c110e45fd4aedb044a8a5501e23336")); + 
Arrays.asList("8e7de435105499cd71ffc099e268a83e")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("f5cb45910ed719f46159f9f71acaecf4")); + Arrays.asList("64b6804cb1e27826e3a47089349be581")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -47,7 +47,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4b48e7d095ef73e3151542ea976ecd89")); + Arrays.asList("42ccee09fa9f8c58f4a0d4f1139c094f")); executeTest("test file doesn't have annotations, not asking for annotations, #1", spec); } @@ -55,7 +55,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("28dfbfd178aca071b948cd3dc2365357")); + Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" 
-B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("a330a5bc3ee72a51dbeb7e6c97a0db99")); + Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("3a31d1ef471acfb881a2dec7963fe3f4")); + Arrays.asList("09f8e840770a9411ff77508e0ed0837f")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, - Arrays.asList("a63fd8ff7bafbd46b7f009144a7c2ad1")); + Arrays.asList("78d2c19f8107d865970dbaf3e12edd92")); executeTest("test overwriting header", spec); } @@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoReads() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("36378f1245bb99d902fbfe147605bc42")); + Arrays.asList("16e3a1403fc376320d7c69492cad9345")); executeTest("not passing it any reads", spec); } @@ -95,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithDbsnp() { 
WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("0257a1cc3c703535b2d3c5046bf88ab7")); + Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d")); executeTest("getting DB tag with dbSNP", spec); } @@ -103,7 +103,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("2d7c73489dcf0db433bebdf79a068764")); + Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688")); executeTest("getting DB tag with HM3", spec); } @@ -111,13 +111,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1, - Arrays.asList("2f6efd08d818faa1eb0631844437c64a")); + Arrays.asList("e9c0d832dc6b4ed06c955060f830c140")); executeTest("using expression", spec); } @Test public void testTabixAnnotations() { - final String MD5 = "6c7a6a1c0027bf82656542a9b2671a35"; + final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -A HomopolymerRun -B:variant,VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java index c4f6d5ebc..c75a5b2dc 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java @@ -29,7 +29,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest { */ - String[] md5WithDashSArg = {"3d3b61a83c1189108eabb2df04218099"}; + String[] md5WithDashSArg = {"efba4ce1641cfa2ef88a64395f2ebce8"}; WalkerTestSpec specWithSArg = new WalkerTestSpec( "-T GenomicAnnotator -R " + b36KGReference + " -B:variant,vcf3 /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf" + @@ -58,7 +58,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("caa562160733aa638e1ba413ede209ae") + Arrays.asList("772fc3f43b70770ec6c6acbb8bbbd4c0") ); executeTest("testGenomicAnnotatorOnIndels", testOnIndels); } @@ -76,7 +76,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a4cf76f08fa90284b6988a464b6e0c17") + Arrays.asList("081ade7f3d2d3c5f19cb1e8651a626f3") ); executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java index 70c34e729..fef1b6e64 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java @@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest { "-B:beagleR2,BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " + "-B:beagleProbs,BEAGLE " + beagleValidationDataLocation + 
"inttestbgl.gprobs " + "-B:beaglePhased,BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " + - "-o %s -NO_HEADER", 1, Arrays.asList("6bccee48ad2f06ba5a8c774fed444478")); + "-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4")); executeTest("test BeagleOutputToVCF", spec); } @@ -60,7 +60,7 @@ public class BeagleIntegrationTest extends WalkerTest { "-T ProduceBeagleInput -B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+ "-B:validation,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+ "-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta -NO_HEADER ",2, - Arrays.asList("660986891b30cdc937e0f2a3a5743faa","223fb977e8db567dcaf632c6ee51f294")); + Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166")); executeTest("test BeagleInputWithBootstrap",spec); } @@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest { "-B:beagleR2,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+ "-B:beagleProbs,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+ "-B:beaglePhased,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+ - "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("24b88ef8cdf6e347daab491f0256be5a")); + "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965")); executeTest("testBeagleChangesSitesToRef",spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 3d75fdc44..7bec67d2e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testNoAction() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347")); + Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test no action", spec); } @@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -window 10 -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("ada5540bb3d9b6eb8f1337ba01e90a94")); + Arrays.asList("27b13f179bb4920615dff3a32730d845")); executeTest("test clustered SNPs", spec); } @@ -32,17 +32,17 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testMasks() { WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae")); + Arrays.asList("578f9e774784c25871678e6464fd212b")); executeTest("test mask all", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("b64baabe905a5d197cc1ab594147d3d5")); + Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f")); executeTest("test mask some", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation 
+ "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("0eff92fe72024d535c44b98e1e9e1993")); + Arrays.asList("5939f80d14b32d88587373532d7b90e5")); executeTest("test mask extend", spec3); } @@ -50,7 +50,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("7a40795147cbfa92941489d7239aad92")); + Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368")); executeTest("test filter #1", spec); } @@ -58,7 +58,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("e9dd4991b1e325847c77d053dfe8ee54")); + Arrays.asList("c95845e817da7352b9b72bc9794f18fb")); executeTest("test filter #2", spec); } @@ -66,7 +66,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilterWithSeparateNames() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("9ded2cce63b8d97550079047051d80a3")); + Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530")); executeTest("test filter with separate names #2", spec); } @@ -74,12 +74,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testGenotypeFilters() { WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf 
-L 1:10,020,000-10,021,000", 1, - Arrays.asList("6696e3f65a62ce912230d47cdb0c129b")); + Arrays.asList("96b61e4543a73fe725e433f007260039")); executeTest("test genotype filter #1", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9")); + Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e")); executeTest("test genotype filter #2", spec2); } @@ -87,7 +87,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testDeletions() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo -B:variant,VCF " + validationDataLocation + "twoDeletions.vcf", 1, - Arrays.asList("e63b58be33c9126ad6cc55489aac539b")); + Arrays.asList("569546fd798afa0e65c5b61b440d07ac")); executeTest("test deletions", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 20fa7719f..1f23d262e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0")); + Arrays.asList("c97829259463d04b0159591bb6fb44af")); executeTest("test MultiSample Pilot1", spec); } @@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest 
extends WalkerTest { public void testWithAllelesPassedIn() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("edeb1db288a24baff59575ceedd94243")); + Arrays.asList("2b69667f4770e8c0c894066b7f27e440")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("581990130d90071b084024f4cd7caf91")); + Arrays.asList("b77fe007c2a97fcd59dfd5eef94d8b95")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("d120db27d694a6da32367cc4fb5770fa")); + Arrays.asList("ee8a5e63ddd470726a749e69c0c20f60")); executeTest("test SingleSample Pilot2", spec); } @@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4"; + private final static String COMPRESSED_OUTPUT_MD5 = "ef31654a2b85b9b2d3bba4f4a75a17b6"; @Test public void testCompressedOutput() { @@ -107,7 +107,7 @@ public class 
UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "a29615dd37222a11b8dadd341b53e43c"; + String md5 = "46868a9c4134651c54535fb46b408aee"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -138,9 +138,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testCallingParameters() { HashMap e = new HashMap(); - e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" ); - e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" ); - e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" ); + e.put( "--min_base_quality_score 26", "5043c9a101e691602eb7a3f9704bdf20" ); + e.put( "--min_mapping_quality_score 26", "71a833eb8fd93ee62ae0d5a430f27940" ); + e.put( "--p_nonref_model GRID_SEARCH", "ddf443e9dcadef367476b26b4d52c134" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -153,9 +153,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameter() { HashMap e = new HashMap(); - e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" ); - e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" ); - e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" ); + e.put( "-sites_only", "eaad6ceb71ab94290650a70bea5ab951" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "05bf7db8a3d19ef4a3d14772c90b732f" ); + e.put( "--output_mode EMIT_ALL_SITES", "e4b86740468d7369f0156550855586c7" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -169,12 +169,12 @@ public class UnifiedGenotyperIntegrationTest 
extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("64be99183c100caed4aa5f8bad64c7e9")); + Arrays.asList("71a833eb8fd93ee62ae0d5a430f27940")); executeTest("test confidence 1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("e76ca54232d02f0d92730e1affeb804e")); + Arrays.asList("79968844dc3ddecb97748c1acf2984c7")); executeTest("test confidence 2", spec2); } @@ -186,8 +186,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" ); - e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" ); + e.put( 0.01, "4e878664f61d2d800146d3762303fde1" ); + e.put( 1.0 / 1850, "9204caec095ff5e63ca21a10b6fab453" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); + Arrays.asList("1a58ec52df545f946f80cc16c5736a91")); executeTest(String.format("test multiple technologies"), spec); } @@ -230,7 +230,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("0919ab7e513c377610e23a67d33608fa")); + Arrays.asList("62d0f6d9de344ce68ce121c13b1e78b1")); executeTest(String.format("test calling with BAQ"), spec); } @@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 
1:10,000,000-10,100,000" + " -baq OFF", 1, - Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); + Arrays.asList("1a58ec52df545f946f80cc16c5736a91")); executeTest(String.format("test calling with BAQ OFF"), spec); } @@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("cb37348c41b8181be829912730f747e1")); + Arrays.asList("631ae1f1eb6bc4c1a4136b8495250536")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2")); + Arrays.asList("fd556585c79e2b892a5976668f45aa43")); executeTest(String.format("test indel caller in SLX witn low min allele count"), spec); } @@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81")); + Arrays.asList("9cd56feedd2787919e571383889fde70")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -301,14 +301,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364")); + Arrays.asList("315e1b78d7a403d7fcbcf0caa8c496b8")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + 
validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("1b9764b783acf7822edc58e6822eef5b")); + Arrays.asList("cf89e0c54f14482a23c105b73a333d8a")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 0ed16967a..1bf3e579f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:332341-382503", 1, - Arrays.asList("6020a68bbec97fcd87819c10cd4e2470")); + Arrays.asList("9568ba0b6624b97ac55a59bdee2d9150")); executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec); } @@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:1232503-1332503", 1, - Arrays.asList("712c2145df4756c9a15758865d8007b5")); + Arrays.asList("ce65194c24fe83b0ec90faa6c8e6109a")); executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec); } @@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30) + " -L chr20:332341-382503", 1, - Arrays.asList("297e0896e4761529d979f40f5ad694db")); + Arrays.asList("02d134fd544613b1e5dd7f7197fc3753")); executeTest("MAX 2 het 
sites [TEST THREE]; require PQ >= 30", spec); } @@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100) + " -L chr20:332341-382503", 1, - Arrays.asList("52a17f14692d726d3b726cf0ae7f2a09")); + Arrays.asList("2f7ec9904fc054c2ba1a7db05eb29334")); executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec); } @@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10) + " -L chr20:332341-482503", 1, - Arrays.asList("af768f7958b8f4599c2374f1cc2fc613")); + Arrays.asList("da7a31725f229d1782dd3049848730aa")); executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec); } @@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:652810-681757", 1, - Arrays.asList("3dd886672f59a47908b94136d0427bb0")); + Arrays.asList("e9d35cb88089fb0e8ae6678bfaeeac8c")); executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 9600046da..2fec2e70f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -27,7 +27,7 @@ public class 
VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", "d33212a84368e821cbedecd4f59756d6", // tranches "4652dca41222bebdf9d9fda343b2a835", // recal file - "5350b1a4c1250cf3b77ca45327c04711"); // cut VCF + "243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 600718aa0..daaab9425 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -71,24 +71,24 @@ public class CombineVariantsIntegrationTest extends WalkerTest { } - @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2117fff6e0d182cd20be508e9661829c", true); } - @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2cfaf7af3dd119df08b8a9c1f72e2f93", " -setKey foo", true); } - @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1474ac0fde2ce42a3c24f1c97eab333e", " -setKey null", true); } - @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "7fc66df048a0ab08cf507906e1d4a308", false); } // official project VCF files in tabix format + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); } + @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); } + @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); } + @Test public void 
testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0", false); } // official project VCF files in tabix format - @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ec9715f53dbf4531570557c212822f12", false); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f1072be5f5c6ee810276d9ca6537224d", false); } + @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); } - @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "b77a1eec725201d9d8e74ee0c45638d3", false); } // official project VCF files in tabix format - @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "802977fdfd2f4905b501bb06800f60af", false); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a67157287dd2b24b5cdf7ebf8fcbbe9a", false); } + @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format + @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", 
"CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e1f4718a179f1196538a33863da04f53", false); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); } - @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "b3783384b7c8e877b971033e90beba48", true); } + @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); } - @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "902e541c87caa72134db6293fc46f0ad"); } - @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "f339ad4bb5863b58b9c919ce7d040bb9"); } + @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); } + @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); } @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( @@ -101,7 +101,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("a07995587b855f3214fb71940bf23c0f")); + Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5")); executeTest("threeWayWithRefs", spec); } @@ -120,7 +120,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { } // @Test 
public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); } - @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "0db9ef50fe54b60426474273d7c7fa99"); } - @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "d20acb3d53ba0a02ce92d540ebeda2a9"); } - @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "8d1b3d120515f8b56b5a0d10bc5da713"); } + @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); } + @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); } + @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java index d32ab6282..82c894c6f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java @@ -40,7 +40,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("37e23efd7d6471fc0f807b31ccafe0eb")); + Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd")); executeTest("test b36 to hg19", spec); } @@ -49,7 +49,7 @@ public class 
LiftoverVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("b6ef4a2f026fd3843aeb9ed764a66921")); + Arrays.asList("3fd7ec2dc4064ef410786276b0dc9d08")); executeTest("test b36 to hg19, unsorted samples", spec); } @@ -58,7 +58,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + hg18Reference + " -B:variant,vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("3275373b3c44ad14a270b50664b3f8a3")); + Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b")); executeTest("test hg18 to hg19, unsorted", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index e18287a21..b5f41542e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -18,7 +18,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' -B:variant,VCF3 " + testfile + " -NO_HEADER"), 1, - Arrays.asList("1b9d551298dc048c7d36b60440ff4d50") + Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") ); 
executeTest("testComplexSelection--" + testfile, spec); @@ -31,7 +31,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -sn B -sn C -B:variant,VCF3 " + testfile + " -NO_HEADER"), 1, - Arrays.asList("5ba7536a0819421b330350a160e4261a") + Arrays.asList("b74038779fe6485dbb8734ae48178356") ); executeTest("testRepeatedLineSelection--" + testfile, spec); @@ -44,7 +44,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -disc myvar -L 20:1012700-1020000 -B:variant,VCF " + b37hapmapGenotypes + " -B:myvar,VCF " + testFile + " -o %s -NO_HEADER", 1, - Arrays.asList("97621ae8f29955eedfc4e0be3515fcb9") + Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e") ); executeTest("testDiscordance--" + testFile, spec); @@ -57,7 +57,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -conc hapmap -L 20:1012700-1020000 -B:hapmap,VCF " + b37hapmapGenotypes + " -B:variant,VCF " + testFile + " -o %s -NO_HEADER", 1, - Arrays.asList("a0ae016fdffcbe7bfb99fd3dbc311407") + Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a") ); executeTest("testConcordance--" + testFile, spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java index cf0673ee6..d7efe4212 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java @@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { " --NO_HEADER" + " -o %s", 1, - Arrays.asList("debbbf3e661b6857cc8d99ff7635bb1d") + 
Arrays.asList("658f580f7a294fd334bd897102616fed") ); executeTest("testSimpleVCFStreaming", spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index 64d0db14b..8421076c9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testVariantsToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("bd15d98adc76b5798e3bbeff3f936feb"); + md5.add("815b82fff92aab41c209eedce2d7e7d9"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + @@ -38,7 +38,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("acd15d3f85bff5b545bc353e0e23cc6e"); + md5.add("22336ee9c12aa222ce29c3c5babca7d0"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + @@ -56,7 +56,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingHapMapInput() { List md5 = new ArrayList(); - md5.add("6f34528569f8cf5941cb365fa77288c1"); + md5.add("9bedaa7670b86a07be5191898c3727cf"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + @@ -73,7 +73,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingVCFInput() { List md5 = new ArrayList(); - md5.add("d8316fc1b9d8e954a58940354119a32e"); + md5.add("cc215edec9ca28e5c79ab1b67506f9f7"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + diff --git 
a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 5d42f8d0c..a344817a0 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -49,7 +49,7 @@ public class VariantContextIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s", 2, // just one output file - Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "e6673737acbb6bfabfcd92c4b2268241")); + Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63")); executeTest("testToVCF", spec); } From ccedd6ff4c942c20c1a57f6a6bf65c5cb63b6e16 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 12 Jul 2011 15:20:28 -0400 Subject: [PATCH 07/14] Difference is now the general form -- used to be SummarizedDifference. The old Difference class is now SpecificDifference, a subclass of Difference that includes pointers to the specific master and test DiffElements. Added a size() function that calculates the number of elements in the tree rooted at a DiffElement. 
--- .../gatk/walkers/diffengine/DiffElement.java | 4 + .../gatk/walkers/diffengine/DiffEngine.java | 142 +++++------------- .../gatk/walkers/diffengine/DiffNode.java | 7 + .../walkers/diffengine/DiffObjectsWalker.java | 7 +- .../gatk/walkers/diffengine/DiffValue.java | 1 + .../gatk/walkers/diffengine/Difference.java | 83 +++++++--- .../diffengine/SpecificDifference.java | 59 ++++++++ .../diffengine/DiffEngineUnitTest.java | 6 +- .../diffengine/DifferenceUnitTest.java | 2 +- 9 files changed, 176 insertions(+), 135 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java index eff24bb88..4c3f7bd95 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java @@ -115,4 +115,8 @@ public class DiffElement { else throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this); } + + public int size() { + return 1 + getValue().size(); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 54a7a464d..6d85df71d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -24,11 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; -import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; 
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -60,7 +58,7 @@ public class DiffEngine { // // -------------------------------------------------------------------------------- - public List diff(DiffElement master, DiffElement test) { + public List diff(DiffElement master, DiffElement test) { DiffValue masterValue = master.getValue(); DiffValue testValue = test.getValue(); @@ -70,14 +68,14 @@ public class DiffEngine { return diff(masterValue, testValue); } else { // structural difference in types. one is node, other is leaf - return Arrays.asList(new Difference(master, test)); + return Arrays.asList(new SpecificDifference(master, test)); } } - public List diff(DiffNode master, DiffNode test) { + public List diff(DiffNode master, DiffNode test) { Set allNames = new HashSet(master.getElementNames()); allNames.addAll(test.getElementNames()); - List diffs = new ArrayList(); + List diffs = new ArrayList(); for ( String name : allNames ) { DiffElement masterElt = master.getElement(name); @@ -86,7 +84,7 @@ public class DiffEngine { throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name); } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value // todo -- should one of these be a special MISSING item? 
- diffs.add(new Difference(masterElt, testElt)); + diffs.add(new SpecificDifference(masterElt, testElt)); } else { diffs.addAll(diff(masterElt, testElt)); } @@ -95,11 +93,11 @@ public class DiffEngine { return diffs; } - public List diff(DiffValue master, DiffValue test) { + public List diff(DiffValue master, DiffValue test) { if ( master.getValue().equals(test.getValue()) ) { return Collections.emptyList(); } else { - return Arrays.asList(new Difference(master.getBinding(), test.getBinding())); + return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding())); } } @@ -147,64 +145,68 @@ public class DiffEngine { * @param params determines how we display the items * @param diffs */ - public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { + public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { printSummaryReport(summarizeDifferences(diffs), params ); } - public List summarizeDifferences(List diffs) { - List diffPaths = new ArrayList(diffs.size()); - - for ( Difference diff1 : diffs ) { - diffPaths.add(diffNameToPath(diff1.getFullyQualifiedName())); - } - - return summarizedDifferencesOfPaths(diffPaths); + public List summarizeDifferences(List diffs) { + return summarizedDifferencesOfPaths(diffs); } final protected static String[] diffNameToPath(String diffName) { return diffName.split("\\."); } - protected List summarizedDifferencesOfPaths(List diffPaths) { - Map summaries = new HashMap(); + protected List summarizedDifferencesOfPathsFromString(List singletonDiffs) { + List diffs = new ArrayList(); + + for ( String diff : singletonDiffs ) { + diffs.add(new Difference(diff)); + } + + return summarizedDifferencesOfPaths(diffs); + } + + protected List summarizedDifferencesOfPaths(List singletonDiffs) { + Map summaries = new HashMap(); // create the initial set of differences - for ( int i = 0; i < diffPaths.size(); i++ ) { + for ( int i = 0; i < singletonDiffs.size(); i++ ) { for ( int j = 0; 
j <= i; j++ ) { - String[] diffPath1 = diffPaths.get(i); - String[] diffPath2 = diffPaths.get(j); - if ( diffPath1.length == diffPath2.length ) { - int lcp = longestCommonPostfix(diffPath1, diffPath2); - String path = lcp > 0 ? summarizedPath(diffPath2, lcp) : Utils.join(".", diffPath2); + Difference diffPath1 = singletonDiffs.get(i); + Difference diffPath2 = singletonDiffs.get(j); + if ( diffPath1.length() == diffPath2.length() ) { + int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts()); + String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath(); addSummary(summaries, path, true); } } } // count differences - for ( String[] diffPath : diffPaths ) { - for ( SummarizedDifference sumDiff : summaries.values() ) { - if ( sumDiff.matches(diffPath) ) + for ( Difference diffPath : singletonDiffs ) { + for ( Difference sumDiff : summaries.values() ) { + if ( sumDiff.matches(diffPath.getParts()) ) addSummary(summaries, sumDiff.getPath(), false); } } - List sortedSummaries = new ArrayList(summaries.values()); + List sortedSummaries = new ArrayList(summaries.values()); Collections.sort(sortedSummaries); return sortedSummaries; } - private static void addSummary(Map summaries, String path, boolean onlyCatalog) { + private static void addSummary(Map summaries, String path, boolean onlyCatalog) { if ( summaries.containsKey(path) ) { if ( ! 
onlyCatalog ) summaries.get(path).incCount(); } else { - SummarizedDifference sumDiff = new SummarizedDifference(path); + Difference sumDiff = new Difference(path); summaries.put(sumDiff.getPath(), sumDiff); } } - protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) { + protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) { GATKReport report = new GATKReport(); final String tableName = "diffences"; report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information"); @@ -213,7 +215,7 @@ public class DiffEngine { table.addColumn("NumberOfOccurrences", 0); int count = 0, count1 = 0; - for ( SummarizedDifference diff : sortedSummaries ) { + for ( Difference diff : sortedSummaries ) { if ( diff.getCount() < params.minSumDiffToShow ) // in order, so break as soon as the count is too low break; @@ -261,76 +263,6 @@ public class DiffEngine { return Utils.join(".", parts); } - /** - * TODO -- all of the algorithms above should use SummarizedDifference instead - * TODO -- of some SummarizedDifferences and some low-level String[] - */ - public static class SummarizedDifference implements Comparable { - final String path; // X.Y.Z - final String[] parts; - int count = 0; - - public SummarizedDifference(String path) { - this.path = path; - this.parts = diffNameToPath(path); - } - - public void incCount() { count++; } - - public int getCount() { - return count; - } - - /** - * The fully qualified path object A.B.C etc - * @return - */ - public String getPath() { - return path; - } - - /** - * @return the length of the parts of this summary - */ - public int length() { - return this.parts.length; - } - - /** - * Returns true if the string parts matches this summary. Matches are - * must be equal() everywhere where this summary isn't *. 
- * @param otherParts - * @return - */ - public boolean matches(String[] otherParts) { - if ( otherParts.length != length() ) - return false; - - // TODO optimization: can start at right most non-star element - for ( int i = 0; i < length(); i++ ) { - String part = parts[i]; - if ( ! part.equals("*") && ! part.equals(otherParts[i]) ) - return false; - } - - return true; - } - - @Override - public String toString() { - return String.format("%s:%d", getPath(), getCount()); - } - - @Override - public int compareTo(SummarizedDifference other) { - // sort first highest to lowest count, then by lowest to highest path - int countCmp = Integer.valueOf(count).compareTo(other.count); - return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); - } - - - } - // -------------------------------------------------------------------------------- // // plugin manager @@ -404,7 +336,7 @@ public class DiffEngine { if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) { DiffElement master = diffEngine.createDiffableFromFile(masterFile); DiffElement test = diffEngine.createDiffableFromFile(testFile); - List diffs = diffEngine.diff(master, test); + List diffs = diffEngine.diff(master, test); diffEngine.reportSummarizedDifferences(diffs, params); return true; } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java index 3e1be8609..2f48de2d3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java @@ -153,6 +153,13 @@ public class DiffNode extends DiffValue { add(new DiffElement(name, this.getBinding(), new DiffValue(value))); } + public int size() { + int count = 0; + for ( DiffElement value : getElements() ) + count += value.size(); + return count; + } + // 
--------------------------------------------------------------------------- // // toString diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index fe411b195..ecb836af9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; -import org.apache.xmlbeans.impl.tool.Diff; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -95,18 +94,20 @@ public class DiffObjectsWalker extends RodWalker { public void onTraversalDone(Integer sum) { out.printf("Reading master file %s%n", masterFile); DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ); + out.printf(" Read %d objects%n", master.size()); out.printf("Reading test file %s%n", testFile); DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ); + out.printf(" Read %d objects%n", test.size()); // out.printf("Master diff objects%n"); // out.println(master.toString()); // out.printf("Test diff objects%n"); // out.println(test.toString()); - List diffs = diffEngine.diff(master, test); + List diffs = diffEngine.diff(master, test); if ( showItemizedDifferences ) { out.printf("Itemized results%n"); - for ( Difference diff : diffs ) + for ( SpecificDifference diff : diffs ) out.printf("DIFF: %s%n", diff.toString()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java index 7245e9e8d..3750496a1 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java @@ -87,4 +87,5 @@ public class DiffValue { public boolean isAtomic() { return true; } public boolean isCompound() { return ! isAtomic(); } + public int size() { return 1; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java index 6627a4cc5..efc6ef160 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java @@ -24,35 +24,72 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: 7/4/11 - * Time: 12:53 PM - * - * Represents a specific difference between two specific DiffElements - */ -public class Difference { - DiffElement master, test; +public class Difference implements Comparable { + final String path; // X.Y.Z + final String[] parts; + int count = 0; - public Difference(DiffElement master, DiffElement test) { - if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null"); - this.master = master; - this.test = test; + public Difference(String path) { + this.path = path; + this.parts = DiffEngine.diffNameToPath(path); } + public String[] getParts() { + return parts; + } + + public void incCount() { count++; } + + public int getCount() { + return count; + } + + /** + * The fully qualified path object A.B.C etc + * @return + */ + public String getPath() { + return path; + } + + /** + * @return the length of the parts of this summary + */ + public int length() { + return this.parts.length; + } + + /** + * Returns true if the string parts matches this summary. Matches are + * must be equal() everywhere where this summary isn't *. 
+ * @param otherParts + * @return + */ + public boolean matches(String[] otherParts) { + if ( otherParts.length != length() ) + return false; + + // TODO optimization: can start at right most non-star element + for ( int i = 0; i < length(); i++ ) { + String part = parts[i]; + if ( ! part.equals("*") && ! part.equals(otherParts[i]) ) + return false; + } + + return true; + } + + @Override public String toString() { - return String.format("%s:%s!=%s", - getFullyQualifiedName(), - getOneLineString(master), - getOneLineString(test)); + return String.format("%s:%d", getPath(), getCount()); } - public String getFullyQualifiedName() { - return (master == null ? test : master).fullyQualifiedName(); + @Override + public int compareTo(Difference other) { + // sort first highest to lowest count, then by lowest to highest path + int countCmp = Integer.valueOf(count).compareTo(other.count); + return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); } - private static String getOneLineString(DiffElement elt) { - return elt == null ? 
"MISSING" : elt.getValue().toOneLineString(); - } + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java new file mode 100644 index 000000000..2fe9b47f8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +/** + * Created by IntelliJ IDEA. 
+ * User: depristo + * Date: 7/4/11 + * Time: 12:53 PM + * + * Represents a specific difference between two specific DiffElements + */ +public class SpecificDifference extends Difference { + DiffElement master, test; + + public SpecificDifference(DiffElement master, DiffElement test) { + super(createName(master, test)); + if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null"); + this.master = master; + this.test = test; + } + + public String toString() { + return String.format("%s:%s!=%s", + getPath(), + getOneLineString(master), + getOneLineString(test)); + } + + private static String createName(DiffElement master, DiffElement test) { + return (master == null ? test : master).fullyQualifiedName(); + } + + private static String getOneLineString(DiffElement elt) { + return elt == null ? "MISSING" : elt.getValue().toOneLineString(); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java index cd6c3598a..96dfec6e8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java @@ -99,7 +99,7 @@ public class DiffEngineUnitTest extends BaseTest { logger.warn("Test tree1: " + test.tree1.toOneLineString()); logger.warn("Test tree2: " + test.tree2.toOneLineString()); - List diffs = engine.diff(test.tree1, test.tree2); + List diffs = engine.diff(test.tree1, test.tree2); logger.warn("Test expected diff : " + test.differences); logger.warn("Observed diffs : " + diffs); } @@ -185,12 +185,12 @@ public class DiffEngineUnitTest extends BaseTest { List diffPaths = new ArrayList(diffs.size()); for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); } - List sumDiffs = engine.summarizedDifferencesOfPaths(diffPaths); + List 
sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs); Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs); for ( int i = 0; i < sumDiffs.size(); i++ ) { - DiffEngine.SummarizedDifference sumDiff = sumDiffs.get(i); + Difference sumDiff = sumDiffs.get(i); String expected = expecteds.get(i); String[] pathCount = expected.split(":"); String path = pathCount[0]; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java index da272ec30..64579a01b 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java @@ -87,7 +87,7 @@ public class DifferenceUnitTest extends BaseTest { logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString())); logger.warn("Test tree2: " + (test.tree2 == null ? 
"null" : test.tree2.toOneLineString())); logger.warn("Test expected diff : " + test.difference); - Difference diff = new Difference(test.tree1, test.tree2); + SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2); logger.warn("Observed diffs : " + diff); Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference ); From 5077c94d85929bad35fcc00bbeab0b8036aabe4a Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 12 Jul 2011 15:39:07 -0400 Subject: [PATCH 09/14] Adding MappingQualityUnavailableReadFilter to the SNP and indel CountCovariates --- .../recalibration/CountCovariatesWalker.java | 3 +- .../RecalibrationWalkersIntegrationTest.java | 91 +++---------------- 2 files changed, 15 insertions(+), 79 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 6673bec92..c21f548b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broad.tribble.bed.BEDCodec; import org.broad.tribble.dbsnp.DbSNPCodec; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.commandline.Gather; @@ -75,7 +76,7 @@ import java.util.Map; @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file -@ReadFilters( {MappingQualityZeroReadFilter.class} ) // Filter out all reads with zero mapping quality +@ReadFilters( 
{MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality @Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta @PartitionBy(PartitionType.LOCUS) public class CountCovariatesWalker extends LocusWalker implements TreeReducible { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index b0f76229b..129161da3 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -19,9 +19,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { public void testCountCovariates1() { HashMap e = new HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f4f8a49bb5764d2a8f61e055f64dcce4"); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9c006f8e9fb5752b1c139f5a8cc7ea88"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "570506533f079d738d70934dfe1c02cd" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e6b98af01c5a08e4954b79ec42db6fc3" ); for ( String parallelism : Arrays.asList("", " -nt 4")) { for ( Map.Entry entry : e.entrySet() ) { @@ -53,9 +53,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { public void testTableRecalibrator1() { HashMap e = new 
HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5"); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "6797d7ffa4ef6c48413719ba32696ccf"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "064c4a7bdd23974c3a9c5f924540df76" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1f9d8944b73169b367cb83b0d22e5432" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -107,7 +107,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -133,12 +133,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } - - @Test public void testCountCovariatesSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "0a6cdb9611e5880ea6611205080aa267" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c9ea5f995e1e2b7a5688533e678dcedc" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -164,7 +162,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", 
"9bc7e1ad223ba759fe5e8ddb4c07369c" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "993fae4270e7e1e15986f270acf247af" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -189,13 +187,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } - - - @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3700eaf567e4937f442fc777a226d6ad"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -219,7 +214,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesBED() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "6803891a3398821fc8a37e19ea8e5a00"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b460478d9683e827784e42bc352db8bb"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -243,7 +238,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCFPlusDBsnp() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f224c42fbc4026db973ccc91265ab5c7"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d892bd60d8f679affda3c1e3af96c1"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -268,69 +263,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } - @Test - public void testCountCovariatesNoReadGroups() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "c024e03f019aeceaf364fa58c8295ad8" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = 
entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -T CountCovariates" + - " -I " + bam + - " -L 1:10,000,000-10,200,000" + - " -cov ReadGroupCovariate" + - " -cov QualityScoreCovariate" + - " -cov CycleCovariate" + - " -cov DinucCovariate" + - " --default_read_group DefaultReadGroup" + - " --default_platform illumina" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s", - 1, // just one output file - Arrays.asList(md5)); - List result = executeTest("testCountCovariatesNoReadGroups", spec).getFirst(); - paramsFilesNoReadGroupTest.put(bam, result.get(0).getAbsolutePath()); - } - } - - @Test - public void testTableRecalibratorNoReadGroups() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "1eefbe7ac0376fc1ed1392d85242171e" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - String paramsFile = paramsFilesNoReadGroupTest.get(bam); - System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); - if ( paramsFile != null ) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + b36KGReference + - " -T TableRecalibration" + - " -I " + bam + - " -L 1:10,100,000-10,300,000" + - " -o %s" + - " --no_pg_tag" + - " --solid_recal_mode SET_Q_ZERO" + - " --default_read_group DefaultReadGroup" + - " --default_platform illumina" + - " -recalFile " + paramsFile, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testTableRecalibratorNoReadGroups", spec); - } - } - } - @Test public void testCountCovariatesNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "cfc31bb6f51436d1c3b34f62bb801dc8" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", 
"284ccac1f8fe485e52c86333cac7c2d4" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -356,7 +292,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "83b848a16034c2fb423d1bb0f5be7784" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "c167799c2d9cab815d7c9b23337f162e" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -380,11 +316,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } - @Test public void testCountCovariatesFailWithoutDBSNP() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", ""); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();