From 6f26c07b8506756cc62f566099ca08adde51b101 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 18 Jul 2011 10:42:35 -0400 Subject: [PATCH] Removed the SpecificDifference class. Now Difference classes always have the option to remember specific master and test values. This means that all summarized differences carry with them specific examples of their differences. Consequently, now even summarized differences give at least one example of the specific difference, even when the count of the difference is > 1. Unit tests updated. Added DiffObjects integrationtest. VCFDiffableReader now specifically reads the first line of the VCF file to capture the version number. --- .../gatk/walkers/diffengine/DiffEngine.java | 42 +++++------ .../walkers/diffengine/DiffObjectsWalker.java | 4 +- .../gatk/walkers/diffengine/Difference.java | 45 +++++++++++- .../diffengine/SpecificDifference.java | 59 --------------- .../walkers/diffengine/VCFDiffableReader.java | 6 ++ .../diffengine/DiffEngineUnitTest.java | 2 +- .../DiffObjectsIntegrationTest.java | 71 +++++++++++++++++++ .../diffengine/DiffableReaderUnitTest.java | 2 +- .../diffengine/DifferenceUnitTest.java | 10 +-- 9 files changed, 151 insertions(+), 90 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 2f87a900a..e3910ef11 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -58,7 +58,7 @@ public class DiffEngine { // // -------------------------------------------------------------------------------- - public List diff(DiffElement master, DiffElement test) { + public List diff(DiffElement master, DiffElement test) { DiffValue masterValue = master.getValue(); DiffValue testValue = test.getValue(); @@ -68,14 +68,14 @@ public class DiffEngine { return diff(masterValue, testValue); } else { // structural difference in types. one is node, other is leaf - return Arrays.asList(new SpecificDifference(master, test)); + return Arrays.asList(new Difference(master, test)); } } - public List diff(DiffNode master, DiffNode test) { + public List diff(DiffNode master, DiffNode test) { Set allNames = new HashSet(master.getElementNames()); allNames.addAll(test.getElementNames()); - List diffs = new ArrayList(); + List diffs = new ArrayList(); for ( String name : allNames ) { DiffElement masterElt = master.getElement(name); @@ -84,7 +84,7 @@ public class DiffEngine { throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name); } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value // todo -- should one of these be a special MISSING item? - diffs.add(new SpecificDifference(masterElt, testElt)); + diffs.add(new Difference(masterElt, testElt)); } else { diffs.addAll(diff(masterElt, testElt)); } @@ -93,11 +93,11 @@ public class DiffEngine { return diffs; } - public List diff(DiffValue master, DiffValue test) { + public List diff(DiffValue master, DiffValue test) { if ( master.getValue().equals(test.getValue()) ) { return Collections.emptyList(); } else { - return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding())); + return Arrays.asList(new Difference(master.getBinding(), test.getBinding())); } } @@ -145,11 +145,11 @@ public class DiffEngine { * @param params determines how we display the items * @param diffs */ - public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { + public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { printSummaryReport(summarizeDifferences(diffs), params ); } - public List summarizeDifferences(List diffs) { + public List summarizeDifferences(List diffs) { return summarizedDifferencesOfPaths(diffs); } @@ -177,8 +177,12 @@ public class DiffEngine { Difference diffPath2 = singletonDiffs.get(j); if ( diffPath1.length() == diffPath2.length() ) { int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts()); - String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath(); - addSummary(summaries, path, true); + String path = diffPath2.getPath(); + if ( lcp != 0 && lcp != diffPath1.length() ) + path = summarizedPath(diffPath2.getParts(), lcp); + Difference sumDiff = new Difference(path, diffPath2.getMaster(), diffPath2.getTest()); + sumDiff.setCount(0); + addSummaryIfMissing(summaries, sumDiff); } } } @@ -187,7 +191,7 @@ public class DiffEngine { for ( Difference diffPath : singletonDiffs ) { for ( Difference sumDiff : summaries.values() ) { if ( sumDiff.matches(diffPath.getParts()) ) - addSummary(summaries, sumDiff.getPath(), false); + sumDiff.incCount(); } } @@ -196,13 +200,9 @@ public class DiffEngine { return sortedSummaries; } - private static void addSummary(Map summaries, String path, boolean onlyCatalog) { - if ( summaries.containsKey(path) ) { - if ( ! onlyCatalog ) - summaries.get(path).incCount(); - } else { - Difference sumDiff = new Difference(path); - summaries.put(sumDiff.getPath(), sumDiff); + protected void addSummaryIfMissing(Map summaries, Difference diff) { + if ( ! summaries.containsKey(diff.getPath()) ) { + summaries.put(diff.getPath(), diff); } } @@ -213,6 +213,7 @@ public class DiffEngine { GATKReportTable table = report.getTable(tableName); table.addPrimaryKey("Difference", true); table.addColumn("NumberOfOccurrences", 0); + table.addColumn("SpecificDifference", 0); int count = 0, count1 = 0; for ( Difference diff : sortedSummaries ) { @@ -230,6 +231,7 @@ public class DiffEngine { } table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); + table.set(diff.getPath(), "SpecificDifference", diff.valueDiffString()); } table.write(params.out); @@ -336,7 +338,7 @@ public class DiffEngine { if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) { DiffElement master = diffEngine.createDiffableFromFile(masterFile); DiffElement test = diffEngine.createDiffableFromFile(testFile); - List diffs = diffEngine.diff(master, test); + List diffs = diffEngine.diff(master, test); diffEngine.reportSummarizedDifferences(diffs, params); return true; } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index ecb836af9..8e362dcc4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -104,10 +104,10 @@ public class DiffObjectsWalker extends RodWalker { // out.printf("Test diff objects%n"); // out.println(test.toString()); - List diffs = diffEngine.diff(master, test); + List diffs = diffEngine.diff(master, test); if ( showItemizedDifferences ) { out.printf("Itemized results%n"); - for ( SpecificDifference diff : diffs ) + for ( Difference diff : diffs ) out.printf("DIFF: %s%n", diff.toString()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java index efc6ef160..81b6f7e0e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java @@ -27,13 +27,24 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; public class Difference implements Comparable { final String path; // X.Y.Z final String[] parts; - int count = 0; + int count = 1; + DiffElement master = null , test = null; public Difference(String path) { this.path = path; this.parts = DiffEngine.diffNameToPath(path); } + public Difference(DiffElement master, DiffElement test) { + this(createPath(master, test), master, test); + } + + public Difference(String path, DiffElement master, DiffElement test) { + this(path); + this.master = master; + this.test = test; + } + public String[] getParts() { return parts; } @@ -44,6 +55,10 @@ public class Difference implements Comparable { return count; } + public void setCount(int count) { + this.count = count; + } + /** * The fully qualified path object A.B.C etc * @return @@ -81,7 +96,7 @@ public class Difference implements Comparable { @Override public String toString() { - return String.format("%s:%d", getPath(), getCount()); + return String.format("%s:%d:%s", getPath(), getCount(), valueDiffString()); } @Override @@ -91,5 +106,31 @@ public class Difference implements Comparable { return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); } + public String valueDiffString() { + if ( hasSpecificDifference() ) { + return String.format("%s!=%s", getOneLineString(master), getOneLineString(test)); + } else { + return "N/A"; + } + } + private static String createPath(DiffElement master, DiffElement test) { + return (master == null ? test : master).fullyQualifiedName(); + } + + private static String getOneLineString(DiffElement elt) { + return elt == null ? "MISSING" : elt.getValue().toOneLineString(); + } + + public boolean hasSpecificDifference() { + return master != null || test != null; + } + + public DiffElement getMaster() { + return master; + } + + public DiffElement getTest() { + return test; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java deleted file mode 100644 index 2fe9b47f8..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.diffengine; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: 7/4/11 - * Time: 12:53 PM - * - * Represents a specific difference between two specific DiffElements - */ -public class SpecificDifference extends Difference { - DiffElement master, test; - - public SpecificDifference(DiffElement master, DiffElement test) { - super(createName(master, test)); - if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null"); - this.master = master; - this.test = test; - } - - public String toString() { - return String.format("%s:%s!=%s", - getPath(), - getOneLineString(master), - getOneLineString(test)); - } - - private static String createName(DiffElement master, DiffElement test) { - return (master == null ? test : master).fullyQualifiedName(); - } - - private static String getOneLineString(DiffElement elt) { - return elt == null ? "MISSING" : elt.getValue().toOneLineString(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 4e44578c7..df2a5cda1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -53,7 +53,13 @@ public class VCFDiffableReader implements DiffableReader { public DiffElement readFromFile(File file, int maxElementsToRead) { DiffNode root = DiffNode.rooted(file.getName()); try { + // read the version line from the file LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); + final String version = lineReader.readLine(); + root.add("VERSION", version); + lineReader.close(); + + lineReader = new AsciiLineReader(new FileInputStream(file)); VCFCodec vcfCodec = new VCFCodec(); // must be read as state is stored in reader itself diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java index 96dfec6e8..2ae19264e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java @@ -99,7 +99,7 @@ public class DiffEngineUnitTest extends BaseTest { logger.warn("Test tree1: " + test.tree1.toOneLineString()); logger.warn("Test tree2: " + test.tree2.toOneLineString()); - List diffs = engine.diff(test.tree1, test.tree2); + List diffs = engine.diff(test.tree1, test.tree2); logger.warn("Test expected diff : " + test.differences); logger.warn("Observed diffs : " + diffs); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java new file mode 100644 index 000000000..cca1eccb4 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class DiffObjectsIntegrationTest extends WalkerTest { + private class TestParams extends TestDataProvider { + public File master, test; + public String MD5; + + private TestParams(String master, String test, String MD5) { + super(TestParams.class); + this.master = new File(master); + this.test = new File(test); + this.MD5 = MD5; + } + + public String toString() { + return String.format("master=%s,test=%s,md5=%s", master, test, MD5); + } + } + + @DataProvider(name = "data") + public Object[][] createData() { + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "fb7f4e011487ca56bce865ae5468cdc5"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "423cec3befbf0a72d8bc3757ee628fc4"); + return TestParams.getTests(TestParams.class); + } + + @Test(enabled = true, dataProvider = "data") + public void testDiffs(TestParams params) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T DiffObjects -R public/testdata/exampleFASTA.fasta " + + " -m " + params.master + + " -t " + params.test + + " -o %s", + Arrays.asList(params.MD5)); + executeTest("testDiffObjects:"+params, spec).getFirst(); + } +} + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java index a0cb47770..dee7bbd88 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -87,7 +87,7 @@ public class DiffableReaderUnitTest extends BaseTest { Assert.assertSame(diff.getParent(), DiffElement.ROOT); DiffNode node = diff.getValueAsNode(); - Assert.assertEquals(node.getElements().size(), 10); + Assert.assertEquals(node.getElements().size(), 11); // chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java index 64579a01b..4e4080bc7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java @@ -75,10 +75,10 @@ public class DifferenceUnitTest extends BaseTest { @DataProvider(name = "data") public Object[][] createTrees() { - new DifferenceTest("A=X", "A=Y", "A:X!=Y"); - new DifferenceTest("A=Y", "A=X", "A:Y!=X"); - new DifferenceTest(DiffNode.fromString("A=X"), null, "A:X!=MISSING"); - new DifferenceTest(null, DiffNode.fromString("A=X"), "A:MISSING!=X"); + new DifferenceTest("A=X", "A=Y", "A:1:X!=Y"); + new DifferenceTest("A=Y", "A=X", "A:1:Y!=X"); + new DifferenceTest(DiffNode.fromString("A=X"), null, "A:1:X!=MISSING"); + new DifferenceTest(null, DiffNode.fromString("A=X"), "A:1:MISSING!=X"); return DifferenceTest.getTests(DifferenceTest.class); } @@ -87,7 +87,7 @@ public class DifferenceUnitTest extends BaseTest { logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString())); logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString())); logger.warn("Test expected diff : " + test.difference); - SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2); + Difference diff = new Difference(test.tree1, test.tree2); logger.warn("Observed diffs : " + diff); Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );