diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 2f87a900a..e3910ef11 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -58,7 +58,7 @@ public class DiffEngine { // // -------------------------------------------------------------------------------- - public List diff(DiffElement master, DiffElement test) { + public List diff(DiffElement master, DiffElement test) { DiffValue masterValue = master.getValue(); DiffValue testValue = test.getValue(); @@ -68,14 +68,14 @@ public class DiffEngine { return diff(masterValue, testValue); } else { // structural difference in types. one is node, other is leaf - return Arrays.asList(new SpecificDifference(master, test)); + return Arrays.asList(new Difference(master, test)); } } - public List diff(DiffNode master, DiffNode test) { + public List diff(DiffNode master, DiffNode test) { Set allNames = new HashSet(master.getElementNames()); allNames.addAll(test.getElementNames()); - List diffs = new ArrayList(); + List diffs = new ArrayList(); for ( String name : allNames ) { DiffElement masterElt = master.getElement(name); @@ -84,7 +84,7 @@ public class DiffEngine { throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name); } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value // todo -- should one of these be a special MISSING item? - diffs.add(new SpecificDifference(masterElt, testElt)); + diffs.add(new Difference(masterElt, testElt)); } else { diffs.addAll(diff(masterElt, testElt)); } @@ -93,11 +93,11 @@ public class DiffEngine { return diffs; } - public List diff(DiffValue master, DiffValue test) { + public List diff(DiffValue master, DiffValue test) { if ( master.getValue().equals(test.getValue()) ) { return Collections.emptyList(); } else { - return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding())); + return Arrays.asList(new Difference(master.getBinding(), test.getBinding())); } } @@ -145,11 +145,11 @@ public class DiffEngine { * @param params determines how we display the items * @param diffs */ - public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { + public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { printSummaryReport(summarizeDifferences(diffs), params ); } - public List summarizeDifferences(List diffs) { + public List summarizeDifferences(List diffs) { return summarizedDifferencesOfPaths(diffs); } @@ -177,8 +177,12 @@ public class DiffEngine { Difference diffPath2 = singletonDiffs.get(j); if ( diffPath1.length() == diffPath2.length() ) { int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts()); - String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath(); - addSummary(summaries, path, true); + String path = diffPath2.getPath(); + if ( lcp != 0 && lcp != diffPath1.length() ) + path = summarizedPath(diffPath2.getParts(), lcp); + Difference sumDiff = new Difference(path, diffPath2.getMaster(), diffPath2.getTest()); + sumDiff.setCount(0); + addSummaryIfMissing(summaries, sumDiff); } } } @@ -187,7 +191,7 @@ public class DiffEngine { for ( Difference diffPath : singletonDiffs ) { for ( Difference sumDiff : summaries.values() ) { if ( sumDiff.matches(diffPath.getParts()) ) - addSummary(summaries, sumDiff.getPath(), false); + sumDiff.incCount(); } } @@ -196,13 +200,9 @@ public class DiffEngine { return sortedSummaries; } - private static void addSummary(Map summaries, String path, boolean onlyCatalog) { - if ( summaries.containsKey(path) ) { - if ( ! onlyCatalog ) - summaries.get(path).incCount(); - } else { - Difference sumDiff = new Difference(path); - summaries.put(sumDiff.getPath(), sumDiff); + protected void addSummaryIfMissing(Map summaries, Difference diff) { + if ( ! summaries.containsKey(diff.getPath()) ) { + summaries.put(diff.getPath(), diff); } } @@ -213,6 +213,7 @@ public class DiffEngine { GATKReportTable table = report.getTable(tableName); table.addPrimaryKey("Difference", true); table.addColumn("NumberOfOccurrences", 0); + table.addColumn("SpecificDifference", 0); int count = 0, count1 = 0; for ( Difference diff : sortedSummaries ) { @@ -230,6 +231,7 @@ public class DiffEngine { } table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); + table.set(diff.getPath(), "SpecificDifference", diff.valueDiffString()); } table.write(params.out); @@ -336,7 +338,7 @@ public class DiffEngine { if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) { DiffElement master = diffEngine.createDiffableFromFile(masterFile); DiffElement test = diffEngine.createDiffableFromFile(testFile); - List diffs = diffEngine.diff(master, test); + List diffs = diffEngine.diff(master, test); diffEngine.reportSummarizedDifferences(diffs, params); return true; } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index ecb836af9..8e362dcc4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -104,10 +104,10 @@ public class DiffObjectsWalker extends RodWalker { // out.printf("Test diff objects%n"); // out.println(test.toString()); - List diffs = diffEngine.diff(master, test); + List diffs = diffEngine.diff(master, test); if ( showItemizedDifferences ) { out.printf("Itemized results%n"); - for ( SpecificDifference diff : diffs ) + for ( Difference diff : diffs ) out.printf("DIFF: %s%n", diff.toString()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java index efc6ef160..81b6f7e0e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java @@ -27,13 +27,24 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; public class Difference implements Comparable { final String path; // X.Y.Z final String[] parts; - int count = 0; + int count = 1; + DiffElement master = null , test = null; public Difference(String path) { this.path = path; this.parts = DiffEngine.diffNameToPath(path); } + public Difference(DiffElement master, DiffElement test) { + this(createPath(master, test), master, test); + } + + public Difference(String path, DiffElement master, DiffElement test) { + this(path); + this.master = master; + this.test = test; + } + public String[] getParts() { return parts; } @@ -44,6 +55,10 @@ public class Difference implements Comparable { return count; } + public void setCount(int count) { + this.count = count; + } + /** * The fully qualified path object A.B.C etc * @return @@ -81,7 +96,7 @@ public class Difference implements Comparable { @Override public String toString() { - return String.format("%s:%d", getPath(), getCount()); + return String.format("%s:%d:%s", getPath(), getCount(), valueDiffString()); } @Override @@ -91,5 +106,31 @@ public class Difference implements Comparable { return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); } + public String valueDiffString() { + if ( hasSpecificDifference() ) { + return String.format("%s!=%s", getOneLineString(master), getOneLineString(test)); + } else { + return "N/A"; + } + } + private static String createPath(DiffElement master, DiffElement test) { + return (master == null ? test : master).fullyQualifiedName(); + } + + private static String getOneLineString(DiffElement elt) { + return elt == null ? "MISSING" : elt.getValue().toOneLineString(); + } + + public boolean hasSpecificDifference() { + return master != null || test != null; + } + + public DiffElement getMaster() { + return master; + } + + public DiffElement getTest() { + return test; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java deleted file mode 100644 index 2fe9b47f8..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.diffengine; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: 7/4/11 - * Time: 12:53 PM - * - * Represents a specific difference between two specific DiffElements - */ -public class SpecificDifference extends Difference { - DiffElement master, test; - - public SpecificDifference(DiffElement master, DiffElement test) { - super(createName(master, test)); - if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null"); - this.master = master; - this.test = test; - } - - public String toString() { - return String.format("%s:%s!=%s", - getPath(), - getOneLineString(master), - getOneLineString(test)); - } - - private static String createName(DiffElement master, DiffElement test) { - return (master == null ? test : master).fullyQualifiedName(); - } - - private static String getOneLineString(DiffElement elt) { - return elt == null ? "MISSING" : elt.getValue().toOneLineString(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 4e44578c7..df2a5cda1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -53,7 +53,13 @@ public class VCFDiffableReader implements DiffableReader { public DiffElement readFromFile(File file, int maxElementsToRead) { DiffNode root = DiffNode.rooted(file.getName()); try { + // read the version line from the file LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); + final String version = lineReader.readLine(); + root.add("VERSION", version); + lineReader.close(); + + lineReader = new AsciiLineReader(new FileInputStream(file)); VCFCodec vcfCodec = new VCFCodec(); // must be read as state is stored in reader itself diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java index 96dfec6e8..2ae19264e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java @@ -99,7 +99,7 @@ public class DiffEngineUnitTest extends BaseTest { logger.warn("Test tree1: " + test.tree1.toOneLineString()); logger.warn("Test tree2: " + test.tree2.toOneLineString()); - List diffs = engine.diff(test.tree1, test.tree2); + List diffs = engine.diff(test.tree1, test.tree2); logger.warn("Test expected diff : " + test.differences); logger.warn("Observed diffs : " + diffs); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java new file mode 100644 index 000000000..cca1eccb4 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class DiffObjectsIntegrationTest extends WalkerTest { + private class TestParams extends TestDataProvider { + public File master, test; + public String MD5; + + private TestParams(String master, String test, String MD5) { + super(TestParams.class); + this.master = new File(master); + this.test = new File(test); + this.MD5 = MD5; + } + + public String toString() { + return String.format("master=%s,test=%s,md5=%s", master, test, MD5); + } + } + + @DataProvider(name = "data") + public Object[][] createData() { + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "fb7f4e011487ca56bce865ae5468cdc5"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "423cec3befbf0a72d8bc3757ee628fc4"); + return TestParams.getTests(TestParams.class); + } + + @Test(enabled = true, dataProvider = "data") + public void testDiffs(TestParams params) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T DiffObjects -R public/testdata/exampleFASTA.fasta " + + " -m " + params.master + + " -t " + params.test + + " -o %s", + Arrays.asList(params.MD5)); + executeTest("testDiffObjects:"+params, spec).getFirst(); + } +} + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java index a0cb47770..dee7bbd88 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -87,7 +87,7 @@ public class DiffableReaderUnitTest extends BaseTest { Assert.assertSame(diff.getParent(), DiffElement.ROOT); DiffNode node = diff.getValueAsNode(); - Assert.assertEquals(node.getElements().size(), 10); + Assert.assertEquals(node.getElements().size(), 11); // chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java index 64579a01b..4e4080bc7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java @@ -75,10 +75,10 @@ public class DifferenceUnitTest extends BaseTest { @DataProvider(name = "data") public Object[][] createTrees() { - new DifferenceTest("A=X", "A=Y", "A:X!=Y"); - new DifferenceTest("A=Y", "A=X", "A:Y!=X"); - new DifferenceTest(DiffNode.fromString("A=X"), null, "A:X!=MISSING"); - new DifferenceTest(null, DiffNode.fromString("A=X"), "A:MISSING!=X"); + new DifferenceTest("A=X", "A=Y", "A:1:X!=Y"); + new DifferenceTest("A=Y", "A=X", "A:1:Y!=X"); + new DifferenceTest(DiffNode.fromString("A=X"), null, "A:1:X!=MISSING"); + new DifferenceTest(null, DiffNode.fromString("A=X"), "A:1:MISSING!=X"); return DifferenceTest.getTests(DifferenceTest.class); } @@ -87,7 +87,7 @@ public class DifferenceUnitTest extends BaseTest { logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString())); logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString())); logger.warn("Test expected diff : " + test.difference); - SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2); + Difference diff = new Difference(test.tree1, test.tree2); logger.warn("Observed diffs : " + diff); Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );