From ccedd6ff4c942c20c1a57f6a6bf65c5cb63b6e16 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 12 Jul 2011 15:20:28 -0400 Subject: [PATCH] Difference is now the general form -- used to be SummarizedDifference. The old Difference class is now a subclass of Difference that includes pointers to specific the master and test DiffElements. Added a size() function that calculates the number of elements tree from a DiffElement. --- .../gatk/walkers/diffengine/DiffElement.java | 4 + .../gatk/walkers/diffengine/DiffEngine.java | 142 +++++------------- .../gatk/walkers/diffengine/DiffNode.java | 7 + .../walkers/diffengine/DiffObjectsWalker.java | 7 +- .../gatk/walkers/diffengine/DiffValue.java | 1 + .../gatk/walkers/diffengine/Difference.java | 83 +++++++--- .../diffengine/SpecificDifference.java | 59 ++++++++ .../diffengine/DiffEngineUnitTest.java | 6 +- .../diffengine/DifferenceUnitTest.java | 2 +- 9 files changed, 176 insertions(+), 135 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java index eff24bb88..4c3f7bd95 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java @@ -115,4 +115,8 @@ public class DiffElement { else throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this); } + + public int size() { + return 1 + getValue().size(); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 54a7a464d..6d85df71d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -24,11 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; -import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -60,7 +58,7 @@ public class DiffEngine { // // -------------------------------------------------------------------------------- - public List diff(DiffElement master, DiffElement test) { + public List diff(DiffElement master, DiffElement test) { DiffValue masterValue = master.getValue(); DiffValue testValue = test.getValue(); @@ -70,14 +68,14 @@ public class DiffEngine { return diff(masterValue, testValue); } else { // structural difference in types. one is node, other is leaf - return Arrays.asList(new Difference(master, test)); + return Arrays.asList(new SpecificDifference(master, test)); } } - public List diff(DiffNode master, DiffNode test) { + public List diff(DiffNode master, DiffNode test) { Set allNames = new HashSet(master.getElementNames()); allNames.addAll(test.getElementNames()); - List diffs = new ArrayList(); + List diffs = new ArrayList(); for ( String name : allNames ) { DiffElement masterElt = master.getElement(name); @@ -86,7 +84,7 @@ public class DiffEngine { throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name); } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value // todo -- should one of these be a special MISSING item? - diffs.add(new Difference(masterElt, testElt)); + diffs.add(new SpecificDifference(masterElt, testElt)); } else { diffs.addAll(diff(masterElt, testElt)); } @@ -95,11 +93,11 @@ public class DiffEngine { return diffs; } - public List diff(DiffValue master, DiffValue test) { + public List diff(DiffValue master, DiffValue test) { if ( master.getValue().equals(test.getValue()) ) { return Collections.emptyList(); } else { - return Arrays.asList(new Difference(master.getBinding(), test.getBinding())); + return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding())); } } @@ -147,64 +145,68 @@ public class DiffEngine { * @param params determines how we display the items * @param diffs */ - public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { + public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { printSummaryReport(summarizeDifferences(diffs), params ); } - public List summarizeDifferences(List diffs) { - List diffPaths = new ArrayList(diffs.size()); - - for ( Difference diff1 : diffs ) { - diffPaths.add(diffNameToPath(diff1.getFullyQualifiedName())); - } - - return summarizedDifferencesOfPaths(diffPaths); + public List summarizeDifferences(List diffs) { + return summarizedDifferencesOfPaths(diffs); } final protected static String[] diffNameToPath(String diffName) { return diffName.split("\\."); } - protected List summarizedDifferencesOfPaths(List diffPaths) { - Map summaries = new HashMap(); + protected List summarizedDifferencesOfPathsFromString(List singletonDiffs) { + List diffs = new ArrayList(); + + for ( String diff : singletonDiffs ) { + diffs.add(new Difference(diff)); + } + + return summarizedDifferencesOfPaths(diffs); + } + + protected List summarizedDifferencesOfPaths(List singletonDiffs) { + Map summaries = new HashMap(); // create the initial set of differences - for ( int i = 0; i < diffPaths.size(); i++ ) { + for ( int i = 0; i < singletonDiffs.size(); i++ ) { for ( int j = 0; j <= i; j++ ) { - String[] diffPath1 = diffPaths.get(i); - String[] diffPath2 = diffPaths.get(j); - if ( diffPath1.length == diffPath2.length ) { - int lcp = longestCommonPostfix(diffPath1, diffPath2); - String path = lcp > 0 ? summarizedPath(diffPath2, lcp) : Utils.join(".", diffPath2); + Difference diffPath1 = singletonDiffs.get(i); + Difference diffPath2 = singletonDiffs.get(j); + if ( diffPath1.length() == diffPath2.length() ) { + int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts()); + String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath(); addSummary(summaries, path, true); } } } // count differences - for ( String[] diffPath : diffPaths ) { - for ( SummarizedDifference sumDiff : summaries.values() ) { - if ( sumDiff.matches(diffPath) ) + for ( Difference diffPath : singletonDiffs ) { + for ( Difference sumDiff : summaries.values() ) { + if ( sumDiff.matches(diffPath.getParts()) ) addSummary(summaries, sumDiff.getPath(), false); } } - List sortedSummaries = new ArrayList(summaries.values()); + List sortedSummaries = new ArrayList(summaries.values()); Collections.sort(sortedSummaries); return sortedSummaries; } - private static void addSummary(Map summaries, String path, boolean onlyCatalog) { + private static void addSummary(Map summaries, String path, boolean onlyCatalog) { if ( summaries.containsKey(path) ) { if ( ! onlyCatalog ) summaries.get(path).incCount(); } else { - SummarizedDifference sumDiff = new SummarizedDifference(path); + Difference sumDiff = new Difference(path); summaries.put(sumDiff.getPath(), sumDiff); } } - protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) { + protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) { GATKReport report = new GATKReport(); final String tableName = "diffences"; report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information"); @@ -213,7 +215,7 @@ public class DiffEngine { table.addColumn("NumberOfOccurrences", 0); int count = 0, count1 = 0; - for ( SummarizedDifference diff : sortedSummaries ) { + for ( Difference diff : sortedSummaries ) { if ( diff.getCount() < params.minSumDiffToShow ) // in order, so break as soon as the count is too low break; @@ -261,76 +263,6 @@ public class DiffEngine { return Utils.join(".", parts); } - /** - * TODO -- all of the algorithms above should use SummarizedDifference instead - * TODO -- of some SummarizedDifferences and some low-level String[] - */ - public static class SummarizedDifference implements Comparable { - final String path; // X.Y.Z - final String[] parts; - int count = 0; - - public SummarizedDifference(String path) { - this.path = path; - this.parts = diffNameToPath(path); - } - - public void incCount() { count++; } - - public int getCount() { - return count; - } - - /** - * The fully qualified path object A.B.C etc - * @return - */ - public String getPath() { - return path; - } - - /** - * @return the length of the parts of this summary - */ - public int length() { - return this.parts.length; - } - - /** - * Returns true if the string parts matches this summary. Matches are - * must be equal() everywhere where this summary isn't *. - * @param otherParts - * @return - */ - public boolean matches(String[] otherParts) { - if ( otherParts.length != length() ) - return false; - - // TODO optimization: can start at right most non-star element - for ( int i = 0; i < length(); i++ ) { - String part = parts[i]; - if ( ! part.equals("*") && ! part.equals(otherParts[i]) ) - return false; - } - - return true; - } - - @Override - public String toString() { - return String.format("%s:%d", getPath(), getCount()); - } - - @Override - public int compareTo(SummarizedDifference other) { - // sort first highest to lowest count, then by lowest to highest path - int countCmp = Integer.valueOf(count).compareTo(other.count); - return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); - } - - - } - // -------------------------------------------------------------------------------- // // plugin manager @@ -404,7 +336,7 @@ public class DiffEngine { if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) { DiffElement master = diffEngine.createDiffableFromFile(masterFile); DiffElement test = diffEngine.createDiffableFromFile(testFile); - List diffs = diffEngine.diff(master, test); + List diffs = diffEngine.diff(master, test); diffEngine.reportSummarizedDifferences(diffs, params); return true; } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java index 3e1be8609..2f48de2d3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java @@ -153,6 +153,13 @@ public class DiffNode extends DiffValue { add(new DiffElement(name, this.getBinding(), new DiffValue(value))); } + public int size() { + int count = 0; + for ( DiffElement value : getElements() ) + count += value.size(); + return count; + } + // --------------------------------------------------------------------------- // // toString diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index fe411b195..ecb836af9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; -import org.apache.xmlbeans.impl.tool.Diff; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -95,18 +94,20 @@ public class DiffObjectsWalker extends RodWalker { public void onTraversalDone(Integer sum) { out.printf("Reading master file %s%n", masterFile); DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ); + out.printf(" Read %d objects%n", master.size()); out.printf("Reading test file %s%n", testFile); DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ); + out.printf(" Read %d objects%n", test.size()); // out.printf("Master diff objects%n"); // out.println(master.toString()); // out.printf("Test diff objects%n"); // out.println(test.toString()); - List diffs = diffEngine.diff(master, test); + List diffs = diffEngine.diff(master, test); if ( showItemizedDifferences ) { out.printf("Itemized results%n"); - for ( Difference diff : diffs ) + for ( SpecificDifference diff : diffs ) out.printf("DIFF: %s%n", diff.toString()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java index 7245e9e8d..3750496a1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java @@ -87,4 +87,5 @@ public class DiffValue { public boolean isAtomic() { return true; } public boolean isCompound() { return ! isAtomic(); } + public int size() { return 1; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java index 6627a4cc5..efc6ef160 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java @@ -24,35 +24,72 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: 7/4/11 - * Time: 12:53 PM - * - * Represents a specific difference between two specific DiffElements - */ -public class Difference { - DiffElement master, test; +public class Difference implements Comparable { + final String path; // X.Y.Z + final String[] parts; + int count = 0; - public Difference(DiffElement master, DiffElement test) { - if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null"); - this.master = master; - this.test = test; + public Difference(String path) { + this.path = path; + this.parts = DiffEngine.diffNameToPath(path); } + public String[] getParts() { + return parts; + } + + public void incCount() { count++; } + + public int getCount() { + return count; + } + + /** + * The fully qualified path object A.B.C etc + * @return + */ + public String getPath() { + return path; + } + + /** + * @return the length of the parts of this summary + */ + public int length() { + return this.parts.length; + } + + /** + * Returns true if the string parts matches this summary. Matches are + * must be equal() everywhere where this summary isn't *. + * @param otherParts + * @return + */ + public boolean matches(String[] otherParts) { + if ( otherParts.length != length() ) + return false; + + // TODO optimization: can start at right most non-star element + for ( int i = 0; i < length(); i++ ) { + String part = parts[i]; + if ( ! part.equals("*") && ! part.equals(otherParts[i]) ) + return false; + } + + return true; + } + + @Override public String toString() { - return String.format("%s:%s!=%s", - getFullyQualifiedName(), - getOneLineString(master), - getOneLineString(test)); + return String.format("%s:%d", getPath(), getCount()); } - public String getFullyQualifiedName() { - return (master == null ? test : master).fullyQualifiedName(); + @Override + public int compareTo(Difference other) { + // sort first highest to lowest count, then by lowest to highest path + int countCmp = Integer.valueOf(count).compareTo(other.count); + return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); } - private static String getOneLineString(DiffElement elt) { - return elt == null ? "MISSING" : elt.getValue().toOneLineString(); - } + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java new file mode 100644 index 000000000..2fe9b47f8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/4/11 + * Time: 12:53 PM + * + * Represents a specific difference between two specific DiffElements + */ +public class SpecificDifference extends Difference { + DiffElement master, test; + + public SpecificDifference(DiffElement master, DiffElement test) { + super(createName(master, test)); + if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null"); + this.master = master; + this.test = test; + } + + public String toString() { + return String.format("%s:%s!=%s", + getPath(), + getOneLineString(master), + getOneLineString(test)); + } + + private static String createName(DiffElement master, DiffElement test) { + return (master == null ? test : master).fullyQualifiedName(); + } + + private static String getOneLineString(DiffElement elt) { + return elt == null ? "MISSING" : elt.getValue().toOneLineString(); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java index cd6c3598a..96dfec6e8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java @@ -99,7 +99,7 @@ public class DiffEngineUnitTest extends BaseTest { logger.warn("Test tree1: " + test.tree1.toOneLineString()); logger.warn("Test tree2: " + test.tree2.toOneLineString()); - List diffs = engine.diff(test.tree1, test.tree2); + List diffs = engine.diff(test.tree1, test.tree2); logger.warn("Test expected diff : " + test.differences); logger.warn("Observed diffs : " + diffs); } @@ -185,12 +185,12 @@ public class DiffEngineUnitTest extends BaseTest { List diffPaths = new ArrayList(diffs.size()); for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); } - List sumDiffs = engine.summarizedDifferencesOfPaths(diffPaths); + List sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs); Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs); for ( int i = 0; i < sumDiffs.size(); i++ ) { - DiffEngine.SummarizedDifference sumDiff = sumDiffs.get(i); + Difference sumDiff = sumDiffs.get(i); String expected = expecteds.get(i); String[] pathCount = expected.split(":"); String path = pathCount[0]; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java index da272ec30..64579a01b 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java @@ -87,7 +87,7 @@ public class DifferenceUnitTest extends BaseTest { logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString())); logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString())); logger.warn("Test expected diff : " + test.difference); - Difference diff = new Difference(test.tree1, test.tree2); + SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2); logger.warn("Observed diffs : " + diff); Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );