Difference is now the general form -- used to be SummarizedDifference. The old Difference class is now a subclass of Difference that includes pointers to specific the master and test DiffElements.

Added a size() function that calculates the number of elements tree from a DiffElement.
This commit is contained in:
Mark DePristo 2011-07-12 15:20:28 -04:00
parent 05212aea62
commit ccedd6ff4c
9 changed files with 176 additions and 135 deletions

View File

@ -115,4 +115,8 @@ public class DiffElement {
else
throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
}
public int size() {
return 1 + getValue().size();
}
}

View File

@ -24,11 +24,9 @@
package org.broadinstitute.sting.gatk.walkers.diffengine;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -60,7 +58,7 @@ public class DiffEngine {
//
// --------------------------------------------------------------------------------
public List<Difference> diff(DiffElement master, DiffElement test) {
public List<SpecificDifference> diff(DiffElement master, DiffElement test) {
DiffValue masterValue = master.getValue();
DiffValue testValue = test.getValue();
@ -70,14 +68,14 @@ public class DiffEngine {
return diff(masterValue, testValue);
} else {
// structural difference in types. one is node, other is leaf
return Arrays.asList(new Difference(master, test));
return Arrays.asList(new SpecificDifference(master, test));
}
}
public List<Difference> diff(DiffNode master, DiffNode test) {
public List<SpecificDifference> diff(DiffNode master, DiffNode test) {
Set<String> allNames = new HashSet<String>(master.getElementNames());
allNames.addAll(test.getElementNames());
List<Difference> diffs = new ArrayList<Difference>();
List<SpecificDifference> diffs = new ArrayList<SpecificDifference>();
for ( String name : allNames ) {
DiffElement masterElt = master.getElement(name);
@ -86,7 +84,7 @@ public class DiffEngine {
throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
} else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
// todo -- should one of these be a special MISSING item?
diffs.add(new Difference(masterElt, testElt));
diffs.add(new SpecificDifference(masterElt, testElt));
} else {
diffs.addAll(diff(masterElt, testElt));
}
@ -95,11 +93,11 @@ public class DiffEngine {
return diffs;
}
public List<Difference> diff(DiffValue master, DiffValue test) {
public List<SpecificDifference> diff(DiffValue master, DiffValue test) {
if ( master.getValue().equals(test.getValue()) ) {
return Collections.emptyList();
} else {
return Arrays.asList(new Difference(master.getBinding(), test.getBinding()));
return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding()));
}
}
@ -147,64 +145,68 @@ public class DiffEngine {
* @param params determines how we display the items
* @param diffs
*/
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
public void reportSummarizedDifferences(List<SpecificDifference> diffs, SummaryReportParams params ) {
printSummaryReport(summarizeDifferences(diffs), params );
}
public List<SummarizedDifference> summarizeDifferences(List<Difference> diffs) {
List<String[]> diffPaths = new ArrayList<String[]>(diffs.size());
for ( Difference diff1 : diffs ) {
diffPaths.add(diffNameToPath(diff1.getFullyQualifiedName()));
}
return summarizedDifferencesOfPaths(diffPaths);
public List<Difference> summarizeDifferences(List<SpecificDifference> diffs) {
return summarizedDifferencesOfPaths(diffs);
}
final protected static String[] diffNameToPath(String diffName) {
return diffName.split("\\.");
}
protected List<SummarizedDifference> summarizedDifferencesOfPaths(List<String[]> diffPaths) {
Map<String, SummarizedDifference> summaries = new HashMap<String, SummarizedDifference>();
protected List<Difference> summarizedDifferencesOfPathsFromString(List<String> singletonDiffs) {
List<Difference> diffs = new ArrayList<Difference>();
for ( String diff : singletonDiffs ) {
diffs.add(new Difference(diff));
}
return summarizedDifferencesOfPaths(diffs);
}
protected List<Difference> summarizedDifferencesOfPaths(List<? extends Difference> singletonDiffs) {
Map<String, Difference> summaries = new HashMap<String, Difference>();
// create the initial set of differences
for ( int i = 0; i < diffPaths.size(); i++ ) {
for ( int i = 0; i < singletonDiffs.size(); i++ ) {
for ( int j = 0; j <= i; j++ ) {
String[] diffPath1 = diffPaths.get(i);
String[] diffPath2 = diffPaths.get(j);
if ( diffPath1.length == diffPath2.length ) {
int lcp = longestCommonPostfix(diffPath1, diffPath2);
String path = lcp > 0 ? summarizedPath(diffPath2, lcp) : Utils.join(".", diffPath2);
Difference diffPath1 = singletonDiffs.get(i);
Difference diffPath2 = singletonDiffs.get(j);
if ( diffPath1.length() == diffPath2.length() ) {
int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts());
String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath();
addSummary(summaries, path, true);
}
}
}
// count differences
for ( String[] diffPath : diffPaths ) {
for ( SummarizedDifference sumDiff : summaries.values() ) {
if ( sumDiff.matches(diffPath) )
for ( Difference diffPath : singletonDiffs ) {
for ( Difference sumDiff : summaries.values() ) {
if ( sumDiff.matches(diffPath.getParts()) )
addSummary(summaries, sumDiff.getPath(), false);
}
}
List<SummarizedDifference> sortedSummaries = new ArrayList<SummarizedDifference>(summaries.values());
List<Difference> sortedSummaries = new ArrayList<Difference>(summaries.values());
Collections.sort(sortedSummaries);
return sortedSummaries;
}
private static void addSummary(Map<String, SummarizedDifference> summaries, String path, boolean onlyCatalog) {
private static void addSummary(Map<String, Difference> summaries, String path, boolean onlyCatalog) {
if ( summaries.containsKey(path) ) {
if ( ! onlyCatalog )
summaries.get(path).incCount();
} else {
SummarizedDifference sumDiff = new SummarizedDifference(path);
Difference sumDiff = new Difference(path);
summaries.put(sumDiff.getPath(), sumDiff);
}
}
protected void printSummaryReport(List<SummarizedDifference> sortedSummaries, SummaryReportParams params ) {
protected void printSummaryReport(List<Difference> sortedSummaries, SummaryReportParams params ) {
GATKReport report = new GATKReport();
final String tableName = "diffences";
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information");
@ -213,7 +215,7 @@ public class DiffEngine {
table.addColumn("NumberOfOccurrences", 0);
int count = 0, count1 = 0;
for ( SummarizedDifference diff : sortedSummaries ) {
for ( Difference diff : sortedSummaries ) {
if ( diff.getCount() < params.minSumDiffToShow )
// in order, so break as soon as the count is too low
break;
@ -261,76 +263,6 @@ public class DiffEngine {
return Utils.join(".", parts);
}
/**
* TODO -- all of the algorithms above should use SummarizedDifference instead
* TODO -- of some SummarizedDifferences and some low-level String[]
*/
public static class SummarizedDifference implements Comparable<SummarizedDifference> {
final String path; // X.Y.Z
final String[] parts;
int count = 0;
public SummarizedDifference(String path) {
this.path = path;
this.parts = diffNameToPath(path);
}
public void incCount() { count++; }
public int getCount() {
return count;
}
/**
* The fully qualified path object A.B.C etc
* @return
*/
public String getPath() {
return path;
}
/**
* @return the length of the parts of this summary
*/
public int length() {
return this.parts.length;
}
/**
* Returns true if the string parts matches this summary. Matches are
* must be equal() everywhere where this summary isn't *.
* @param otherParts
* @return
*/
public boolean matches(String[] otherParts) {
if ( otherParts.length != length() )
return false;
// TODO optimization: can start at right most non-star element
for ( int i = 0; i < length(); i++ ) {
String part = parts[i];
if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
return false;
}
return true;
}
@Override
public String toString() {
return String.format("%s:%d", getPath(), getCount());
}
@Override
public int compareTo(SummarizedDifference other) {
// sort first highest to lowest count, then by lowest to highest path
int countCmp = Integer.valueOf(count).compareTo(other.count);
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
}
}
// --------------------------------------------------------------------------------
//
// plugin manager
@ -404,7 +336,7 @@ public class DiffEngine {
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
DiffElement test = diffEngine.createDiffableFromFile(testFile);
List<Difference> diffs = diffEngine.diff(master, test);
List<SpecificDifference> diffs = diffEngine.diff(master, test);
diffEngine.reportSummarizedDifferences(diffs, params);
return true;
} else {

View File

@ -153,6 +153,13 @@ public class DiffNode extends DiffValue {
add(new DiffElement(name, this.getBinding(), new DiffValue(value)));
}
public int size() {
int count = 0;
for ( DiffElement value : getElements() )
count += value.size();
return count;
}
// ---------------------------------------------------------------------------
//
// toString

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.apache.xmlbeans.impl.tool.Diff;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -95,18 +94,20 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
public void onTraversalDone(Integer sum) {
out.printf("Reading master file %s%n", masterFile);
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
out.printf(" Read %d objects%n", master.size());
out.printf("Reading test file %s%n", testFile);
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
out.printf(" Read %d objects%n", test.size());
// out.printf("Master diff objects%n");
// out.println(master.toString());
// out.printf("Test diff objects%n");
// out.println(test.toString());
List<Difference> diffs = diffEngine.diff(master, test);
List<SpecificDifference> diffs = diffEngine.diff(master, test);
if ( showItemizedDifferences ) {
out.printf("Itemized results%n");
for ( Difference diff : diffs )
for ( SpecificDifference diff : diffs )
out.printf("DIFF: %s%n", diff.toString());
}

View File

@ -87,4 +87,5 @@ public class DiffValue {
public boolean isAtomic() { return true; }
public boolean isCompound() { return ! isAtomic(); }
public int size() { return 1; }
}

View File

@ -24,35 +24,72 @@
package org.broadinstitute.sting.gatk.walkers.diffengine;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 12:53 PM
*
* Represents a specific difference between two specific DiffElements
*/
public class Difference {
DiffElement master, test;
public class Difference implements Comparable<Difference> {
final String path; // X.Y.Z
final String[] parts;
int count = 0;
public Difference(DiffElement master, DiffElement test) {
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
this.master = master;
this.test = test;
public Difference(String path) {
this.path = path;
this.parts = DiffEngine.diffNameToPath(path);
}
public String[] getParts() {
return parts;
}
public void incCount() { count++; }
public int getCount() {
return count;
}
/**
* The fully qualified path object A.B.C etc
* @return
*/
public String getPath() {
return path;
}
/**
* @return the length of the parts of this summary
*/
public int length() {
return this.parts.length;
}
/**
* Returns true if the string parts matches this summary. Matches are
* must be equal() everywhere where this summary isn't *.
* @param otherParts
* @return
*/
public boolean matches(String[] otherParts) {
if ( otherParts.length != length() )
return false;
// TODO optimization: can start at right most non-star element
for ( int i = 0; i < length(); i++ ) {
String part = parts[i];
if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
return false;
}
return true;
}
@Override
public String toString() {
return String.format("%s:%s!=%s",
getFullyQualifiedName(),
getOneLineString(master),
getOneLineString(test));
return String.format("%s:%d", getPath(), getCount());
}
public String getFullyQualifiedName() {
return (master == null ? test : master).fullyQualifiedName();
@Override
public int compareTo(Difference other) {
// sort first highest to lowest count, then by lowest to highest path
int countCmp = Integer.valueOf(count).compareTo(other.count);
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
}
private static String getOneLineString(DiffElement elt) {
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
}
}

View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 12:53 PM
*
* Represents a specific difference between two specific DiffElements
*/
public class SpecificDifference extends Difference {
DiffElement master, test;
public SpecificDifference(DiffElement master, DiffElement test) {
super(createName(master, test));
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
this.master = master;
this.test = test;
}
public String toString() {
return String.format("%s:%s!=%s",
getPath(),
getOneLineString(master),
getOneLineString(test));
}
private static String createName(DiffElement master, DiffElement test) {
return (master == null ? test : master).fullyQualifiedName();
}
private static String getOneLineString(DiffElement elt) {
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
}
}

View File

@ -99,7 +99,7 @@ public class DiffEngineUnitTest extends BaseTest {
logger.warn("Test tree1: " + test.tree1.toOneLineString());
logger.warn("Test tree2: " + test.tree2.toOneLineString());
List<Difference> diffs = engine.diff(test.tree1, test.tree2);
List<SpecificDifference> diffs = engine.diff(test.tree1, test.tree2);
logger.warn("Test expected diff : " + test.differences);
logger.warn("Observed diffs : " + diffs);
}
@ -185,12 +185,12 @@ public class DiffEngineUnitTest extends BaseTest {
List<String[]> diffPaths = new ArrayList<String[]>(diffs.size());
for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); }
List<DiffEngine.SummarizedDifference> sumDiffs = engine.summarizedDifferencesOfPaths(diffPaths);
List<Difference> sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs);
Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs);
for ( int i = 0; i < sumDiffs.size(); i++ ) {
DiffEngine.SummarizedDifference sumDiff = sumDiffs.get(i);
Difference sumDiff = sumDiffs.get(i);
String expected = expecteds.get(i);
String[] pathCount = expected.split(":");
String path = pathCount[0];

View File

@ -87,7 +87,7 @@ public class DifferenceUnitTest extends BaseTest {
logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString()));
logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString()));
logger.warn("Test expected diff : " + test.difference);
Difference diff = new Difference(test.tree1, test.tree2);
SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2);
logger.warn("Observed diffs : " + diff);
Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );