Difference is now the general form -- it used to be SummarizedDifference. The old Difference class is now SpecificDifference, a subclass of Difference that includes pointers to the specific master and test DiffElements.
Added a size() function that calculates the number of elements in the tree rooted at a DiffElement.
This commit is contained in:
parent
05212aea62
commit
ccedd6ff4c
|
|
@ -115,4 +115,8 @@ public class DiffElement {
|
|||
else
|
||||
throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return 1 + getValue().size();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,11 +24,9 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -60,7 +58,7 @@ public class DiffEngine {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public List<Difference> diff(DiffElement master, DiffElement test) {
|
||||
public List<SpecificDifference> diff(DiffElement master, DiffElement test) {
|
||||
DiffValue masterValue = master.getValue();
|
||||
DiffValue testValue = test.getValue();
|
||||
|
||||
|
|
@ -70,14 +68,14 @@ public class DiffEngine {
|
|||
return diff(masterValue, testValue);
|
||||
} else {
|
||||
// structural difference in types. one is node, other is leaf
|
||||
return Arrays.asList(new Difference(master, test));
|
||||
return Arrays.asList(new SpecificDifference(master, test));
|
||||
}
|
||||
}
|
||||
|
||||
public List<Difference> diff(DiffNode master, DiffNode test) {
|
||||
public List<SpecificDifference> diff(DiffNode master, DiffNode test) {
|
||||
Set<String> allNames = new HashSet<String>(master.getElementNames());
|
||||
allNames.addAll(test.getElementNames());
|
||||
List<Difference> diffs = new ArrayList<Difference>();
|
||||
List<SpecificDifference> diffs = new ArrayList<SpecificDifference>();
|
||||
|
||||
for ( String name : allNames ) {
|
||||
DiffElement masterElt = master.getElement(name);
|
||||
|
|
@ -86,7 +84,7 @@ public class DiffEngine {
|
|||
throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
|
||||
} else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
|
||||
// todo -- should one of these be a special MISSING item?
|
||||
diffs.add(new Difference(masterElt, testElt));
|
||||
diffs.add(new SpecificDifference(masterElt, testElt));
|
||||
} else {
|
||||
diffs.addAll(diff(masterElt, testElt));
|
||||
}
|
||||
|
|
@ -95,11 +93,11 @@ public class DiffEngine {
|
|||
return diffs;
|
||||
}
|
||||
|
||||
public List<Difference> diff(DiffValue master, DiffValue test) {
|
||||
public List<SpecificDifference> diff(DiffValue master, DiffValue test) {
|
||||
if ( master.getValue().equals(test.getValue()) ) {
|
||||
return Collections.emptyList();
|
||||
} else {
|
||||
return Arrays.asList(new Difference(master.getBinding(), test.getBinding()));
|
||||
return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -147,64 +145,68 @@ public class DiffEngine {
|
|||
* @param params determines how we display the items
|
||||
* @param diffs
|
||||
*/
|
||||
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
|
||||
public void reportSummarizedDifferences(List<SpecificDifference> diffs, SummaryReportParams params ) {
|
||||
printSummaryReport(summarizeDifferences(diffs), params );
|
||||
}
|
||||
|
||||
public List<SummarizedDifference> summarizeDifferences(List<Difference> diffs) {
|
||||
List<String[]> diffPaths = new ArrayList<String[]>(diffs.size());
|
||||
|
||||
for ( Difference diff1 : diffs ) {
|
||||
diffPaths.add(diffNameToPath(diff1.getFullyQualifiedName()));
|
||||
}
|
||||
|
||||
return summarizedDifferencesOfPaths(diffPaths);
|
||||
public List<Difference> summarizeDifferences(List<SpecificDifference> diffs) {
|
||||
return summarizedDifferencesOfPaths(diffs);
|
||||
}
|
||||
|
||||
final protected static String[] diffNameToPath(String diffName) {
|
||||
return diffName.split("\\.");
|
||||
}
|
||||
|
||||
protected List<SummarizedDifference> summarizedDifferencesOfPaths(List<String[]> diffPaths) {
|
||||
Map<String, SummarizedDifference> summaries = new HashMap<String, SummarizedDifference>();
|
||||
protected List<Difference> summarizedDifferencesOfPathsFromString(List<String> singletonDiffs) {
|
||||
List<Difference> diffs = new ArrayList<Difference>();
|
||||
|
||||
for ( String diff : singletonDiffs ) {
|
||||
diffs.add(new Difference(diff));
|
||||
}
|
||||
|
||||
return summarizedDifferencesOfPaths(diffs);
|
||||
}
|
||||
|
||||
protected List<Difference> summarizedDifferencesOfPaths(List<? extends Difference> singletonDiffs) {
|
||||
Map<String, Difference> summaries = new HashMap<String, Difference>();
|
||||
|
||||
// create the initial set of differences
|
||||
for ( int i = 0; i < diffPaths.size(); i++ ) {
|
||||
for ( int i = 0; i < singletonDiffs.size(); i++ ) {
|
||||
for ( int j = 0; j <= i; j++ ) {
|
||||
String[] diffPath1 = diffPaths.get(i);
|
||||
String[] diffPath2 = diffPaths.get(j);
|
||||
if ( diffPath1.length == diffPath2.length ) {
|
||||
int lcp = longestCommonPostfix(diffPath1, diffPath2);
|
||||
String path = lcp > 0 ? summarizedPath(diffPath2, lcp) : Utils.join(".", diffPath2);
|
||||
Difference diffPath1 = singletonDiffs.get(i);
|
||||
Difference diffPath2 = singletonDiffs.get(j);
|
||||
if ( diffPath1.length() == diffPath2.length() ) {
|
||||
int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts());
|
||||
String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath();
|
||||
addSummary(summaries, path, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// count differences
|
||||
for ( String[] diffPath : diffPaths ) {
|
||||
for ( SummarizedDifference sumDiff : summaries.values() ) {
|
||||
if ( sumDiff.matches(diffPath) )
|
||||
for ( Difference diffPath : singletonDiffs ) {
|
||||
for ( Difference sumDiff : summaries.values() ) {
|
||||
if ( sumDiff.matches(diffPath.getParts()) )
|
||||
addSummary(summaries, sumDiff.getPath(), false);
|
||||
}
|
||||
}
|
||||
|
||||
List<SummarizedDifference> sortedSummaries = new ArrayList<SummarizedDifference>(summaries.values());
|
||||
List<Difference> sortedSummaries = new ArrayList<Difference>(summaries.values());
|
||||
Collections.sort(sortedSummaries);
|
||||
return sortedSummaries;
|
||||
}
|
||||
|
||||
private static void addSummary(Map<String, SummarizedDifference> summaries, String path, boolean onlyCatalog) {
|
||||
private static void addSummary(Map<String, Difference> summaries, String path, boolean onlyCatalog) {
|
||||
if ( summaries.containsKey(path) ) {
|
||||
if ( ! onlyCatalog )
|
||||
summaries.get(path).incCount();
|
||||
} else {
|
||||
SummarizedDifference sumDiff = new SummarizedDifference(path);
|
||||
Difference sumDiff = new Difference(path);
|
||||
summaries.put(sumDiff.getPath(), sumDiff);
|
||||
}
|
||||
}
|
||||
|
||||
protected void printSummaryReport(List<SummarizedDifference> sortedSummaries, SummaryReportParams params ) {
|
||||
protected void printSummaryReport(List<Difference> sortedSummaries, SummaryReportParams params ) {
|
||||
GATKReport report = new GATKReport();
|
||||
final String tableName = "diffences";
|
||||
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information");
|
||||
|
|
@ -213,7 +215,7 @@ public class DiffEngine {
|
|||
table.addColumn("NumberOfOccurrences", 0);
|
||||
|
||||
int count = 0, count1 = 0;
|
||||
for ( SummarizedDifference diff : sortedSummaries ) {
|
||||
for ( Difference diff : sortedSummaries ) {
|
||||
if ( diff.getCount() < params.minSumDiffToShow )
|
||||
// in order, so break as soon as the count is too low
|
||||
break;
|
||||
|
|
@ -261,76 +263,6 @@ public class DiffEngine {
|
|||
return Utils.join(".", parts);
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO -- all of the algorithms above should use SummarizedDifference instead
|
||||
* TODO -- of some SummarizedDifferences and some low-level String[]
|
||||
*/
|
||||
public static class SummarizedDifference implements Comparable<SummarizedDifference> {
|
||||
final String path; // X.Y.Z
|
||||
final String[] parts;
|
||||
int count = 0;
|
||||
|
||||
public SummarizedDifference(String path) {
|
||||
this.path = path;
|
||||
this.parts = diffNameToPath(path);
|
||||
}
|
||||
|
||||
public void incCount() { count++; }
|
||||
|
||||
public int getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* The fully qualified path object A.B.C etc
|
||||
* @return
|
||||
*/
|
||||
public String getPath() {
|
||||
return path;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the length of the parts of this summary
|
||||
*/
|
||||
public int length() {
|
||||
return this.parts.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the string parts matches this summary. Matches are
|
||||
* must be equal() everywhere where this summary isn't *.
|
||||
* @param otherParts
|
||||
* @return
|
||||
*/
|
||||
public boolean matches(String[] otherParts) {
|
||||
if ( otherParts.length != length() )
|
||||
return false;
|
||||
|
||||
// TODO optimization: can start at right most non-star element
|
||||
for ( int i = 0; i < length(); i++ ) {
|
||||
String part = parts[i];
|
||||
if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s:%d", getPath(), getCount());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(SummarizedDifference other) {
|
||||
// sort first highest to lowest count, then by lowest to highest path
|
||||
int countCmp = Integer.valueOf(count).compareTo(other.count);
|
||||
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// plugin manager
|
||||
|
|
@ -404,7 +336,7 @@ public class DiffEngine {
|
|||
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
return true;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -153,6 +153,13 @@ public class DiffNode extends DiffValue {
|
|||
add(new DiffElement(name, this.getBinding(), new DiffValue(value)));
|
||||
}
|
||||
|
||||
public int size() {
|
||||
int count = 0;
|
||||
for ( DiffElement value : getElements() )
|
||||
count += value.size();
|
||||
return count;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// toString
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.apache.xmlbeans.impl.tool.Diff;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -95,18 +94,20 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
|||
public void onTraversalDone(Integer sum) {
|
||||
out.printf("Reading master file %s%n", masterFile);
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
|
||||
out.printf(" Read %d objects%n", master.size());
|
||||
out.printf("Reading test file %s%n", testFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
|
||||
out.printf(" Read %d objects%n", test.size());
|
||||
|
||||
// out.printf("Master diff objects%n");
|
||||
// out.println(master.toString());
|
||||
// out.printf("Test diff objects%n");
|
||||
// out.println(test.toString());
|
||||
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||
if ( showItemizedDifferences ) {
|
||||
out.printf("Itemized results%n");
|
||||
for ( Difference diff : diffs )
|
||||
for ( SpecificDifference diff : diffs )
|
||||
out.printf("DIFF: %s%n", diff.toString());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -87,4 +87,5 @@ public class DiffValue {
|
|||
|
||||
public boolean isAtomic() { return true; }
|
||||
public boolean isCompound() { return ! isAtomic(); }
|
||||
/**
 * @return the number of objects this value accounts for, which is always 1 here
 */
public int size() {
    return 1;
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,35 +24,72 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:53 PM
|
||||
*
|
||||
* Represents a specific difference between two specific DiffElements
|
||||
*/
|
||||
public class Difference {
|
||||
DiffElement master, test;
|
||||
public class Difference implements Comparable<Difference> {
|
||||
final String path; // X.Y.Z
|
||||
final String[] parts;
|
||||
int count = 0;
|
||||
|
||||
public Difference(DiffElement master, DiffElement test) {
|
||||
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
|
||||
this.master = master;
|
||||
this.test = test;
|
||||
public Difference(String path) {
|
||||
this.path = path;
|
||||
this.parts = DiffEngine.diffNameToPath(path);
|
||||
}
|
||||
|
||||
public String[] getParts() {
|
||||
return parts;
|
||||
}
|
||||
|
||||
public void incCount() { count++; }
|
||||
|
||||
public int getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* The fully qualified path object A.B.C etc
|
||||
* @return
|
||||
*/
|
||||
public String getPath() {
|
||||
return path;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the length of the parts of this summary
|
||||
*/
|
||||
public int length() {
|
||||
return this.parts.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the string parts match this summary. A match requires
|
||||
* equals() at every position where this summary isn't *.
|
||||
* @param otherParts
|
||||
* @return
|
||||
*/
|
||||
public boolean matches(String[] otherParts) {
|
||||
if ( otherParts.length != length() )
|
||||
return false;
|
||||
|
||||
// TODO optimization: can start at right most non-star element
|
||||
for ( int i = 0; i < length(); i++ ) {
|
||||
String part = parts[i];
|
||||
if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s:%s!=%s",
|
||||
getFullyQualifiedName(),
|
||||
getOneLineString(master),
|
||||
getOneLineString(test));
|
||||
return String.format("%s:%d", getPath(), getCount());
|
||||
}
|
||||
|
||||
public String getFullyQualifiedName() {
|
||||
return (master == null ? test : master).fullyQualifiedName();
|
||||
@Override
|
||||
public int compareTo(Difference other) {
|
||||
// sort first highest to lowest count, then by lowest to highest path
|
||||
int countCmp = Integer.valueOf(count).compareTo(other.count);
|
||||
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
|
||||
}
|
||||
|
||||
private static String getOneLineString(DiffElement elt) {
|
||||
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:53 PM
|
||||
*
|
||||
* Represents a specific difference between two specific DiffElements
|
||||
*/
|
||||
public class SpecificDifference extends Difference {
|
||||
DiffElement master, test;
|
||||
|
||||
public SpecificDifference(DiffElement master, DiffElement test) {
|
||||
super(createName(master, test));
|
||||
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
|
||||
this.master = master;
|
||||
this.test = test;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s:%s!=%s",
|
||||
getPath(),
|
||||
getOneLineString(master),
|
||||
getOneLineString(test));
|
||||
}
|
||||
|
||||
private static String createName(DiffElement master, DiffElement test) {
|
||||
return (master == null ? test : master).fullyQualifiedName();
|
||||
}
|
||||
|
||||
private static String getOneLineString(DiffElement elt) {
|
||||
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
||||
}
|
||||
}
|
||||
|
|
@ -99,7 +99,7 @@ public class DiffEngineUnitTest extends BaseTest {
|
|||
logger.warn("Test tree1: " + test.tree1.toOneLineString());
|
||||
logger.warn("Test tree2: " + test.tree2.toOneLineString());
|
||||
|
||||
List<Difference> diffs = engine.diff(test.tree1, test.tree2);
|
||||
List<SpecificDifference> diffs = engine.diff(test.tree1, test.tree2);
|
||||
logger.warn("Test expected diff : " + test.differences);
|
||||
logger.warn("Observed diffs : " + diffs);
|
||||
}
|
||||
|
|
@ -185,12 +185,12 @@ public class DiffEngineUnitTest extends BaseTest {
|
|||
List<String[]> diffPaths = new ArrayList<String[]>(diffs.size());
|
||||
for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); }
|
||||
|
||||
List<DiffEngine.SummarizedDifference> sumDiffs = engine.summarizedDifferencesOfPaths(diffPaths);
|
||||
List<Difference> sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs);
|
||||
|
||||
Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs);
|
||||
|
||||
for ( int i = 0; i < sumDiffs.size(); i++ ) {
|
||||
DiffEngine.SummarizedDifference sumDiff = sumDiffs.get(i);
|
||||
Difference sumDiff = sumDiffs.get(i);
|
||||
String expected = expecteds.get(i);
|
||||
String[] pathCount = expected.split(":");
|
||||
String path = pathCount[0];
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ public class DifferenceUnitTest extends BaseTest {
|
|||
logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString()));
|
||||
logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString()));
|
||||
logger.warn("Test expected diff : " + test.difference);
|
||||
Difference diff = new Difference(test.tree1, test.tree2);
|
||||
SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2);
|
||||
logger.warn("Observed diffs : " + diff);
|
||||
Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue