Removed the SpecificDifference class. Difference objects now always have the option to remember the specific master and test values. This means that all summarized differences carry with them specific examples of their differences. Consequently, even summarized differences now give at least one example of the specific difference, even when the count of the difference is > 1. Unit tests updated. Added a DiffObjects integration test. VCFDiffableReader now explicitly reads the first line of the VCF file to capture the version number.
This commit is contained in:
parent
66575b7644
commit
6f26c07b85
|
|
@ -58,7 +58,7 @@ public class DiffEngine {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public List<SpecificDifference> diff(DiffElement master, DiffElement test) {
|
||||
public List<Difference> diff(DiffElement master, DiffElement test) {
|
||||
DiffValue masterValue = master.getValue();
|
||||
DiffValue testValue = test.getValue();
|
||||
|
||||
|
|
@ -68,14 +68,14 @@ public class DiffEngine {
|
|||
return diff(masterValue, testValue);
|
||||
} else {
|
||||
// structural difference in types. one is node, other is leaf
|
||||
return Arrays.asList(new SpecificDifference(master, test));
|
||||
return Arrays.asList(new Difference(master, test));
|
||||
}
|
||||
}
|
||||
|
||||
public List<SpecificDifference> diff(DiffNode master, DiffNode test) {
|
||||
public List<Difference> diff(DiffNode master, DiffNode test) {
|
||||
Set<String> allNames = new HashSet<String>(master.getElementNames());
|
||||
allNames.addAll(test.getElementNames());
|
||||
List<SpecificDifference> diffs = new ArrayList<SpecificDifference>();
|
||||
List<Difference> diffs = new ArrayList<Difference>();
|
||||
|
||||
for ( String name : allNames ) {
|
||||
DiffElement masterElt = master.getElement(name);
|
||||
|
|
@ -84,7 +84,7 @@ public class DiffEngine {
|
|||
throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
|
||||
} else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
|
||||
// todo -- should one of these be a special MISSING item?
|
||||
diffs.add(new SpecificDifference(masterElt, testElt));
|
||||
diffs.add(new Difference(masterElt, testElt));
|
||||
} else {
|
||||
diffs.addAll(diff(masterElt, testElt));
|
||||
}
|
||||
|
|
@ -93,11 +93,11 @@ public class DiffEngine {
|
|||
return diffs;
|
||||
}
|
||||
|
||||
public List<SpecificDifference> diff(DiffValue master, DiffValue test) {
|
||||
public List<Difference> diff(DiffValue master, DiffValue test) {
|
||||
if ( master.getValue().equals(test.getValue()) ) {
|
||||
return Collections.emptyList();
|
||||
} else {
|
||||
return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding()));
|
||||
return Arrays.asList(new Difference(master.getBinding(), test.getBinding()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -145,11 +145,11 @@ public class DiffEngine {
|
|||
* @param params determines how we display the items
|
||||
* @param diffs
|
||||
*/
|
||||
public void reportSummarizedDifferences(List<SpecificDifference> diffs, SummaryReportParams params ) {
|
||||
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
|
||||
printSummaryReport(summarizeDifferences(diffs), params );
|
||||
}
|
||||
|
||||
public List<Difference> summarizeDifferences(List<SpecificDifference> diffs) {
|
||||
public List<Difference> summarizeDifferences(List<Difference> diffs) {
|
||||
return summarizedDifferencesOfPaths(diffs);
|
||||
}
|
||||
|
||||
|
|
@ -177,8 +177,12 @@ public class DiffEngine {
|
|||
Difference diffPath2 = singletonDiffs.get(j);
|
||||
if ( diffPath1.length() == diffPath2.length() ) {
|
||||
int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts());
|
||||
String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath();
|
||||
addSummary(summaries, path, true);
|
||||
String path = diffPath2.getPath();
|
||||
if ( lcp != 0 && lcp != diffPath1.length() )
|
||||
path = summarizedPath(diffPath2.getParts(), lcp);
|
||||
Difference sumDiff = new Difference(path, diffPath2.getMaster(), diffPath2.getTest());
|
||||
sumDiff.setCount(0);
|
||||
addSummaryIfMissing(summaries, sumDiff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -187,7 +191,7 @@ public class DiffEngine {
|
|||
for ( Difference diffPath : singletonDiffs ) {
|
||||
for ( Difference sumDiff : summaries.values() ) {
|
||||
if ( sumDiff.matches(diffPath.getParts()) )
|
||||
addSummary(summaries, sumDiff.getPath(), false);
|
||||
sumDiff.incCount();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -196,13 +200,9 @@ public class DiffEngine {
|
|||
return sortedSummaries;
|
||||
}
|
||||
|
||||
private static void addSummary(Map<String, Difference> summaries, String path, boolean onlyCatalog) {
|
||||
if ( summaries.containsKey(path) ) {
|
||||
if ( ! onlyCatalog )
|
||||
summaries.get(path).incCount();
|
||||
} else {
|
||||
Difference sumDiff = new Difference(path);
|
||||
summaries.put(sumDiff.getPath(), sumDiff);
|
||||
protected void addSummaryIfMissing(Map<String, Difference> summaries, Difference diff) {
|
||||
if ( ! summaries.containsKey(diff.getPath()) ) {
|
||||
summaries.put(diff.getPath(), diff);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -213,6 +213,7 @@ public class DiffEngine {
|
|||
GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("Difference", true);
|
||||
table.addColumn("NumberOfOccurrences", 0);
|
||||
table.addColumn("SpecificDifference", 0);
|
||||
|
||||
int count = 0, count1 = 0;
|
||||
for ( Difference diff : sortedSummaries ) {
|
||||
|
|
@ -230,6 +231,7 @@ public class DiffEngine {
|
|||
}
|
||||
|
||||
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
|
||||
table.set(diff.getPath(), "SpecificDifference", diff.valueDiffString());
|
||||
}
|
||||
|
||||
table.write(params.out);
|
||||
|
|
@ -336,7 +338,7 @@ public class DiffEngine {
|
|||
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
||||
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
return true;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -104,10 +104,10 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
|||
// out.printf("Test diff objects%n");
|
||||
// out.println(test.toString());
|
||||
|
||||
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
if ( showItemizedDifferences ) {
|
||||
out.printf("Itemized results%n");
|
||||
for ( SpecificDifference diff : diffs )
|
||||
for ( Difference diff : diffs )
|
||||
out.printf("DIFF: %s%n", diff.toString());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,13 +27,24 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
|
|||
public class Difference implements Comparable<Difference> {
|
||||
final String path; // X.Y.Z
|
||||
final String[] parts;
|
||||
int count = 0;
|
||||
int count = 1;
|
||||
DiffElement master = null , test = null;
|
||||
|
||||
public Difference(String path) {
|
||||
this.path = path;
|
||||
this.parts = DiffEngine.diffNameToPath(path);
|
||||
}
|
||||
|
||||
public Difference(DiffElement master, DiffElement test) {
|
||||
this(createPath(master, test), master, test);
|
||||
}
|
||||
|
||||
public Difference(String path, DiffElement master, DiffElement test) {
|
||||
this(path);
|
||||
this.master = master;
|
||||
this.test = test;
|
||||
}
|
||||
|
||||
public String[] getParts() {
|
||||
return parts;
|
||||
}
|
||||
|
|
@ -44,6 +55,10 @@ public class Difference implements Comparable<Difference> {
|
|||
return count;
|
||||
}
|
||||
|
||||
public void setCount(int count) {
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
/**
|
||||
* The fully qualified path object A.B.C etc
|
||||
* @return
|
||||
|
|
@ -81,7 +96,7 @@ public class Difference implements Comparable<Difference> {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s:%d", getPath(), getCount());
|
||||
return String.format("%s:%d:%s", getPath(), getCount(), valueDiffString());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -91,5 +106,31 @@ public class Difference implements Comparable<Difference> {
|
|||
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
|
||||
}
|
||||
|
||||
public String valueDiffString() {
|
||||
if ( hasSpecificDifference() ) {
|
||||
return String.format("%s!=%s", getOneLineString(master), getOneLineString(test));
|
||||
} else {
|
||||
return "N/A";
|
||||
}
|
||||
}
|
||||
|
||||
private static String createPath(DiffElement master, DiffElement test) {
|
||||
return (master == null ? test : master).fullyQualifiedName();
|
||||
}
|
||||
|
||||
private static String getOneLineString(DiffElement elt) {
|
||||
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
||||
}
|
||||
|
||||
public boolean hasSpecificDifference() {
|
||||
return master != null || test != null;
|
||||
}
|
||||
|
||||
public DiffElement getMaster() {
|
||||
return master;
|
||||
}
|
||||
|
||||
public DiffElement getTest() {
|
||||
return test;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:53 PM
|
||||
*
|
||||
* Represents a specific difference between two specific DiffElements
|
||||
*/
|
||||
public class SpecificDifference extends Difference {
|
||||
DiffElement master, test;
|
||||
|
||||
public SpecificDifference(DiffElement master, DiffElement test) {
|
||||
super(createName(master, test));
|
||||
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
|
||||
this.master = master;
|
||||
this.test = test;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s:%s!=%s",
|
||||
getPath(),
|
||||
getOneLineString(master),
|
||||
getOneLineString(test));
|
||||
}
|
||||
|
||||
private static String createName(DiffElement master, DiffElement test) {
|
||||
return (master == null ? test : master).fullyQualifiedName();
|
||||
}
|
||||
|
||||
private static String getOneLineString(DiffElement elt) {
|
||||
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
||||
}
|
||||
}
|
||||
|
|
@ -53,7 +53,13 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
public DiffElement readFromFile(File file, int maxElementsToRead) {
|
||||
DiffNode root = DiffNode.rooted(file.getName());
|
||||
try {
|
||||
// read the version line from the file
|
||||
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
|
||||
final String version = lineReader.readLine();
|
||||
root.add("VERSION", version);
|
||||
lineReader.close();
|
||||
|
||||
lineReader = new AsciiLineReader(new FileInputStream(file));
|
||||
VCFCodec vcfCodec = new VCFCodec();
|
||||
|
||||
// must be read as state is stored in reader itself
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ public class DiffEngineUnitTest extends BaseTest {
|
|||
logger.warn("Test tree1: " + test.tree1.toOneLineString());
|
||||
logger.warn("Test tree2: " + test.tree2.toOneLineString());
|
||||
|
||||
List<SpecificDifference> diffs = engine.diff(test.tree1, test.tree2);
|
||||
List<Difference> diffs = engine.diff(test.tree1, test.tree2);
|
||||
logger.warn("Test expected diff : " + test.differences);
|
||||
logger.warn("Observed diffs : " + diffs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class DiffObjectsIntegrationTest extends WalkerTest {
|
||||
private class TestParams extends TestDataProvider {
|
||||
public File master, test;
|
||||
public String MD5;
|
||||
|
||||
private TestParams(String master, String test, String MD5) {
|
||||
super(TestParams.class);
|
||||
this.master = new File(master);
|
||||
this.test = new File(test);
|
||||
this.MD5 = MD5;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("master=%s,test=%s,md5=%s", master, test, MD5);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createData() {
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "fb7f4e011487ca56bce865ae5468cdc5");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "423cec3befbf0a72d8bc3757ee628fc4");
|
||||
return TestParams.getTests(TestParams.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "data")
|
||||
public void testDiffs(TestParams params) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T DiffObjects -R public/testdata/exampleFASTA.fasta "
|
||||
+ " -m " + params.master
|
||||
+ " -t " + params.test
|
||||
+ " -o %s",
|
||||
Arrays.asList(params.MD5));
|
||||
executeTest("testDiffObjects:"+params, spec).getFirst();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class DiffableReaderUnitTest extends BaseTest {
|
|||
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
|
||||
|
||||
DiffNode node = diff.getValueAsNode();
|
||||
Assert.assertEquals(node.getElements().size(), 10);
|
||||
Assert.assertEquals(node.getElements().size(), 11);
|
||||
|
||||
// chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03
|
||||
DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode();
|
||||
|
|
|
|||
|
|
@ -75,10 +75,10 @@ public class DifferenceUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createTrees() {
|
||||
new DifferenceTest("A=X", "A=Y", "A:X!=Y");
|
||||
new DifferenceTest("A=Y", "A=X", "A:Y!=X");
|
||||
new DifferenceTest(DiffNode.fromString("A=X"), null, "A:X!=MISSING");
|
||||
new DifferenceTest(null, DiffNode.fromString("A=X"), "A:MISSING!=X");
|
||||
new DifferenceTest("A=X", "A=Y", "A:1:X!=Y");
|
||||
new DifferenceTest("A=Y", "A=X", "A:1:Y!=X");
|
||||
new DifferenceTest(DiffNode.fromString("A=X"), null, "A:1:X!=MISSING");
|
||||
new DifferenceTest(null, DiffNode.fromString("A=X"), "A:1:MISSING!=X");
|
||||
return DifferenceTest.getTests(DifferenceTest.class);
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class DifferenceUnitTest extends BaseTest {
|
|||
logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString()));
|
||||
logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString()));
|
||||
logger.warn("Test expected diff : " + test.difference);
|
||||
SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2);
|
||||
Difference diff = new Difference(test.tree1, test.tree2);
|
||||
logger.warn("Observed diffs : " + diff);
|
||||
Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue