diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 7ce40ad98..f66eeed4e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -147,11 +147,7 @@ public class DiffEngine { * @param diffs the list of differences to summarize */ public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { - printSummaryReport(summarizeDifferences(diffs), params ); - } - - public List summarizeDifferences(List diffs) { - return summarizedDifferencesOfPaths(diffs); + printSummaryReport(summarizedDifferencesOfPaths(diffs, params.maxRawDiffsToSummarize), params ); } final protected static String[] diffNameToPath(String diffName) { @@ -165,10 +161,11 @@ public class DiffEngine { diffs.add(new Difference(diff)); } - return summarizedDifferencesOfPaths(diffs); + return summarizedDifferencesOfPaths(diffs, -1); } - protected List summarizedDifferencesOfPaths(List singletonDiffs) { + private Map initialPairwiseSummaries(final List singletonDiffs, + final int maxRawDiffsToSummarize) { Map summaries = new HashMap(); // create the initial set of differences @@ -184,10 +181,20 @@ public class DiffEngine { Difference sumDiff = new Difference(path, diffPath2.getMaster(), diffPath2.getTest()); sumDiff.setCount(0); addSummaryIfMissing(summaries, sumDiff); + + if ( maxRawDiffsToSummarize != -1 && summaries.size() > maxRawDiffsToSummarize) + return summaries; } } } + return summaries; + } + + protected List summarizedDifferencesOfPaths(final List singletonDiffs, + final int maxRawDiffsToSummarize) { + Map summaries = initialPairwiseSummaries(singletonDiffs, maxRawDiffsToSummarize); + // count differences for ( Difference diffPath : singletonDiffs ) { for ( Difference sumDiff : summaries.values() ) { @@ -360,17 +367,23 @@ 
public class DiffEngine { } public static class SummaryReportParams { - PrintStream out = System.out; - int maxItemsToDisplay = 0; - int maxCountOneItems = 0; - int minSumDiffToShow = 0; + final PrintStream out; + final int maxItemsToDisplay; + final int maxCountOneItems; + final int minSumDiffToShow; + final int maxRawDiffsToSummarize; boolean descending = true; - public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) { + public SummaryReportParams(PrintStream out, + int maxItemsToDisplay, + int maxCountOneItems, + int minSumDiffToShow, + int maxRawDiffsToSummarize) { this.out = out; this.maxItemsToDisplay = maxItemsToDisplay; this.maxCountOneItems = maxCountOneItems; this.minSumDiffToShow = minSumDiffToShow; + this.maxRawDiffsToSummarize = maxRawDiffsToSummarize; } public void setDescending(boolean descending) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index 04437fdd1..21de6135f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -162,6 +162,10 @@ public class DiffObjectsWalker extends RodWalker { @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) int MAX_OBJECTS_TO_READ = -1; + @Argument(fullName="maxRawDiffsToSummary", shortName="maxRawDiffsToSummary", doc="Max. number of raw differences to summarize. -1 [default] means unlimited", required=false) + int maxRawDiffsToSummary = -1; + + /** * The max number of differences to display when summarizing. For example, if there are 10M differences, but * maxDiffs is 10, then the comparison aborts after first ten summarized differences are shown. 
Note that @@ -232,13 +236,14 @@ public class DiffObjectsWalker extends RodWalker { // out.println(test.toString()); List diffs = diffEngine.diff(master, test); + out.printf(" Done computing diff, n = %d%n", diffs.size()); if ( showItemizedDifferences ) { out.printf("Itemized results%n"); for ( Difference diff : diffs ) out.printf("DIFF: %s%n", diff.toString()); } - DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff); + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff, maxRawDiffsToSummary); params.setDescending(false); diffEngine.reportSummarizedDifferences(diffs, params); } diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java index e756c9864..6b62cb3cd 100644 --- a/public/java/test/org/broadinstitute/sting/MD5DB.java +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -250,7 +250,7 @@ public class MD5DB { // TODO -- capture output and put in log final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final PrintStream ps = new PrintStream(baos); - DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(ps, 20, 10, 0); + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(ps, 20, 10, 0, MAX_RECORDS_TO_READ); boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), MAX_RECORDS_TO_READ, params); if ( success ) { final String content = baos.toString();