Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Matt Hanna 2011-07-18 21:30:51 -04:00
commit 0ef37979cc
6 changed files with 53 additions and 30 deletions

View File

@ -100,7 +100,11 @@ public class GATKReport {
* @param tableDescription the description of the table
*/
public void addTable(String tableName, String tableDescription) {
GATKReportTable table = new GATKReportTable(tableName, tableDescription);
addTable(tableName, tableDescription, true);
}
/**
 * Register a new table with this report, choosing how its primary keys are ordered.
 *
 * @param tableName         the name of the table; also the key the table is stored under
 * @param tableDescription  the description of the table
 * @param sortByPrimaryKey  passed through to the GATKReportTable constructor, where it
 *                          selects a sorted (true) or insertion-ordered (false) primary key column
 */
public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
    tables.put(tableName, new GATKReportTable(tableName, tableDescription, sortByPrimaryKey));
}

View File

@ -3,9 +3,7 @@ package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.TreeSet;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -96,8 +94,9 @@ public class GATKReportTable {
private String tableDescription;
private String primaryKeyName;
private TreeSet<Object> primaryKeyColumn;
private Collection<Object> primaryKeyColumn;
private boolean primaryKeyDisplay;
boolean sortByPrimaryKey = true;
private LinkedHashMap<String, GATKReportColumn> columns;
@ -121,12 +120,17 @@ public class GATKReportTable {
* @param tableDescription the description of the table
*/
public GATKReportTable(String tableName, String tableDescription) {
if (!isValidName(tableName)) {
this(tableName, tableDescription, true);
}
public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
if (!isValidName(tableName)) {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
}
this.tableName = tableName;
this.tableDescription = tableDescription;
this.sortByPrimaryKey = sortByPrimaryKey;
columns = new LinkedHashMap<String, GATKReportColumn>();
}
@ -137,20 +141,14 @@ public class GATKReportTable {
* @param primaryKeyName the name of the primary key column
*/
public void addPrimaryKey(String primaryKeyName) {
if (!isValidName(primaryKeyName)) {
throw new ReviewedStingException("Attempted to set a GATKReportTable primary key name of '" + primaryKeyName + "'. GATKReportTable primary key names must be purely alphanumeric - no spaces or special characters are allowed.");
}
this.primaryKeyName = primaryKeyName;
primaryKeyColumn = new TreeSet<Object>();
primaryKeyDisplay = true;
addPrimaryKey(primaryKeyName, true);
}
/**
* Add an optionally visible primary key column. This becomes the unique identifier for every row in the table, and will always be printed as the first column.
*
* @param primaryKeyName the name of the primary key column
* @param display should this primary key be displayed?
*/
public void addPrimaryKey(String primaryKeyName, boolean display) {
if (!isValidName(primaryKeyName)) {
@ -159,7 +157,7 @@ public class GATKReportTable {
this.primaryKeyName = primaryKeyName;
primaryKeyColumn = new TreeSet<Object>();
primaryKeyColumn = sortByPrimaryKey ? new TreeSet<Object>() : new LinkedList<Object>();
primaryKeyDisplay = display;
}

View File

@ -143,7 +143,7 @@ public class DiffEngine {
* Note that only pairs of the same length are considered as potentially equivalent
*
* @param params determines how we display the items
* @param diffs
* @param diffs the list of differences to summarize
*/
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
printSummaryReport(summarizeDifferences(diffs), params );
@ -207,14 +207,7 @@ public class DiffEngine {
}
protected void printSummaryReport(List<Difference> sortedSummaries, SummaryReportParams params ) {
GATKReport report = new GATKReport();
final String tableName = "diffences";
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information");
GATKReportTable table = report.getTable(tableName);
table.addPrimaryKey("Difference", true);
table.addColumn("NumberOfOccurrences", 0);
table.addColumn("SpecificDifference", 0);
List<Difference> toShow = new ArrayList<Difference>();
int count = 0, count1 = 0;
for ( Difference diff : sortedSummaries ) {
if ( diff.getCount() < params.minSumDiffToShow )
@ -230,10 +223,26 @@ public class DiffEngine {
break;
}
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
table.set(diff.getPath(), "SpecificDifference", diff.valueDiffString());
toShow.add(diff);
}
// if we want it in descending order, reverse the list
if ( ! params.descending ) {
Collections.reverse(toShow);
}
// now that we have a specific list of values we want to show, display them
GATKReport report = new GATKReport();
final String tableName = "diffences";
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
GATKReportTable table = report.getTable(tableName);
table.addPrimaryKey("Difference", true);
table.addColumn("NumberOfOccurrences", 0);
table.addColumn("ExampleDifference", 0);
for ( Difference diff : toShow ) {
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString());
}
table.write(params.out);
}
@ -252,7 +261,7 @@ public class DiffEngine {
* commonPostfixLength: how many parts are shared at the end, suppose it's 2
* We want to create a string *.*.C.D
*
* @param parts
* @param parts the separated path values [above without .]
* @param commonPostfixLength
* @return
*/
@ -351,6 +360,7 @@ public class DiffEngine {
int maxItemsToDisplay = 0;
int maxCountOneItems = 0;
int minSumDiffToShow = 0;
boolean descending = true;
public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) {
this.out = out;
@ -358,5 +368,9 @@ public class DiffEngine {
this.maxCountOneItems = maxCountOneItems;
this.minSumDiffToShow = minSumDiffToShow;
}
/**
 * Choose the order in which the summarized differences are displayed.
 *
 * @param descending  new value of the descending flag (true unless changed here); when false,
 *                    printSummaryReport reverses the list of differences before writing it
 */
public void setDescending(final boolean descending) {
    this.descending = descending;
}
}
}

View File

@ -112,6 +112,7 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
}
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
params.setDescending(false);
diffEngine.reportSummarizedDifferences(diffs, params);
}
}

View File

@ -72,13 +72,19 @@ public class VCFDiffableReader implements DiffableReader {
}
String line = lineReader.readLine();
int count = 0;
int count = 0, nRecordsAtPos = 1;
String prevName = "";
while ( line != null ) {
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break;
VariantContext vc = (VariantContext)vcfCodec.decode(line);
String name = vc.getChr() + ":" + vc.getStart();
if ( name.equals(prevName) ) {
name += "_" + ++nRecordsAtPos;
} else {
prevName = name;
}
DiffNode vcRoot = DiffNode.empty(name, root);
// add fields

View File

@ -52,8 +52,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
@DataProvider(name = "data")
public Object[][] createData() {
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "fb7f4e011487ca56bce865ae5468cdc5");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "423cec3befbf0a72d8bc3757ee628fc4");
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc");
return TestParams.getTests(TestParams.class);
}