Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
f7233a5e63
|
|
@ -100,7 +100,11 @@ public class GATKReport {
|
|||
* @param tableDescription the description of the table
|
||||
*/
|
||||
public void addTable(String tableName, String tableDescription) {
|
||||
GATKReportTable table = new GATKReportTable(tableName, tableDescription);
|
||||
addTable(tableName, tableDescription, true);
|
||||
}
|
||||
|
||||
public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
|
||||
GATKReportTable table = new GATKReportTable(tableName, tableDescription, sortByPrimaryKey);
|
||||
tables.put(tableName, table);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,9 +3,7 @@ package org.broadinstitute.sting.gatk.report;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
|
@ -96,8 +94,9 @@ public class GATKReportTable {
|
|||
private String tableDescription;
|
||||
|
||||
private String primaryKeyName;
|
||||
private TreeSet<Object> primaryKeyColumn;
|
||||
private Collection<Object> primaryKeyColumn;
|
||||
private boolean primaryKeyDisplay;
|
||||
boolean sortByPrimaryKey = true;
|
||||
|
||||
private LinkedHashMap<String, GATKReportColumn> columns;
|
||||
|
||||
|
|
@ -121,12 +120,17 @@ public class GATKReportTable {
|
|||
* @param tableDescription the description of the table
|
||||
*/
|
||||
public GATKReportTable(String tableName, String tableDescription) {
|
||||
if (!isValidName(tableName)) {
|
||||
this(tableName, tableDescription, true);
|
||||
}
|
||||
|
||||
public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
|
||||
if (!isValidName(tableName)) {
|
||||
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||
}
|
||||
|
||||
this.tableName = tableName;
|
||||
this.tableDescription = tableDescription;
|
||||
this.sortByPrimaryKey = sortByPrimaryKey;
|
||||
|
||||
columns = new LinkedHashMap<String, GATKReportColumn>();
|
||||
}
|
||||
|
|
@ -137,20 +141,14 @@ public class GATKReportTable {
|
|||
* @param primaryKeyName the name of the primary key column
|
||||
*/
|
||||
public void addPrimaryKey(String primaryKeyName) {
|
||||
if (!isValidName(primaryKeyName)) {
|
||||
throw new ReviewedStingException("Attempted to set a GATKReportTable primary key name of '" + primaryKeyName + "'. GATKReportTable primary key names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||
}
|
||||
|
||||
this.primaryKeyName = primaryKeyName;
|
||||
|
||||
primaryKeyColumn = new TreeSet<Object>();
|
||||
primaryKeyDisplay = true;
|
||||
addPrimaryKey(primaryKeyName, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an optionally visible primary key column. This becomes the unique identifier for every row in the table, and will always be printed as the first column.
|
||||
*
|
||||
* @param primaryKeyName the name of the primary key column
|
||||
* @param display should this primary key be displayed?
|
||||
*/
|
||||
public void addPrimaryKey(String primaryKeyName, boolean display) {
|
||||
if (!isValidName(primaryKeyName)) {
|
||||
|
|
@ -159,7 +157,7 @@ public class GATKReportTable {
|
|||
|
||||
this.primaryKeyName = primaryKeyName;
|
||||
|
||||
primaryKeyColumn = new TreeSet<Object>();
|
||||
primaryKeyColumn = sortByPrimaryKey ? new TreeSet<Object>() : new LinkedList<Object>();
|
||||
primaryKeyDisplay = display;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,9 +29,7 @@ import net.sf.samtools.SAMRecord;
|
|||
import net.sf.samtools.SAMRecordIterator;
|
||||
import net.sf.samtools.util.BlockCompressedInputStream;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
|
|
@ -102,8 +100,10 @@ public class BAMDiffableReader implements DiffableReader {
|
|||
final byte[] BAM_MAGIC = "BAM\1".getBytes();
|
||||
final byte[] buffer = new byte[BAM_MAGIC.length];
|
||||
try {
|
||||
FileInputStream fstream = new FileInputStream(file);
|
||||
new BlockCompressedInputStream(fstream).read(buffer,0,BAM_MAGIC.length);
|
||||
InputStream fstream = new BufferedInputStream(new FileInputStream(file));
|
||||
if ( !BlockCompressedInputStream.isValidFile(fstream) )
|
||||
return false;
|
||||
new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length);
|
||||
return Arrays.equals(buffer, BAM_MAGIC);
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ public class DiffEngine {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public List<SpecificDifference> diff(DiffElement master, DiffElement test) {
|
||||
public List<Difference> diff(DiffElement master, DiffElement test) {
|
||||
DiffValue masterValue = master.getValue();
|
||||
DiffValue testValue = test.getValue();
|
||||
|
||||
|
|
@ -68,14 +68,14 @@ public class DiffEngine {
|
|||
return diff(masterValue, testValue);
|
||||
} else {
|
||||
// structural difference in types. one is node, other is leaf
|
||||
return Arrays.asList(new SpecificDifference(master, test));
|
||||
return Arrays.asList(new Difference(master, test));
|
||||
}
|
||||
}
|
||||
|
||||
public List<SpecificDifference> diff(DiffNode master, DiffNode test) {
|
||||
public List<Difference> diff(DiffNode master, DiffNode test) {
|
||||
Set<String> allNames = new HashSet<String>(master.getElementNames());
|
||||
allNames.addAll(test.getElementNames());
|
||||
List<SpecificDifference> diffs = new ArrayList<SpecificDifference>();
|
||||
List<Difference> diffs = new ArrayList<Difference>();
|
||||
|
||||
for ( String name : allNames ) {
|
||||
DiffElement masterElt = master.getElement(name);
|
||||
|
|
@ -84,7 +84,7 @@ public class DiffEngine {
|
|||
throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
|
||||
} else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
|
||||
// todo -- should one of these be a special MISSING item?
|
||||
diffs.add(new SpecificDifference(masterElt, testElt));
|
||||
diffs.add(new Difference(masterElt, testElt));
|
||||
} else {
|
||||
diffs.addAll(diff(masterElt, testElt));
|
||||
}
|
||||
|
|
@ -93,11 +93,11 @@ public class DiffEngine {
|
|||
return diffs;
|
||||
}
|
||||
|
||||
public List<SpecificDifference> diff(DiffValue master, DiffValue test) {
|
||||
public List<Difference> diff(DiffValue master, DiffValue test) {
|
||||
if ( master.getValue().equals(test.getValue()) ) {
|
||||
return Collections.emptyList();
|
||||
} else {
|
||||
return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding()));
|
||||
return Arrays.asList(new Difference(master.getBinding(), test.getBinding()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -143,13 +143,13 @@ public class DiffEngine {
|
|||
* Note that only pairs of the same length are considered as potentially equivalent
|
||||
*
|
||||
* @param params determines how we display the items
|
||||
* @param diffs
|
||||
* @param diffs the list of differences to summarize
|
||||
*/
|
||||
public void reportSummarizedDifferences(List<SpecificDifference> diffs, SummaryReportParams params ) {
|
||||
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
|
||||
printSummaryReport(summarizeDifferences(diffs), params );
|
||||
}
|
||||
|
||||
public List<Difference> summarizeDifferences(List<SpecificDifference> diffs) {
|
||||
public List<Difference> summarizeDifferences(List<Difference> diffs) {
|
||||
return summarizedDifferencesOfPaths(diffs);
|
||||
}
|
||||
|
||||
|
|
@ -177,8 +177,12 @@ public class DiffEngine {
|
|||
Difference diffPath2 = singletonDiffs.get(j);
|
||||
if ( diffPath1.length() == diffPath2.length() ) {
|
||||
int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts());
|
||||
String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath();
|
||||
addSummary(summaries, path, true);
|
||||
String path = diffPath2.getPath();
|
||||
if ( lcp != 0 && lcp != diffPath1.length() )
|
||||
path = summarizedPath(diffPath2.getParts(), lcp);
|
||||
Difference sumDiff = new Difference(path, diffPath2.getMaster(), diffPath2.getTest());
|
||||
sumDiff.setCount(0);
|
||||
addSummaryIfMissing(summaries, sumDiff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -187,7 +191,7 @@ public class DiffEngine {
|
|||
for ( Difference diffPath : singletonDiffs ) {
|
||||
for ( Difference sumDiff : summaries.values() ) {
|
||||
if ( sumDiff.matches(diffPath.getParts()) )
|
||||
addSummary(summaries, sumDiff.getPath(), false);
|
||||
sumDiff.incCount();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -196,24 +200,14 @@ public class DiffEngine {
|
|||
return sortedSummaries;
|
||||
}
|
||||
|
||||
private static void addSummary(Map<String, Difference> summaries, String path, boolean onlyCatalog) {
|
||||
if ( summaries.containsKey(path) ) {
|
||||
if ( ! onlyCatalog )
|
||||
summaries.get(path).incCount();
|
||||
} else {
|
||||
Difference sumDiff = new Difference(path);
|
||||
summaries.put(sumDiff.getPath(), sumDiff);
|
||||
protected void addSummaryIfMissing(Map<String, Difference> summaries, Difference diff) {
|
||||
if ( ! summaries.containsKey(diff.getPath()) ) {
|
||||
summaries.put(diff.getPath(), diff);
|
||||
}
|
||||
}
|
||||
|
||||
protected void printSummaryReport(List<Difference> sortedSummaries, SummaryReportParams params ) {
|
||||
GATKReport report = new GATKReport();
|
||||
final String tableName = "diffences";
|
||||
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information");
|
||||
GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("Difference", true);
|
||||
table.addColumn("NumberOfOccurrences", 0);
|
||||
|
||||
List<Difference> toShow = new ArrayList<Difference>();
|
||||
int count = 0, count1 = 0;
|
||||
for ( Difference diff : sortedSummaries ) {
|
||||
if ( diff.getCount() < params.minSumDiffToShow )
|
||||
|
|
@ -229,9 +223,26 @@ public class DiffEngine {
|
|||
break;
|
||||
}
|
||||
|
||||
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
|
||||
toShow.add(diff);
|
||||
}
|
||||
|
||||
// if we want it in descending order, reverse the list
|
||||
if ( ! params.descending ) {
|
||||
Collections.reverse(toShow);
|
||||
}
|
||||
|
||||
// now that we have a specific list of values we want to show, display them
|
||||
GATKReport report = new GATKReport();
|
||||
final String tableName = "diffences";
|
||||
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
|
||||
GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("Difference", true);
|
||||
table.addColumn("NumberOfOccurrences", 0);
|
||||
table.addColumn("ExampleDifference", 0);
|
||||
for ( Difference diff : toShow ) {
|
||||
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
|
||||
table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString());
|
||||
}
|
||||
table.write(params.out);
|
||||
}
|
||||
|
||||
|
|
@ -250,7 +261,7 @@ public class DiffEngine {
|
|||
* commonPostfixLength: how many parts are shared at the end, suppose it's 2
|
||||
* We want to create a string *.*.C.D
|
||||
*
|
||||
* @param parts
|
||||
* @param parts the separated path values [above without .]
|
||||
* @param commonPostfixLength
|
||||
* @return
|
||||
*/
|
||||
|
|
@ -336,7 +347,7 @@ public class DiffEngine {
|
|||
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
||||
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
return true;
|
||||
} else {
|
||||
|
|
@ -349,6 +360,7 @@ public class DiffEngine {
|
|||
int maxItemsToDisplay = 0;
|
||||
int maxCountOneItems = 0;
|
||||
int minSumDiffToShow = 0;
|
||||
boolean descending = true;
|
||||
|
||||
public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) {
|
||||
this.out = out;
|
||||
|
|
@ -356,5 +368,9 @@ public class DiffEngine {
|
|||
this.maxCountOneItems = maxCountOneItems;
|
||||
this.minSumDiffToShow = minSumDiffToShow;
|
||||
}
|
||||
|
||||
public void setDescending(boolean descending) {
|
||||
this.descending = descending;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,14 +104,15 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
|||
// out.printf("Test diff objects%n");
|
||||
// out.println(test.toString());
|
||||
|
||||
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
if ( showItemizedDifferences ) {
|
||||
out.printf("Itemized results%n");
|
||||
for ( SpecificDifference diff : diffs )
|
||||
for ( Difference diff : diffs )
|
||||
out.printf("DIFF: %s%n", diff.toString());
|
||||
}
|
||||
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
|
||||
params.setDescending(false);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
}
|
||||
}
|
||||
|
|
@ -27,13 +27,24 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
|
|||
public class Difference implements Comparable<Difference> {
|
||||
final String path; // X.Y.Z
|
||||
final String[] parts;
|
||||
int count = 0;
|
||||
int count = 1;
|
||||
DiffElement master = null , test = null;
|
||||
|
||||
public Difference(String path) {
|
||||
this.path = path;
|
||||
this.parts = DiffEngine.diffNameToPath(path);
|
||||
}
|
||||
|
||||
public Difference(DiffElement master, DiffElement test) {
|
||||
this(createPath(master, test), master, test);
|
||||
}
|
||||
|
||||
public Difference(String path, DiffElement master, DiffElement test) {
|
||||
this(path);
|
||||
this.master = master;
|
||||
this.test = test;
|
||||
}
|
||||
|
||||
public String[] getParts() {
|
||||
return parts;
|
||||
}
|
||||
|
|
@ -44,6 +55,10 @@ public class Difference implements Comparable<Difference> {
|
|||
return count;
|
||||
}
|
||||
|
||||
public void setCount(int count) {
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
/**
|
||||
* The fully qualified path object A.B.C etc
|
||||
* @return
|
||||
|
|
@ -81,7 +96,7 @@ public class Difference implements Comparable<Difference> {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s:%d", getPath(), getCount());
|
||||
return String.format("%s:%d:%s", getPath(), getCount(), valueDiffString());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -91,5 +106,31 @@ public class Difference implements Comparable<Difference> {
|
|||
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
|
||||
}
|
||||
|
||||
public String valueDiffString() {
|
||||
if ( hasSpecificDifference() ) {
|
||||
return String.format("%s!=%s", getOneLineString(master), getOneLineString(test));
|
||||
} else {
|
||||
return "N/A";
|
||||
}
|
||||
}
|
||||
|
||||
private static String createPath(DiffElement master, DiffElement test) {
|
||||
return (master == null ? test : master).fullyQualifiedName();
|
||||
}
|
||||
|
||||
private static String getOneLineString(DiffElement elt) {
|
||||
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
||||
}
|
||||
|
||||
public boolean hasSpecificDifference() {
|
||||
return master != null || test != null;
|
||||
}
|
||||
|
||||
public DiffElement getMaster() {
|
||||
return master;
|
||||
}
|
||||
|
||||
public DiffElement getTest() {
|
||||
return test;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:53 PM
|
||||
*
|
||||
* Represents a specific difference between two specific DiffElements
|
||||
*/
|
||||
public class SpecificDifference extends Difference {
|
||||
DiffElement master, test;
|
||||
|
||||
public SpecificDifference(DiffElement master, DiffElement test) {
|
||||
super(createName(master, test));
|
||||
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
|
||||
this.master = master;
|
||||
this.test = test;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s:%s!=%s",
|
||||
getPath(),
|
||||
getOneLineString(master),
|
||||
getOneLineString(test));
|
||||
}
|
||||
|
||||
private static String createName(DiffElement master, DiffElement test) {
|
||||
return (master == null ? test : master).fullyQualifiedName();
|
||||
}
|
||||
|
||||
private static String getOneLineString(DiffElement elt) {
|
||||
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
||||
}
|
||||
}
|
||||
|
|
@ -53,7 +53,13 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
public DiffElement readFromFile(File file, int maxElementsToRead) {
|
||||
DiffNode root = DiffNode.rooted(file.getName());
|
||||
try {
|
||||
// read the version line from the file
|
||||
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
|
||||
final String version = lineReader.readLine();
|
||||
root.add("VERSION", version);
|
||||
lineReader.close();
|
||||
|
||||
lineReader = new AsciiLineReader(new FileInputStream(file));
|
||||
VCFCodec vcfCodec = new VCFCodec();
|
||||
|
||||
// must be read as state is stored in reader itself
|
||||
|
|
@ -66,13 +72,19 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
}
|
||||
|
||||
String line = lineReader.readLine();
|
||||
int count = 0;
|
||||
int count = 0, nRecordsAtPos = 1;
|
||||
String prevName = "";
|
||||
while ( line != null ) {
|
||||
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
|
||||
VariantContext vc = (VariantContext)vcfCodec.decode(line);
|
||||
String name = vc.getChr() + ":" + vc.getStart();
|
||||
if ( name.equals(prevName) ) {
|
||||
name += "_" + ++nRecordsAtPos;
|
||||
} else {
|
||||
prevName = name;
|
||||
}
|
||||
DiffNode vcRoot = DiffNode.empty(name, root);
|
||||
|
||||
// add fields
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.apache.poi.hpsf.Variant;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
|
@ -149,7 +150,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
// get all of the vcf rods at this locus
|
||||
// Need to provide reference bases to simpleMerge starting at current locus
|
||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, null,context.getLocation(), true, false);
|
||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false);
|
||||
|
||||
if ( sitesOnlyVCF ) {
|
||||
vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs);
|
||||
|
|
@ -172,17 +173,25 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
||||
return 0;
|
||||
|
||||
VariantContext mergedVC;
|
||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||
if ( master ) {
|
||||
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
|
||||
mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master"));
|
||||
} else {
|
||||
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType,
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||
// iterate over the types so that it's deterministic
|
||||
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
||||
if ( VCsByType.containsKey(type) )
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
}
|
||||
|
||||
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
|
||||
for ( VariantContext mergedVC : mergedVCs ) {
|
||||
// only operate at the start of events
|
||||
if ( mergedVC == null )
|
||||
continue;
|
||||
|
||||
if ( mergedVC != null ) { // only operate at the start of events
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
|
||||
// re-compute chromosome counts
|
||||
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
|
||||
|
|
|
|||
|
|
@ -289,8 +289,8 @@ public class VariantContextUtils {
|
|||
|
||||
/**
|
||||
* Returns a newly allocated VC that is the same as VC, but without genotypes
|
||||
* @param vc
|
||||
* @return
|
||||
* @param vc variant context
|
||||
* @return new VC without genotypes
|
||||
*/
|
||||
@Requires("vc != null")
|
||||
@Ensures("result != null")
|
||||
|
|
@ -303,8 +303,8 @@ public class VariantContextUtils {
|
|||
|
||||
/**
|
||||
* Returns a newly allocated list of VC, where each VC is the same as the input VCs, but without genotypes
|
||||
* @param vcs
|
||||
* @return
|
||||
* @param vcs collection of VCs
|
||||
* @return new VCs without genotypes
|
||||
*/
|
||||
@Requires("vcs != null")
|
||||
@Ensures("result != null")
|
||||
|
|
@ -362,9 +362,9 @@ public class VariantContextUtils {
|
|||
* information per genotype. The master merge will add the PQ information from each genotype record, where
|
||||
* appropriate, to the master VC.
|
||||
*
|
||||
* @param unsortedVCs
|
||||
* @param masterName
|
||||
* @return
|
||||
* @param unsortedVCs collection of VCs
|
||||
* @param masterName name of master VC
|
||||
* @return master-merged VC
|
||||
*/
|
||||
public static VariantContext masterMerge(Collection<VariantContext> unsortedVCs, String masterName) {
|
||||
VariantContext master = findMaster(unsortedVCs, masterName);
|
||||
|
|
@ -435,11 +435,15 @@ public class VariantContextUtils {
|
|||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||
* the sample name
|
||||
*
|
||||
* @param unsortedVCs
|
||||
* @param priorityListOfVCs
|
||||
* @param filteredRecordMergeType
|
||||
* @param genotypeMergeOptions
|
||||
* @return
|
||||
* @param genomeLocParser loc parser
|
||||
* @param unsortedVCs collection of unsorted VCs
|
||||
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||
* @param filteredRecordMergeType merge type for filtered records
|
||||
* @param genotypeMergeOptions merge option for genotypes
|
||||
* @param annotateOrigin should we annotate the set it came from?
|
||||
* @param printMessages should we print messages?
|
||||
* @param inputRefBase the ref base
|
||||
* @return new VariantContext
|
||||
*/
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||
|
|
@ -448,6 +452,24 @@ public class VariantContextUtils {
|
|||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||
* the sample name
|
||||
*
|
||||
* @param genomeLocParser loc parser
|
||||
* @param unsortedVCs collection of unsorted VCs
|
||||
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||
* @param filteredRecordMergeType merge type for filtered records
|
||||
* @param genotypeMergeOptions merge option for genotypes
|
||||
* @param annotateOrigin should we annotate the set it came from?
|
||||
* @param printMessages should we print messages?
|
||||
* @param inputRefBase the ref base
|
||||
* @param setKey the key name of the set
|
||||
* @param filteredAreUncalled are filtered records uncalled?
|
||||
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||
* @return new VariantContext
|
||||
*/
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
||||
|
|
@ -470,7 +492,7 @@ public class VariantContextUtils {
|
|||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
|
||||
}
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||
return null;
|
||||
|
||||
// establish the baseline info from the first VC
|
||||
|
|
@ -615,6 +637,17 @@ public class VariantContextUtils {
|
|||
return merged;
|
||||
}
|
||||
|
||||
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vc.getType()).add(vc);
|
||||
}
|
||||
|
||||
return mappedVCs;
|
||||
}
|
||||
|
||||
private static class AlleleMapper {
|
||||
private VariantContext vc = null;
|
||||
private Map<Allele, Allele> map = null;
|
||||
|
|
@ -834,6 +867,7 @@ public class VariantContextUtils {
|
|||
|
||||
/**
|
||||
* create a genome location, given a variant context
|
||||
* @param genomeLocParser parser
|
||||
* @param vc the variant context
|
||||
* @return the genomeLoc
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -13,10 +13,7 @@ import java.io.*;
|
|||
import java.math.BigInteger;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -83,11 +80,6 @@ public abstract class BaseTest {
|
|||
public static final String networkTempDir = "/broad/shptmp/";
|
||||
public static final File networkTempDirFile = new File(networkTempDir);
|
||||
|
||||
/**
|
||||
* Subdirectory under the ant build directory where we store integration test md5 results
|
||||
*/
|
||||
public static final String MD5_FILE_DB_SUBDIR = "integrationtests";
|
||||
|
||||
public static final String testDir = "public/testdata/";
|
||||
|
||||
/** before the class starts up */
|
||||
|
|
@ -129,7 +121,7 @@ public abstract class BaseTest {
|
|||
* 2: Create instances of your subclass. Return from it the call to getTests, providing
|
||||
* the class type of your test
|
||||
*
|
||||
* @DataProvider(name = "summaries")
|
||||
* @DataProvider(name = "summaries")
|
||||
* public Object[][] createSummaries() {
|
||||
* new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2");
|
||||
* new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1");
|
||||
|
|
@ -206,200 +198,6 @@ public abstract class BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* a little utility function for all tests to md5sum a file
|
||||
* Shamelessly taken from:
|
||||
*
|
||||
* http://www.javalobby.org/java/forums/t84420.html
|
||||
*
|
||||
* @param file the file
|
||||
* @return a string
|
||||
*/
|
||||
public static String md5SumFile(File file) {
|
||||
MessageDigest digest;
|
||||
try {
|
||||
digest = MessageDigest.getInstance("MD5");
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new ReviewedStingException("Unable to find MD5 digest");
|
||||
}
|
||||
InputStream is;
|
||||
try {
|
||||
is = new FileInputStream(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ReviewedStingException("Unable to open file " + file);
|
||||
}
|
||||
byte[] buffer = new byte[8192];
|
||||
int read;
|
||||
try {
|
||||
while ((read = is.read(buffer)) > 0) {
|
||||
digest.update(buffer, 0, read);
|
||||
}
|
||||
byte[] md5sum = digest.digest();
|
||||
BigInteger bigInt = new BigInteger(1, md5sum);
|
||||
return bigInt.toString(16);
|
||||
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new ReviewedStingException("Unable to process file for MD5", e);
|
||||
}
|
||||
finally {
|
||||
try {
|
||||
is.close();
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected static void ensureMd5DbDirectory() {
|
||||
// todo -- make path
|
||||
File dir = new File(MD5_FILE_DB_SUBDIR);
|
||||
if ( ! dir.exists() ) {
|
||||
System.out.printf("##### Creating MD5 db %s%n", MD5_FILE_DB_SUBDIR);
|
||||
if ( ! dir.mkdir() ) {
|
||||
throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + MD5_FILE_DB_SUBDIR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected static File getFileForMD5(final String md5) {
|
||||
final String basename = String.format("%s.integrationtest", md5);
|
||||
return new File(MD5_FILE_DB_SUBDIR + "/" + basename);
|
||||
}
|
||||
|
||||
private static void updateMD5Db(final String md5, final File resultsFile) {
|
||||
// todo -- copy results file to DB dir if needed under filename for md5
|
||||
final File dbFile = getFileForMD5(md5);
|
||||
if ( ! dbFile.exists() ) {
|
||||
// the file isn't already in the db, copy it over
|
||||
System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath());
|
||||
try {
|
||||
FileUtils.copyFile(resultsFile, dbFile);
|
||||
} catch ( IOException e ) {
|
||||
throw new ReviewedStingException(e.getMessage());
|
||||
}
|
||||
} else {
|
||||
System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private static String getMD5Path(final String md5, final String valueIfNotFound) {
|
||||
// todo -- look up the result in the directory and return the path if it exists
|
||||
final File dbFile = getFileForMD5(md5);
|
||||
return dbFile.exists() ? dbFile.getPath() : valueIfNotFound;
|
||||
}
|
||||
|
||||
public static byte[] getBytesFromFile(File file) throws IOException {
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
// Get the size of the file
|
||||
long length = file.length();
|
||||
|
||||
if (length > Integer.MAX_VALUE) {
|
||||
// File is too large
|
||||
}
|
||||
|
||||
// Create the byte array to hold the data
|
||||
byte[] bytes = new byte[(int) length];
|
||||
|
||||
// Read in the bytes
|
||||
int offset = 0;
|
||||
int numRead = 0;
|
||||
while (offset < bytes.length
|
||||
&& (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
|
||||
offset += numRead;
|
||||
}
|
||||
|
||||
// Ensure all the bytes have been read in
|
||||
if (offset < bytes.length) {
|
||||
throw new IOException("Could not completely read file " + file.getName());
|
||||
}
|
||||
|
||||
// Close the input stream and return bytes
|
||||
is.close();
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different.
|
||||
* @param name Name of the test.
|
||||
* @param resultsFile File to MD5.
|
||||
* @param expectedMD5 Expected MD5 value.
|
||||
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
|
||||
* @return The calculated MD5.
|
||||
*/
|
||||
public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize);
|
||||
|
||||
if (parameterize || expectedMD5.equals("")) {
|
||||
// Don't assert
|
||||
} else if ( filemd5sum.equals(expectedMD5) ) {
|
||||
System.out.println(String.format(" => %s PASSED", name));
|
||||
} else {
|
||||
Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum));
|
||||
}
|
||||
|
||||
|
||||
|
||||
return filemd5sum;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different.
|
||||
* @param name Name of the test.
|
||||
* @param resultsFile File to MD5.
|
||||
* @param expectedMD5 Expected MD5 value.
|
||||
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
|
||||
* @return The calculated MD5.
|
||||
*/
|
||||
public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
try {
|
||||
byte[] bytesOfMessage = getBytesFromFile(resultsFile);
|
||||
byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage);
|
||||
BigInteger bigInt = new BigInteger(1, thedigest);
|
||||
String filemd5sum = bigInt.toString(16);
|
||||
while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32
|
||||
|
||||
//
|
||||
// copy md5 to integrationtests
|
||||
//
|
||||
updateMD5Db(filemd5sum, resultsFile);
|
||||
|
||||
if (parameterize || expectedMD5.equals("")) {
|
||||
System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b",
|
||||
name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5)));
|
||||
} else {
|
||||
System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
|
||||
System.out.flush();
|
||||
|
||||
if ( ! expectedMD5.equals(filemd5sum) ) {
|
||||
// we are going to fail for real in assertEquals (so we are counted by the testing framework).
|
||||
// prepare ourselves for the comparison
|
||||
System.out.printf("##### Test %s is going fail #####%n", name);
|
||||
String pathToExpectedMD5File = getMD5Path(expectedMD5, "[No DB file found]");
|
||||
String pathToFileMD5File = getMD5Path(filemd5sum, "[No DB file found]");
|
||||
System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File);
|
||||
System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File);
|
||||
System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File);
|
||||
|
||||
// inline differences
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0);
|
||||
boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params);
|
||||
if ( success )
|
||||
System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n",
|
||||
pathToExpectedMD5File, pathToFileMD5File);
|
||||
}
|
||||
}
|
||||
|
||||
return filemd5sum;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a temp file that will be deleted on exit after tests are complete.
|
||||
* @param name Prefix of the file.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,247 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.io.*;
|
||||
import java.math.BigInteger;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/18/11
|
||||
* Time: 9:10 AM
|
||||
*
|
||||
* Utilities for manipulating the MD5 database of previous results
|
||||
*/
|
||||
public class MD5DB {
|
||||
/**
|
||||
* Subdirectory under the ant build directory where we store integration test md5 results
|
||||
*/
|
||||
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
|
||||
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// MD5 DB stuff
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Create the MD5 file directories if necessary
|
||||
*/
|
||||
protected static void ensureMd5DbDirectory() {
|
||||
File dir = new File(LOCAL_MD5_DB_DIR);
|
||||
if ( ! dir.exists() ) {
|
||||
System.out.printf("##### Creating MD5 db %s%n", LOCAL_MD5_DB_DIR);
|
||||
if ( ! dir.mkdir() ) {
|
||||
throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + LOCAL_MD5_DB_DIR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the path to an already existing file with the md5 contents, or valueIfNotFound
|
||||
* if no such file exists in the db.
|
||||
*
|
||||
* @param md5
|
||||
* @param valueIfNotFound
|
||||
* @return
|
||||
*/
|
||||
public static String getMD5FilePath(final String md5, final String valueIfNotFound) {
|
||||
// we prefer the local db to the global DB, so match it first
|
||||
for ( String dir : Arrays.asList(LOCAL_MD5_DB_DIR, GLOBAL_MD5_DB_DIR)) {
|
||||
File f = getFileForMD5(md5, dir);
|
||||
if ( f.exists() && f.canRead() )
|
||||
return f.getPath();
|
||||
}
|
||||
|
||||
return valueIfNotFound;
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function that given a file's md5 value and the path to the md5 db,
|
||||
* returns the canonical name of the file. For example, if md5 is XXX and db is YYY,
|
||||
* this will return YYY/XXX.integrationtest
|
||||
*
|
||||
* @param md5
|
||||
* @param dbPath
|
||||
* @return
|
||||
*/
|
||||
private static File getFileForMD5(final String md5, final String dbPath) {
|
||||
final String basename = String.format("%s.integrationtest", md5);
|
||||
return new File(dbPath + "/" + basename);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the results file with md5 value to its canonical file name and db places
|
||||
*
|
||||
* @param md5
|
||||
* @param resultsFile
|
||||
*/
|
||||
private static void updateMD5Db(final String md5, final File resultsFile) {
|
||||
copyFileToDB(getFileForMD5(md5, LOCAL_MD5_DB_DIR), resultsFile);
|
||||
copyFileToDB(getFileForMD5(md5, GLOBAL_MD5_DB_DIR), resultsFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Low-level utility routine that copies resultsFile to dbFile
|
||||
* @param dbFile
|
||||
* @param resultsFile
|
||||
*/
|
||||
private static void copyFileToDB(File dbFile, final File resultsFile) {
|
||||
if ( ! dbFile.exists() ) {
|
||||
// the file isn't already in the db, copy it over
|
||||
System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath());
|
||||
try {
|
||||
FileUtils.copyFile(resultsFile, dbFile);
|
||||
} catch ( IOException e ) {
|
||||
System.out.printf("##### Skipping update, cannot write file %s%n", dbFile);
|
||||
}
|
||||
} else {
|
||||
System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the byte[] of the entire contents of file, for md5 calculations
|
||||
* @param file
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private static byte[] getBytesFromFile(File file) throws IOException {
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
// Get the size of the file
|
||||
long length = file.length();
|
||||
|
||||
if (length > Integer.MAX_VALUE) {
|
||||
// File is too large
|
||||
}
|
||||
|
||||
// Create the byte array to hold the data
|
||||
byte[] bytes = new byte[(int) length];
|
||||
|
||||
// Read in the bytes
|
||||
int offset = 0;
|
||||
int numRead = 0;
|
||||
while (offset < bytes.length
|
||||
&& (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
|
||||
offset += numRead;
|
||||
}
|
||||
|
||||
// Ensure all the bytes have been read in
|
||||
if (offset < bytes.length) {
|
||||
throw new IOException("Could not completely read file " + file.getName());
|
||||
}
|
||||
|
||||
// Close the input stream and return bytes
|
||||
is.close();
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different.
|
||||
* @param name Name of the test.
|
||||
* @param resultsFile File to MD5.
|
||||
* @param expectedMD5 Expected MD5 value.
|
||||
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
|
||||
* @return The calculated MD5.
|
||||
*/
|
||||
public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize);
|
||||
|
||||
if (parameterize || expectedMD5.equals("")) {
|
||||
// Don't assert
|
||||
} else if ( filemd5sum.equals(expectedMD5) ) {
|
||||
System.out.println(String.format(" => %s PASSED", name));
|
||||
} else {
|
||||
Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum));
|
||||
}
|
||||
|
||||
return filemd5sum;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different.
|
||||
* @param name Name of the test.
|
||||
* @param resultsFile File to MD5.
|
||||
* @param expectedMD5 Expected MD5 value.
|
||||
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
|
||||
* @return The calculated MD5.
|
||||
*/
|
||||
public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
try {
|
||||
byte[] bytesOfMessage = getBytesFromFile(resultsFile);
|
||||
byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage);
|
||||
BigInteger bigInt = new BigInteger(1, thedigest);
|
||||
String filemd5sum = bigInt.toString(16);
|
||||
while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32
|
||||
|
||||
//
|
||||
// copy md5 to integrationtests
|
||||
//
|
||||
updateMD5Db(filemd5sum, resultsFile);
|
||||
|
||||
if (parameterize || expectedMD5.equals("")) {
|
||||
System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b",
|
||||
name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5)));
|
||||
} else {
|
||||
System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
|
||||
System.out.flush();
|
||||
|
||||
if ( ! expectedMD5.equals(filemd5sum) ) {
|
||||
// we are going to fail for real in assertEquals (so we are counted by the testing framework).
|
||||
// prepare ourselves for the comparison
|
||||
System.out.printf("##### Test %s is going fail #####%n", name);
|
||||
String pathToExpectedMD5File = getMD5FilePath(expectedMD5, "[No DB file found]");
|
||||
String pathToFileMD5File = getMD5FilePath(filemd5sum, "[No DB file found]");
|
||||
System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File);
|
||||
System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File);
|
||||
System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File);
|
||||
|
||||
// inline differences
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0);
|
||||
boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params);
|
||||
if ( success )
|
||||
System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n",
|
||||
pathToExpectedMD5File, pathToFileMD5File);
|
||||
}
|
||||
}
|
||||
|
||||
return filemd5sum;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -53,7 +53,7 @@ public class WalkerTest extends BaseTest {
|
|||
}
|
||||
|
||||
public String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) {
|
||||
return assertMatchingMD5(name, resultsFile, expectedMD5, parameterize());
|
||||
return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize());
|
||||
}
|
||||
|
||||
public void maybeValidateSupplementaryFile(final String name, final File resultFile) {
|
||||
|
|
@ -191,7 +191,7 @@ public class WalkerTest extends BaseTest {
|
|||
}
|
||||
|
||||
protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec) {
|
||||
ensureMd5DbDirectory(); // ensure the md5 directory exists
|
||||
MD5DB.ensureMd5DbDirectory(); // ensure the md5 directory exists
|
||||
|
||||
List<File> tmpFiles = new ArrayList<File>();
|
||||
for (int i = 0; i < spec.nOutputFiles; i++) {
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ public class DiffEngineUnitTest extends BaseTest {
|
|||
logger.warn("Test tree1: " + test.tree1.toOneLineString());
|
||||
logger.warn("Test tree2: " + test.tree2.toOneLineString());
|
||||
|
||||
List<SpecificDifference> diffs = engine.diff(test.tree1, test.tree2);
|
||||
List<Difference> diffs = engine.diff(test.tree1, test.tree2);
|
||||
logger.warn("Test expected diff : " + test.differences);
|
||||
logger.warn("Observed diffs : " + diffs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class DiffObjectsIntegrationTest extends WalkerTest {
|
||||
private class TestParams extends TestDataProvider {
|
||||
public File master, test;
|
||||
public String MD5;
|
||||
|
||||
private TestParams(String master, String test, String MD5) {
|
||||
super(TestParams.class);
|
||||
this.master = new File(master);
|
||||
this.test = new File(test);
|
||||
this.MD5 = MD5;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("master=%s,test=%s,md5=%s", master, test, MD5);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createData() {
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc");
|
||||
return TestParams.getTests(TestParams.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "data")
|
||||
public void testDiffs(TestParams params) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T DiffObjects -R public/testdata/exampleFASTA.fasta "
|
||||
+ " -m " + params.master
|
||||
+ " -t " + params.test
|
||||
+ " -o %s",
|
||||
Arrays.asList(params.MD5));
|
||||
executeTest("testDiffObjects:"+params, spec).getFirst();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class DiffableReaderUnitTest extends BaseTest {
|
|||
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
|
||||
|
||||
DiffNode node = diff.getValueAsNode();
|
||||
Assert.assertEquals(node.getElements().size(), 10);
|
||||
Assert.assertEquals(node.getElements().size(), 11);
|
||||
|
||||
// chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03
|
||||
DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode();
|
||||
|
|
|
|||
|
|
@ -75,10 +75,10 @@ public class DifferenceUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createTrees() {
|
||||
new DifferenceTest("A=X", "A=Y", "A:X!=Y");
|
||||
new DifferenceTest("A=Y", "A=X", "A:Y!=X");
|
||||
new DifferenceTest(DiffNode.fromString("A=X"), null, "A:X!=MISSING");
|
||||
new DifferenceTest(null, DiffNode.fromString("A=X"), "A:MISSING!=X");
|
||||
new DifferenceTest("A=X", "A=Y", "A:1:X!=Y");
|
||||
new DifferenceTest("A=Y", "A=X", "A:1:Y!=X");
|
||||
new DifferenceTest(DiffNode.fromString("A=X"), null, "A:1:X!=MISSING");
|
||||
new DifferenceTest(null, DiffNode.fromString("A=X"), "A:1:MISSING!=X");
|
||||
return DifferenceTest.getTests(DifferenceTest.class);
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class DifferenceUnitTest extends BaseTest {
|
|||
logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString()));
|
||||
logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString()));
|
||||
logger.warn("Test expected diff : " + test.difference);
|
||||
SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2);
|
||||
Difference diff = new Difference(test.tree1, test.tree2);
|
||||
logger.warn("Observed diffs : " + diff);
|
||||
Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broadinstitute.sting.MD5DB;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -65,8 +66,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -NO_HEADER" +
|
||||
" -B:input,VCF " + params.inVCF +
|
||||
" -o %s" +
|
||||
" -tranchesFile " + getFileForMD5(params.tranchesMD5) +
|
||||
" -recalFile " + getFileForMD5(params.recalMD5),
|
||||
" -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) +
|
||||
" -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null),
|
||||
Arrays.asList(params.cutVCFMD5));
|
||||
executeTest("testApplyRecalibration-"+params.inVCF, spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -70,7 +70,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); }
|
||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); }
|
||||
|
|
@ -81,9 +80,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); }
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); }
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083", false); }
|
||||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
|
||||
|
||||
|
|
@ -101,7 +100,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5"));
|
||||
Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e"));
|
||||
executeTest("threeWayWithRefs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -119,7 +118,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
@Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); }
|
||||
@Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); }
|
||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
|
||||
|
|
|
|||
|
|
@ -2,15 +2,16 @@ package org.broadinstitute.sting.utils.genotype.vcf;
|
|||
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringBufferInputStream;
|
||||
import java.io.*;
|
||||
import java.math.BigInteger;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -40,6 +41,52 @@ public class VCFHeaderUnitTest extends BaseTest {
|
|||
checkMD5ofHeaderFile(header, "ad8c4cf85e868b0261ab49ee2c613088");
|
||||
}
|
||||
|
||||
/**
|
||||
* a little utility function for all tests to md5sum a file
|
||||
* Shameless taken from:
|
||||
*
|
||||
* http://www.javalobby.org/java/forums/t84420.html
|
||||
*
|
||||
* @param file the file
|
||||
* @return a string
|
||||
*/
|
||||
private static String md5SumFile(File file) {
|
||||
MessageDigest digest;
|
||||
try {
|
||||
digest = MessageDigest.getInstance("MD5");
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new ReviewedStingException("Unable to find MD5 digest");
|
||||
}
|
||||
InputStream is;
|
||||
try {
|
||||
is = new FileInputStream(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ReviewedStingException("Unable to open file " + file);
|
||||
}
|
||||
byte[] buffer = new byte[8192];
|
||||
int read;
|
||||
try {
|
||||
while ((read = is.read(buffer)) > 0) {
|
||||
digest.update(buffer, 0, read);
|
||||
}
|
||||
byte[] md5sum = digest.digest();
|
||||
BigInteger bigInt = new BigInteger(1, md5sum);
|
||||
return bigInt.toString(16);
|
||||
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new ReviewedStingException("Unable to process file for MD5", e);
|
||||
}
|
||||
finally {
|
||||
try {
|
||||
is.close();
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void checkMD5ofHeaderFile(VCFHeader header, String md5sum) {
|
||||
File myTempFile = null;
|
||||
PrintWriter pw = null;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@
|
|||
<class name="edu.mit.broad.picard.genotype.geli.GeliFileWriter" />
|
||||
<class name="edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods" />
|
||||
<class name="edu.mit.broad.picard.util.PasteParser" />
|
||||
<class name="edu.mit.broad.picard.util.PicardAggregationFsUtil" />
|
||||
<class name="edu.mit.broad.picard.util.PicardFileSystemUtil" />
|
||||
<class name="edu.mit.broad.picard.variation.KnownVariantCodecV2" />
|
||||
<class name="edu.mit.broad.picard.variation.KnownVariantCodec" />
|
||||
<class name="edu.mit.broad.picard.variation.KnownVariantFileHeader" />
|
||||
|
|
|
|||
|
|
@ -72,6 +72,9 @@ class DataProcessingPipeline extends QScript {
|
|||
@Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false)
|
||||
var bwaThreads: Int = 1
|
||||
|
||||
@Input(doc="Dont perform validation on the BAM files", fullName="no_validation", shortName="nv", required=false)
|
||||
var noValidation: Boolean = false
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Global Variables
|
||||
|
|
@ -135,7 +138,7 @@ class DataProcessingPipeline extends QScript {
|
|||
}
|
||||
}
|
||||
|
||||
println("\n\n*** DEBUG ***\n")
|
||||
println("\n\n*** INPUT FILES ***\n")
|
||||
// Creating one file for each sample in the dataset
|
||||
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||
for ((sample, flist) <- sampleTable) {
|
||||
|
|
@ -149,7 +152,7 @@ class DataProcessingPipeline extends QScript {
|
|||
sampleBamFiles(sample) = sampleFileName
|
||||
add(joinBams(flist, sampleFileName))
|
||||
}
|
||||
println("*** DEBUG ***\n\n")
|
||||
println("*** INPUT FILES ***\n\n")
|
||||
|
||||
return sampleBamFiles.toMap
|
||||
}
|
||||
|
|
@ -246,7 +249,12 @@ class DataProcessingPipeline extends QScript {
|
|||
val preValidateLog = swapExt(bam, ".bam", ".pre.validation")
|
||||
val postValidateLog = swapExt(bam, ".bam", ".post.validation")
|
||||
|
||||
add(validate(bam, preValidateLog))
|
||||
// Validation is an optional step for the BAM file generated after
|
||||
// alignment and the final bam file of the pipeline.
|
||||
if (!noValidation) {
|
||||
add(validate(bam, preValidateLog),
|
||||
validate(recalBam, postValidateLog))
|
||||
}
|
||||
|
||||
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||
add(target(bam, targetIntervals))
|
||||
|
|
@ -257,8 +265,8 @@ class DataProcessingPipeline extends QScript {
|
|||
recal(dedupedBam, preRecalFile, recalBam),
|
||||
cov(recalBam, postRecalFile),
|
||||
analyzeCovariates(preRecalFile, preOutPath),
|
||||
analyzeCovariates(postRecalFile, postOutPath),
|
||||
validate(recalBam, postValidateLog))
|
||||
analyzeCovariates(postRecalFile, postOutPath))
|
||||
|
||||
|
||||
cohortList :+= recalBam
|
||||
}
|
||||
|
|
@ -282,6 +290,13 @@ class DataProcessingPipeline extends QScript {
|
|||
this.isIntermediate = true
|
||||
}
|
||||
|
||||
// General arguments to non-GATK tools
|
||||
trait ExternalCommonArgs extends CommandLineFunction {
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = true
|
||||
}
|
||||
|
||||
|
||||
case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
||||
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||
this.input_file :+= inBams
|
||||
|
|
@ -300,8 +315,8 @@ class DataProcessingPipeline extends QScript {
|
|||
this.targetIntervals = tIntervals
|
||||
this.out = outBam
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
if (!indels.isEmpty)
|
||||
this.rodBind :+= RodBind("indels", "VCF", indels)
|
||||
if (!qscript.indels.isEmpty)
|
||||
this.rodBind :+= RodBind("indels", "VCF", qscript.indels)
|
||||
this.consensusDeterminationModel = consensusDeterminationModel
|
||||
this.compress = 0
|
||||
this.scatterCount = nContigs
|
||||
|
|
@ -332,7 +347,6 @@ class DataProcessingPipeline extends QScript {
|
|||
this.isIntermediate = false
|
||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -350,48 +364,41 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
}
|
||||
|
||||
case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates {
|
||||
case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs {
|
||||
this.input = List(inBam)
|
||||
this.output = outBam
|
||||
this.metrics = metricsFile
|
||||
this.memoryLimit = 6
|
||||
this.isIntermediate = true
|
||||
this.analysisName = queueLogDir + outBam + ".dedup"
|
||||
this.jobName = queueLogDir + outBam + ".dedup"
|
||||
}
|
||||
|
||||
case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles {
|
||||
case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
|
||||
this.input = inBams
|
||||
this.output = outBam
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = true
|
||||
this.analysisName = queueLogDir + outBam + ".joinBams"
|
||||
this.jobName = queueLogDir + outBam + ".joinBams"
|
||||
}
|
||||
|
||||
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam {
|
||||
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs {
|
||||
this.input = List(inSam)
|
||||
this.output = outBam
|
||||
this.sortOrder = sortOrderP
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = true
|
||||
this.analysisName = queueLogDir + outBam + ".sortSam"
|
||||
this.jobName = queueLogDir + outBam + ".sortSam"
|
||||
}
|
||||
|
||||
case class validate (inBam: File, outLog: File) extends ValidateSamFile {
|
||||
case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs {
|
||||
this.input = List(inBam)
|
||||
this.output = outLog
|
||||
this.maxRecordsInRam = 100000
|
||||
this.REFERENCE_SEQUENCE = qscript.reference
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = false
|
||||
this.analysisName = queueLogDir + outLog + ".validate"
|
||||
this.jobName = queueLogDir + outLog + ".validate"
|
||||
}
|
||||
|
||||
|
||||
case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups {
|
||||
case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups with ExternalCommonArgs {
|
||||
this.input = List(inBam)
|
||||
this.output = outBam
|
||||
this.RGID = readGroup.id
|
||||
|
|
@ -407,12 +414,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outBam + ".rg"
|
||||
}
|
||||
|
||||
trait BWACommonArgs extends CommandLineFunction {
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = true
|
||||
}
|
||||
|
||||
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with BWACommonArgs {
|
||||
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs {
|
||||
@Input(doc="bam file to be aligned") var bam = inBam
|
||||
@Output(doc="output sai file") var sai = outSai
|
||||
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai
|
||||
|
|
@ -420,7 +422,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outSai + ".bwa_aln_se"
|
||||
}
|
||||
|
||||
case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with BWACommonArgs {
|
||||
case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs {
|
||||
@Input(doc="bam file to be aligned") var bam = inBam
|
||||
@Output(doc="output sai file for 1st mating pair") var sai = outSai1
|
||||
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai
|
||||
|
|
@ -428,7 +430,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1"
|
||||
}
|
||||
|
||||
case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with BWACommonArgs {
|
||||
case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
|
||||
@Input(doc="bam file to be aligned") var bam = inBam
|
||||
@Input(doc="bwa alignment index file") var sai = inSai
|
||||
@Output(doc="output aligned bam file") var alignedBam = outBam
|
||||
|
|
@ -437,7 +439,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outBam + ".bwa_sam_se"
|
||||
}
|
||||
|
||||
case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with BWACommonArgs {
|
||||
case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
|
||||
@Input(doc="bam file to be aligned") var bam = inBam
|
||||
@Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1
|
||||
@Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2
|
||||
|
|
|
|||
|
|
@ -20,14 +20,14 @@ class RecalibrateBaseQualities extends QScript {
|
|||
@Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
|
||||
var input: File = _
|
||||
|
||||
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false)
|
||||
var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R")
|
||||
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true)
|
||||
var R: String = _
|
||||
|
||||
@Input(doc="Reference fasta file", shortName="R", required=false)
|
||||
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
||||
@Input(doc="Reference fasta file", shortName="R", required=true)
|
||||
var reference: File = _ // new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
||||
|
||||
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
|
||||
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
||||
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=true)
|
||||
var dbSNP: File = _ // new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
||||
|
||||
val queueLogDir: String = ".qlog/"
|
||||
var nContigs: Int = 0
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.CommandLineProgram
|
|||
import java.util.Date
|
||||
import java.text.SimpleDateFormat
|
||||
import org.broadinstitute.sting.BaseTest
|
||||
import org.broadinstitute.sting.MD5DB
|
||||
import org.broadinstitute.sting.queue.QCommandLine
|
||||
import org.broadinstitute.sting.queue.util.{Logging, ProcessController}
|
||||
import java.io.{FileNotFoundException, File}
|
||||
|
|
@ -105,7 +106,7 @@ object PipelineTest extends BaseTest with Logging {
|
|||
private def assertMatchingMD5s(name: String, fileMD5s: Traversable[(File, String)], parameterize: Boolean) {
|
||||
var failed = 0
|
||||
for ((file, expectedMD5) <- fileMD5s) {
|
||||
val calculatedMD5 = BaseTest.testFileMD5(name, file, expectedMD5, parameterize)
|
||||
val calculatedMD5 = MD5DB.testFileMD5(name, file, expectedMD5, parameterize)
|
||||
if (!parameterize && expectedMD5 != "" && expectedMD5 != calculatedMD5)
|
||||
failed += 1
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1941" status="integration" publication="20110614114100" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1954" status="integration" publication="20110712113100" />
|
||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1959" status="integration" publication="20110718185300" />
|
||||
</ivy-module>
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="picard" revision="1.48.889" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="picard" revision="1.49.895" status="release" />
|
||||
</ivy-module>
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="sam" revision="1.48.889" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="sam" revision="1.49.895" status="release" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue