Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
9283ac906e
|
|
@ -981,6 +981,7 @@
|
||||||
<delete dir="out"/>
|
<delete dir="out"/>
|
||||||
<delete dir="${build.dir}"/>
|
<delete dir="${build.dir}"/>
|
||||||
<delete dir="${lib.dir}"/>
|
<delete dir="${lib.dir}"/>
|
||||||
|
<delete dir="dump"/>
|
||||||
<delete dir="staging"/>
|
<delete dir="staging"/>
|
||||||
<delete dir="${dist.dir}"/>
|
<delete dir="${dist.dir}"/>
|
||||||
<delete dir="pipelinetests"/>
|
<delete dir="pipelinetests"/>
|
||||||
|
|
|
||||||
|
|
@ -51,12 +51,11 @@ import java.util.zip.GZIPInputStream;
|
||||||
* Class implementing diffnode reader for BAM
|
* Class implementing diffnode reader for BAM
|
||||||
*/
|
*/
|
||||||
public class BAMDiffableReader implements DiffableReader {
|
public class BAMDiffableReader implements DiffableReader {
|
||||||
private final static int MAX_RECORDS_TO_READ = 1000;
|
|
||||||
@Override
|
@Override
|
||||||
public String getName() { return "BAM"; }
|
public String getName() { return "BAM"; }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DiffElement readFromFile(File file) {
|
public DiffElement readFromFile(File file, int maxElementsToRead) {
|
||||||
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
|
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
|
||||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||||
|
|
||||||
|
|
@ -65,7 +64,7 @@ public class BAMDiffableReader implements DiffableReader {
|
||||||
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while ( iterator.hasNext() ) {
|
while ( iterator.hasNext() ) {
|
||||||
if ( count++ > MAX_RECORDS_TO_READ )
|
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||||
break;
|
break;
|
||||||
final SAMRecord record = iterator.next();
|
final SAMRecord record = iterator.next();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -115,4 +115,8 @@ public class DiffElement {
|
||||||
else
|
else
|
||||||
throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
|
throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return 1 + getValue().size();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,11 +24,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
import com.google.java.contract.Requires;
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -60,7 +58,7 @@ public class DiffEngine {
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
public List<Difference> diff(DiffElement master, DiffElement test) {
|
public List<SpecificDifference> diff(DiffElement master, DiffElement test) {
|
||||||
DiffValue masterValue = master.getValue();
|
DiffValue masterValue = master.getValue();
|
||||||
DiffValue testValue = test.getValue();
|
DiffValue testValue = test.getValue();
|
||||||
|
|
||||||
|
|
@ -70,14 +68,14 @@ public class DiffEngine {
|
||||||
return diff(masterValue, testValue);
|
return diff(masterValue, testValue);
|
||||||
} else {
|
} else {
|
||||||
// structural difference in types. one is node, other is leaf
|
// structural difference in types. one is node, other is leaf
|
||||||
return Arrays.asList(new Difference(master, test));
|
return Arrays.asList(new SpecificDifference(master, test));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Difference> diff(DiffNode master, DiffNode test) {
|
public List<SpecificDifference> diff(DiffNode master, DiffNode test) {
|
||||||
Set<String> allNames = new HashSet<String>(master.getElementNames());
|
Set<String> allNames = new HashSet<String>(master.getElementNames());
|
||||||
allNames.addAll(test.getElementNames());
|
allNames.addAll(test.getElementNames());
|
||||||
List<Difference> diffs = new ArrayList<Difference>();
|
List<SpecificDifference> diffs = new ArrayList<SpecificDifference>();
|
||||||
|
|
||||||
for ( String name : allNames ) {
|
for ( String name : allNames ) {
|
||||||
DiffElement masterElt = master.getElement(name);
|
DiffElement masterElt = master.getElement(name);
|
||||||
|
|
@ -86,7 +84,7 @@ public class DiffEngine {
|
||||||
throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
|
throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
|
||||||
} else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
|
} else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
|
||||||
// todo -- should one of these be a special MISSING item?
|
// todo -- should one of these be a special MISSING item?
|
||||||
diffs.add(new Difference(masterElt, testElt));
|
diffs.add(new SpecificDifference(masterElt, testElt));
|
||||||
} else {
|
} else {
|
||||||
diffs.addAll(diff(masterElt, testElt));
|
diffs.addAll(diff(masterElt, testElt));
|
||||||
}
|
}
|
||||||
|
|
@ -95,11 +93,11 @@ public class DiffEngine {
|
||||||
return diffs;
|
return diffs;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Difference> diff(DiffValue master, DiffValue test) {
|
public List<SpecificDifference> diff(DiffValue master, DiffValue test) {
|
||||||
if ( master.getValue().equals(test.getValue()) ) {
|
if ( master.getValue().equals(test.getValue()) ) {
|
||||||
return Collections.emptyList();
|
return Collections.emptyList();
|
||||||
} else {
|
} else {
|
||||||
return Arrays.asList(new Difference(master.getBinding(), test.getBinding()));
|
return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -147,64 +145,68 @@ public class DiffEngine {
|
||||||
* @param params determines how we display the items
|
* @param params determines how we display the items
|
||||||
* @param diffs
|
* @param diffs
|
||||||
*/
|
*/
|
||||||
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
|
public void reportSummarizedDifferences(List<SpecificDifference> diffs, SummaryReportParams params ) {
|
||||||
printSummaryReport(summarizeDifferences(diffs), params );
|
printSummaryReport(summarizeDifferences(diffs), params );
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<SummarizedDifference> summarizeDifferences(List<Difference> diffs) {
|
public List<Difference> summarizeDifferences(List<SpecificDifference> diffs) {
|
||||||
List<String[]> diffPaths = new ArrayList<String[]>(diffs.size());
|
return summarizedDifferencesOfPaths(diffs);
|
||||||
|
|
||||||
for ( Difference diff1 : diffs ) {
|
|
||||||
diffPaths.add(diffNameToPath(diff1.getFullyQualifiedName()));
|
|
||||||
}
|
|
||||||
|
|
||||||
return summarizedDifferencesOfPaths(diffPaths);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final protected static String[] diffNameToPath(String diffName) {
|
final protected static String[] diffNameToPath(String diffName) {
|
||||||
return diffName.split("\\.");
|
return diffName.split("\\.");
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<SummarizedDifference> summarizedDifferencesOfPaths(List<String[]> diffPaths) {
|
protected List<Difference> summarizedDifferencesOfPathsFromString(List<String> singletonDiffs) {
|
||||||
Map<String, SummarizedDifference> summaries = new HashMap<String, SummarizedDifference>();
|
List<Difference> diffs = new ArrayList<Difference>();
|
||||||
|
|
||||||
|
for ( String diff : singletonDiffs ) {
|
||||||
|
diffs.add(new Difference(diff));
|
||||||
|
}
|
||||||
|
|
||||||
|
return summarizedDifferencesOfPaths(diffs);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected List<Difference> summarizedDifferencesOfPaths(List<? extends Difference> singletonDiffs) {
|
||||||
|
Map<String, Difference> summaries = new HashMap<String, Difference>();
|
||||||
|
|
||||||
// create the initial set of differences
|
// create the initial set of differences
|
||||||
for ( int i = 0; i < diffPaths.size(); i++ ) {
|
for ( int i = 0; i < singletonDiffs.size(); i++ ) {
|
||||||
for ( int j = 0; j <= i; j++ ) {
|
for ( int j = 0; j <= i; j++ ) {
|
||||||
String[] diffPath1 = diffPaths.get(i);
|
Difference diffPath1 = singletonDiffs.get(i);
|
||||||
String[] diffPath2 = diffPaths.get(j);
|
Difference diffPath2 = singletonDiffs.get(j);
|
||||||
if ( diffPath1.length == diffPath2.length ) {
|
if ( diffPath1.length() == diffPath2.length() ) {
|
||||||
int lcp = longestCommonPostfix(diffPath1, diffPath2);
|
int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts());
|
||||||
String path = lcp > 0 ? summarizedPath(diffPath2, lcp) : Utils.join(".", diffPath2);
|
String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath();
|
||||||
addSummary(summaries, path, true);
|
addSummary(summaries, path, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// count differences
|
// count differences
|
||||||
for ( String[] diffPath : diffPaths ) {
|
for ( Difference diffPath : singletonDiffs ) {
|
||||||
for ( SummarizedDifference sumDiff : summaries.values() ) {
|
for ( Difference sumDiff : summaries.values() ) {
|
||||||
if ( sumDiff.matches(diffPath) )
|
if ( sumDiff.matches(diffPath.getParts()) )
|
||||||
addSummary(summaries, sumDiff.getPath(), false);
|
addSummary(summaries, sumDiff.getPath(), false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
List<SummarizedDifference> sortedSummaries = new ArrayList<SummarizedDifference>(summaries.values());
|
List<Difference> sortedSummaries = new ArrayList<Difference>(summaries.values());
|
||||||
Collections.sort(sortedSummaries);
|
Collections.sort(sortedSummaries);
|
||||||
return sortedSummaries;
|
return sortedSummaries;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void addSummary(Map<String, SummarizedDifference> summaries, String path, boolean onlyCatalog) {
|
private static void addSummary(Map<String, Difference> summaries, String path, boolean onlyCatalog) {
|
||||||
if ( summaries.containsKey(path) ) {
|
if ( summaries.containsKey(path) ) {
|
||||||
if ( ! onlyCatalog )
|
if ( ! onlyCatalog )
|
||||||
summaries.get(path).incCount();
|
summaries.get(path).incCount();
|
||||||
} else {
|
} else {
|
||||||
SummarizedDifference sumDiff = new SummarizedDifference(path);
|
Difference sumDiff = new Difference(path);
|
||||||
summaries.put(sumDiff.getPath(), sumDiff);
|
summaries.put(sumDiff.getPath(), sumDiff);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void printSummaryReport(List<SummarizedDifference> sortedSummaries, SummaryReportParams params ) {
|
protected void printSummaryReport(List<Difference> sortedSummaries, SummaryReportParams params ) {
|
||||||
GATKReport report = new GATKReport();
|
GATKReport report = new GATKReport();
|
||||||
final String tableName = "diffences";
|
final String tableName = "diffences";
|
||||||
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information");
|
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information");
|
||||||
|
|
@ -213,7 +215,7 @@ public class DiffEngine {
|
||||||
table.addColumn("NumberOfOccurrences", 0);
|
table.addColumn("NumberOfOccurrences", 0);
|
||||||
|
|
||||||
int count = 0, count1 = 0;
|
int count = 0, count1 = 0;
|
||||||
for ( SummarizedDifference diff : sortedSummaries ) {
|
for ( Difference diff : sortedSummaries ) {
|
||||||
if ( diff.getCount() < params.minSumDiffToShow )
|
if ( diff.getCount() < params.minSumDiffToShow )
|
||||||
// in order, so break as soon as the count is too low
|
// in order, so break as soon as the count is too low
|
||||||
break;
|
break;
|
||||||
|
|
@ -261,76 +263,6 @@ public class DiffEngine {
|
||||||
return Utils.join(".", parts);
|
return Utils.join(".", parts);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* TODO -- all of the algorithms above should use SummarizedDifference instead
|
|
||||||
* TODO -- of some SummarizedDifferences and some low-level String[]
|
|
||||||
*/
|
|
||||||
public static class SummarizedDifference implements Comparable<SummarizedDifference> {
|
|
||||||
final String path; // X.Y.Z
|
|
||||||
final String[] parts;
|
|
||||||
int count = 0;
|
|
||||||
|
|
||||||
public SummarizedDifference(String path) {
|
|
||||||
this.path = path;
|
|
||||||
this.parts = diffNameToPath(path);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void incCount() { count++; }
|
|
||||||
|
|
||||||
public int getCount() {
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The fully qualified path object A.B.C etc
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public String getPath() {
|
|
||||||
return path;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the length of the parts of this summary
|
|
||||||
*/
|
|
||||||
public int length() {
|
|
||||||
return this.parts.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if the string parts match this summary. The parts
|
|
||||||
* must be equal() everywhere this summary isn't *.
|
|
||||||
* @param otherParts
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public boolean matches(String[] otherParts) {
|
|
||||||
if ( otherParts.length != length() )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// TODO optimization: can start at right most non-star element
|
|
||||||
for ( int i = 0; i < length(); i++ ) {
|
|
||||||
String part = parts[i];
|
|
||||||
if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return String.format("%s:%d", getPath(), getCount());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareTo(SummarizedDifference other) {
|
|
||||||
// sort first highest to lowest count, then by lowest to highest path
|
|
||||||
int countCmp = Integer.valueOf(count).compareTo(other.count);
|
|
||||||
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// plugin manager
|
// plugin manager
|
||||||
|
|
@ -385,12 +317,17 @@ public class DiffEngine {
|
||||||
return findReaderForFile(file) != null;
|
return findReaderForFile(file) != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public DiffElement createDiffableFromFile(File file) {
|
public DiffElement createDiffableFromFile(File file) {
|
||||||
|
return createDiffableFromFile(file, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public DiffElement createDiffableFromFile(File file, int maxElementsToRead) {
|
||||||
DiffableReader reader = findReaderForFile(file);
|
DiffableReader reader = findReaderForFile(file);
|
||||||
if ( reader == null )
|
if ( reader == null )
|
||||||
throw new UserException("Unsupported file type: " + file);
|
throw new UserException("Unsupported file type: " + file);
|
||||||
else
|
else
|
||||||
return reader.readFromFile(file);
|
return reader.readFromFile(file, maxElementsToRead);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {
|
public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {
|
||||||
|
|
@ -399,7 +336,7 @@ public class DiffEngine {
|
||||||
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
|
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
|
||||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
||||||
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
||||||
List<Difference> diffs = diffEngine.diff(master, test);
|
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -107,11 +107,13 @@ public class DiffNode extends DiffValue {
|
||||||
return getElements(false);
|
return getElements(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the element bound to name, or null if no such binding exists
|
||||||
|
* @param name
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
public DiffElement getElement(String name) {
|
public DiffElement getElement(String name) {
|
||||||
for ( DiffElement elt : getElements() )
|
return getElementMap().get(name);
|
||||||
if ( elt.getName().equals(name) )
|
|
||||||
return elt;
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -151,6 +153,13 @@ public class DiffNode extends DiffValue {
|
||||||
add(new DiffElement(name, this.getBinding(), new DiffValue(value)));
|
add(new DiffElement(name, this.getBinding(), new DiffValue(value)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
int count = 0;
|
||||||
|
for ( DiffElement value : getElements() )
|
||||||
|
count += value.size();
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// toString
|
// toString
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
import org.apache.xmlbeans.impl.tool.Diff;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
|
@ -48,11 +47,14 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
||||||
@Output(doc="File to which results should be written",required=true)
|
@Output(doc="File to which results should be written",required=true)
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
@Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false)
|
@Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false)
|
||||||
int MAX_RECORDS = 0;
|
int MAX_OBJECTS_TO_READ = -1;
|
||||||
|
|
||||||
@Argument(fullName="maxCount1Records", shortName="M1", doc="Max. number of records occuring exactly once in the file to process", required=false)
|
@Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false)
|
||||||
int MAX_COUNT1_RECORDS = 0;
|
int MAX_DIFFS = 0;
|
||||||
|
|
||||||
|
@Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false)
|
||||||
|
int MAX_COUNT1_DIFFS = 0;
|
||||||
|
|
||||||
@Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
|
@Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
|
||||||
int minCountForDiff = 1;
|
int minCountForDiff = 1;
|
||||||
|
|
@ -91,23 +93,25 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
||||||
@Override
|
@Override
|
||||||
public void onTraversalDone(Integer sum) {
|
public void onTraversalDone(Integer sum) {
|
||||||
out.printf("Reading master file %s%n", masterFile);
|
out.printf("Reading master file %s%n", masterFile);
|
||||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
|
||||||
|
out.printf(" Read %d objects%n", master.size());
|
||||||
out.printf("Reading test file %s%n", testFile);
|
out.printf("Reading test file %s%n", testFile);
|
||||||
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
|
||||||
|
out.printf(" Read %d objects%n", test.size());
|
||||||
|
|
||||||
// out.printf("Master diff objects%n");
|
// out.printf("Master diff objects%n");
|
||||||
// out.println(master.toString());
|
// out.println(master.toString());
|
||||||
// out.printf("Test diff objects%n");
|
// out.printf("Test diff objects%n");
|
||||||
// out.println(test.toString());
|
// out.println(test.toString());
|
||||||
|
|
||||||
List<Difference> diffs = diffEngine.diff(master, test);
|
List<SpecificDifference> diffs = diffEngine.diff(master, test);
|
||||||
if ( showItemizedDifferences ) {
|
if ( showItemizedDifferences ) {
|
||||||
out.printf("Itemized results%n");
|
out.printf("Itemized results%n");
|
||||||
for ( Difference diff : diffs )
|
for ( SpecificDifference diff : diffs )
|
||||||
out.printf("DIFF: %s%n", diff.toString());
|
out.printf("DIFF: %s%n", diff.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_RECORDS, MAX_COUNT1_RECORDS, minCountForDiff);
|
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
|
||||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -87,4 +87,5 @@ public class DiffValue {
|
||||||
|
|
||||||
public boolean isAtomic() { return true; }
|
public boolean isAtomic() { return true; }
|
||||||
public boolean isCompound() { return ! isAtomic(); }
|
public boolean isCompound() { return ! isAtomic(); }
|
||||||
|
public int size() { return 1; }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ public interface DiffableReader {
|
||||||
|
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
@Requires("file != null")
|
@Requires("file != null")
|
||||||
public DiffElement readFromFile(File file);
|
public DiffElement readFromFile(File file, int maxElementsToRead);
|
||||||
|
|
||||||
@Requires("file != null")
|
@Requires("file != null")
|
||||||
public boolean canRead(File file);
|
public boolean canRead(File file);
|
||||||
|
|
|
||||||
|
|
@ -24,35 +24,72 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
/**
|
public class Difference implements Comparable<Difference> {
|
||||||
* Created by IntelliJ IDEA.
|
final String path; // X.Y.Z
|
||||||
* User: depristo
|
final String[] parts;
|
||||||
* Date: 7/4/11
|
int count = 0;
|
||||||
* Time: 12:53 PM
|
|
||||||
*
|
|
||||||
* Represents a specific difference between two specific DiffElements
|
|
||||||
*/
|
|
||||||
public class Difference {
|
|
||||||
DiffElement master, test;
|
|
||||||
|
|
||||||
public Difference(DiffElement master, DiffElement test) {
|
public Difference(String path) {
|
||||||
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
|
this.path = path;
|
||||||
this.master = master;
|
this.parts = DiffEngine.diffNameToPath(path);
|
||||||
this.test = test;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String[] getParts() {
|
||||||
|
return parts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incCount() { count++; }
|
||||||
|
|
||||||
|
public int getCount() {
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The fully qualified path object A.B.C etc
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public String getPath() {
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the length of the parts of this summary
|
||||||
|
*/
|
||||||
|
public int length() {
|
||||||
|
return this.parts.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the string parts match this summary. The parts
|
||||||
|
* must be equal() everywhere this summary isn't *.
|
||||||
|
* @param otherParts
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public boolean matches(String[] otherParts) {
|
||||||
|
if ( otherParts.length != length() )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// TODO optimization: can start at right most non-star element
|
||||||
|
for ( int i = 0; i < length(); i++ ) {
|
||||||
|
String part = parts[i];
|
||||||
|
if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("%s:%s!=%s",
|
return String.format("%s:%d", getPath(), getCount());
|
||||||
getFullyQualifiedName(),
|
|
||||||
getOneLineString(master),
|
|
||||||
getOneLineString(test));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getFullyQualifiedName() {
|
@Override
|
||||||
return (master == null ? test : master).fullyQualifiedName();
|
public int compareTo(Difference other) {
|
||||||
|
// sort first highest to lowest count, then by lowest to highest path
|
||||||
|
int countCmp = Integer.valueOf(count).compareTo(other.count);
|
||||||
|
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String getOneLineString(DiffElement elt) {
|
|
||||||
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: depristo
|
||||||
|
* Date: 7/4/11
|
||||||
|
* Time: 12:53 PM
|
||||||
|
*
|
||||||
|
* Represents a specific difference between two specific DiffElements
|
||||||
|
*/
|
||||||
|
public class SpecificDifference extends Difference {
|
||||||
|
DiffElement master, test;
|
||||||
|
|
||||||
|
public SpecificDifference(DiffElement master, DiffElement test) {
|
||||||
|
super(createName(master, test));
|
||||||
|
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
|
||||||
|
this.master = master;
|
||||||
|
this.test = test;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("%s:%s!=%s",
|
||||||
|
getPath(),
|
||||||
|
getOneLineString(master),
|
||||||
|
getOneLineString(test));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String createName(DiffElement master, DiffElement test) {
|
||||||
|
return (master == null ? test : master).fullyQualifiedName();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getOneLineString(DiffElement elt) {
|
||||||
|
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -26,16 +26,12 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
import org.broad.tribble.readers.AsciiLineReader;
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.zip.GZIPInputStream;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -51,15 +47,27 @@ public class VCFDiffableReader implements DiffableReader {
|
||||||
public String getName() { return "VCF"; }
|
public String getName() { return "VCF"; }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DiffElement readFromFile(File file) {
|
public DiffElement readFromFile(File file, int maxElementsToRead) {
|
||||||
DiffNode root = DiffNode.rooted(file.getName());
|
DiffNode root = DiffNode.rooted(file.getName());
|
||||||
try {
|
try {
|
||||||
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
|
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
|
||||||
VCFCodec vcfCodec = new VCFCodec();
|
VCFCodec vcfCodec = new VCFCodec();
|
||||||
|
|
||||||
|
// must be read as state is stored in reader itself
|
||||||
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
|
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
|
||||||
|
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
|
||||||
|
String key = headerLine.getKey();
|
||||||
|
if ( headerLine instanceof VCFNamedHeaderLine )
|
||||||
|
key += "_" + ((VCFNamedHeaderLine) headerLine).getName();
|
||||||
|
root.add(key, headerLine.toString());
|
||||||
|
}
|
||||||
|
|
||||||
String line = lineReader.readLine();
|
String line = lineReader.readLine();
|
||||||
|
int count = 0;
|
||||||
while ( line != null ) {
|
while ( line != null ) {
|
||||||
|
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||||
|
break;
|
||||||
|
|
||||||
VariantContext vc = (VariantContext)vcfCodec.decode(line);
|
VariantContext vc = (VariantContext)vcfCodec.decode(line);
|
||||||
String name = vc.getChr() + ":" + vc.getStart();
|
String name = vc.getChr() + ":" + vc.getStart();
|
||||||
DiffNode vcRoot = DiffNode.empty(name, root);
|
DiffNode vcRoot = DiffNode.empty(name, root);
|
||||||
|
|
|
||||||
|
|
@ -220,6 +220,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly
|
unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly
|
||||||
|
|
||||||
|
if (DEBUG)
|
||||||
|
logger.debug("Unprocessed variant = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc));
|
||||||
}
|
}
|
||||||
|
|
||||||
int numReads = 0;
|
int numReads = 0;
|
||||||
|
|
@ -1105,7 +1108,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
this.alleles = vc.getAlleles();
|
this.alleles = vc.getAlleles();
|
||||||
this.genotypes = new HashMap<String, Genotype>(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable
|
this.genotypes = new HashMap<String, Genotype>(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable
|
||||||
this.negLog10PError = vc.getNegLog10PError();
|
this.negLog10PError = vc.getNegLog10PError();
|
||||||
this.filters = vc.getFilters();
|
this.filters = vc.filtersWereApplied() ? vc.getFilters() : null;
|
||||||
this.attributes = new HashMap<String, Object>(vc.getAttributes());
|
this.attributes = new HashMap<String, Object>(vc.getAttributes());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import org.broad.tribble.bed.BEDCodec;
|
import org.broad.tribble.bed.BEDCodec;
|
||||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||||
|
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||||
import org.broadinstitute.sting.commandline.Gather;
|
import org.broadinstitute.sting.commandline.Gather;
|
||||||
|
|
@ -75,7 +76,7 @@ import java.util.Map;
|
||||||
|
|
||||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||||
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
|
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
|
||||||
@ReadFilters( {MappingQualityZeroReadFilter.class} ) // Filter out all reads with zero mapping quality
|
@ReadFilters( {MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
|
||||||
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
|
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
|
||||||
@PartitionBy(PartitionType.LOCUS)
|
@PartitionBy(PartitionType.LOCUS)
|
||||||
public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {
|
public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {
|
||||||
|
|
|
||||||
|
|
@ -199,8 +199,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||||
// setup the header fields
|
// setup the header fields
|
||||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||||
hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
//hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
//hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||||
|
|
||||||
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
|
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
|
||||||
for ( VCFHeaderLine field : hInfo ) {
|
for ( VCFHeaderLine field : hInfo ) {
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,8 @@ import org.broad.tribble.NameAwareCodec;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
@ -96,6 +98,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
for ( String str : headerStrings ) {
|
for ( String str : headerStrings ) {
|
||||||
if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
|
if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
|
||||||
String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR);
|
String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR);
|
||||||
|
if ( strings.length < VCFHeader.HEADER_FIELDS.values().length )
|
||||||
|
throw new TribbleException.InvalidHeader("there are not enough columns present in the header line: " + str);
|
||||||
|
|
||||||
int arrayIndex = 0;
|
int arrayIndex = 0;
|
||||||
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||||
try {
|
try {
|
||||||
|
|
@ -159,12 +164,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
}
|
}
|
||||||
|
|
||||||
private Feature reallyDecode(String line) {
|
private Feature reallyDecode(String line) {
|
||||||
try {
|
|
||||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
||||||
|
|
||||||
// our header cannot be null, we need the genotype sample names and counts
|
// our header cannot be null, we need the genotype sample names and counts
|
||||||
if (header == null) throw new IllegalStateException("VCF Header cannot be null when decoding a record");
|
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
|
||||||
|
|
||||||
if (parts == null)
|
if (parts == null)
|
||||||
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
|
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
|
||||||
|
|
@ -174,17 +178,18 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
// if we don't have a header, or we have a header with no genotyping data, check that we have eight columns. Otherwise check that we have nine (normal columns + genotyping data)
|
// if we don't have a header, or we have a header with no genotyping data, check that we have eight columns. Otherwise check that we have nine (normal columns + genotyping data)
|
||||||
if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
|
if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
|
||||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||||
throw new IllegalArgumentException("There aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||||
" tokens, and saw " + nParts + " )");
|
" tokens, and saw " + nParts + " )", lineNo);
|
||||||
|
|
||||||
return parseVCFLine(parts);
|
return parseVCFLine(parts);
|
||||||
} catch (TribbleException e) {
|
|
||||||
throw new TribbleException.InvalidDecodeLine(e.getMessage(), line);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void generateException(String message) {
|
protected void generateException(String message) {
|
||||||
throw new TribbleException.InvalidDecodeLine(message, lineNo);
|
throw new UserException.MalformedVCF(message, lineNo);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void generateException(String message, int lineNo) {
|
||||||
|
throw new UserException.MalformedVCF(message, lineNo);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -472,10 +477,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void generateException(String message, int lineNo) {
|
|
||||||
throw new TribbleException.InvalidDecodeLine(message, lineNo);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
private static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
||||||
boolean clipping = true;
|
boolean clipping = true;
|
||||||
// Note that the computation of forward clipping here is meant only to see whether there is a common
|
// Note that the computation of forward clipping here is meant only to see whether there is a common
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broad.tribble.index.IndexFactory;
|
||||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
import org.broad.tribble.util.PositionalStream;
|
import org.broad.tribble.util.PositionalStream;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
|
@ -300,10 +301,7 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
} else {
|
} else {
|
||||||
List<String> genotypeAttributeKeys = new ArrayList<String>();
|
List<String> genotypeAttributeKeys = new ArrayList<String>();
|
||||||
if ( vc.hasGenotypes() ) {
|
if ( vc.hasGenotypes() ) {
|
||||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc));
|
||||||
for ( String key : calcVCFGenotypeKeys(vc) ) {
|
|
||||||
genotypeAttributeKeys.add(key);
|
|
||||||
}
|
|
||||||
} else if ( mHeader.hasGenotypingData() ) {
|
} else if ( mHeader.hasGenotypingData() ) {
|
||||||
// this needs to be done in case all samples are no-calls
|
// this needs to be done in case all samples are no-calls
|
||||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||||
|
|
@ -387,16 +385,22 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
writeAllele(g.getAllele(0), alleleMap);
|
|
||||||
for (int i = 1; i < g.getPloidy(); i++) {
|
|
||||||
mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
|
|
||||||
writeAllele(g.getAllele(i), alleleMap);
|
|
||||||
}
|
|
||||||
|
|
||||||
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||||
for ( String key : genotypeFormatKeys ) {
|
for ( String key : genotypeFormatKeys ) {
|
||||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) )
|
|
||||||
|
if ( key.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||||
|
if ( !g.isAvailable() ) {
|
||||||
|
throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record");
|
||||||
|
}
|
||||||
|
|
||||||
|
writeAllele(g.getAllele(0), alleleMap);
|
||||||
|
for (int i = 1; i < g.getPloidy(); i++) {
|
||||||
|
mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
|
||||||
|
writeAllele(g.getAllele(i), alleleMap);
|
||||||
|
}
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
|
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
|
||||||
|
|
||||||
|
|
@ -440,9 +444,10 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (String s : attrs ) {
|
for (int i = 0; i < attrs.size(); i++) {
|
||||||
mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
|
if ( i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY) )
|
||||||
mWriter.write(s);
|
mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
|
||||||
|
mWriter.write(attrs.get(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -488,10 +493,13 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
|
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
|
||||||
Set<String> keys = new HashSet<String>();
|
Set<String> keys = new HashSet<String>();
|
||||||
|
|
||||||
|
boolean sawGoodGT = false;
|
||||||
boolean sawGoodQual = false;
|
boolean sawGoodQual = false;
|
||||||
boolean sawGenotypeFilter = false;
|
boolean sawGenotypeFilter = false;
|
||||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||||
keys.addAll(g.getAttributes().keySet());
|
keys.addAll(g.getAttributes().keySet());
|
||||||
|
if ( g.isAvailable() )
|
||||||
|
sawGoodGT = true;
|
||||||
if ( g.hasNegLog10PError() )
|
if ( g.hasNegLog10PError() )
|
||||||
sawGoodQual = true;
|
sawGoodQual = true;
|
||||||
if (g.isFiltered() && g.isCalled())
|
if (g.isFiltered() && g.isCalled())
|
||||||
|
|
@ -504,7 +512,17 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
if (sawGenotypeFilter)
|
if (sawGenotypeFilter)
|
||||||
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||||
|
|
||||||
return ParsingUtils.sortList(new ArrayList<String>(keys));
|
List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));
|
||||||
|
|
||||||
|
// make sure the GT is first
|
||||||
|
if ( sawGoodGT ) {
|
||||||
|
List<String> newList = new ArrayList<String>(sortedList.size()+1);
|
||||||
|
newList.add(VCFConstants.GENOTYPE_KEY);
|
||||||
|
newList.addAll(sortedList);
|
||||||
|
sortedList = newList;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sortedList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -141,8 +141,6 @@ public class VCF3Codec extends AbstractVCFCodec {
|
||||||
boolean missing = i >= GTValueSplitSize;
|
boolean missing = i >= GTValueSplitSize;
|
||||||
|
|
||||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||||
if (i != 0)
|
|
||||||
generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes");
|
|
||||||
genotypeAlleleLocation = i;
|
genotypeAlleleLocation = i;
|
||||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||||
|
|
@ -156,12 +154,13 @@ public class VCF3Codec extends AbstractVCFCodec {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// check to make sure we found a gentoype field
|
// check to make sure we found a genotype field
|
||||||
if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field");
|
if ( genotypeAlleleLocation < 0 )
|
||||||
|
generateException("Unable to find the GT field for the record; the GT field is required");
|
||||||
|
if ( genotypeAlleleLocation > 0 )
|
||||||
|
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes");
|
||||||
|
|
||||||
// todo -- assuming allele list length in the single digits is bad. Fix me.
|
boolean phased = GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||||
// Check for > 1 for haploid genotypes
|
|
||||||
boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|';
|
|
||||||
|
|
||||||
// add it to the list
|
// add it to the list
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -145,8 +145,6 @@ public class VCFCodec extends AbstractVCFCodec {
|
||||||
|
|
||||||
// todo -- all of these on the fly parsing of the missing value should be static constants
|
// todo -- all of these on the fly parsing of the missing value should be static constants
|
||||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||||
if (i != 0)
|
|
||||||
generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes");
|
|
||||||
genotypeAlleleLocation = i;
|
genotypeAlleleLocation = i;
|
||||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||||
|
|
@ -160,22 +158,24 @@ public class VCFCodec extends AbstractVCFCodec {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// check to make sure we found a gentoype field
|
// check to make sure we found a genotype field if we are a VCF4.0 file
|
||||||
// TODO -- This is no longer required in v4.1
|
if ( version == VCFHeaderVersion.VCF4_0 && genotypeAlleleLocation == -1 )
|
||||||
if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field");
|
generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
|
||||||
|
if ( genotypeAlleleLocation > 0 )
|
||||||
|
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
|
||||||
|
|
||||||
// todo -- assuming allele list length in the single digits is bad. Fix me.
|
List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? null : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
|
||||||
// Check for > 1 for haploid genotypes
|
boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||||
boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|';
|
|
||||||
|
|
||||||
// add it to the list
|
// add it to the list
|
||||||
try {
|
try {
|
||||||
genotypes.put(sampleName, new Genotype(sampleName,
|
genotypes.put(sampleName,
|
||||||
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap),
|
new Genotype(sampleName,
|
||||||
GTQual,
|
GTalleles,
|
||||||
genotypeFilters,
|
GTQual,
|
||||||
gtAttributes,
|
genotypeFilters,
|
||||||
phased));
|
gtAttributes,
|
||||||
|
phased));
|
||||||
} catch (TribbleException e) {
|
} catch (TribbleException e) {
|
||||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -154,6 +154,16 @@ public class UserException extends ReviewedStingException {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class MalformedVCF extends UserException {
|
||||||
|
public MalformedVCF(String message, String line) {
|
||||||
|
super(String.format("The provided VCF file is malformed at line %s: %s", line, message));
|
||||||
|
}
|
||||||
|
|
||||||
|
public MalformedVCF(String message, int lineNo) {
|
||||||
|
super(String.format("The provided VCF file is malformed at line nmber %d: %s", lineNo, message));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static class ReadMissingReadGroup extends MalformedBAM {
|
public static class ReadMissingReadGroup extends MalformedBAM {
|
||||||
public ReadMissingReadGroup(SAMRecord read) {
|
public ReadMissingReadGroup(SAMRecord read) {
|
||||||
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -19,12 +20,14 @@ public class Genotype {
|
||||||
protected InferredGeneticContext commonInfo;
|
protected InferredGeneticContext commonInfo;
|
||||||
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
|
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
|
||||||
protected List<Allele> alleles = null; // new ArrayList<Allele>();
|
protected List<Allele> alleles = null; // new ArrayList<Allele>();
|
||||||
|
protected Type type = null;
|
||||||
|
|
||||||
protected boolean isPhased = false;
|
protected boolean isPhased = false;
|
||||||
private boolean filtersWereAppliedToContext;
|
protected boolean filtersWereAppliedToContext;
|
||||||
|
|
||||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
||||||
this.alleles = Collections.unmodifiableList(alleles);
|
if ( alleles != null )
|
||||||
|
this.alleles = Collections.unmodifiableList(alleles);
|
||||||
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
||||||
filtersWereAppliedToContext = filters != null;
|
filtersWereAppliedToContext = filters != null;
|
||||||
this.isPhased = isPhased;
|
this.isPhased = isPhased;
|
||||||
|
|
@ -66,6 +69,9 @@ public class Genotype {
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Allele> getAlleles(Allele allele) {
|
public List<Allele> getAlleles(Allele allele) {
|
||||||
|
if ( getType() == Type.UNAVAILABLE )
|
||||||
|
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||||
|
|
||||||
List<Allele> al = new ArrayList<Allele>();
|
List<Allele> al = new ArrayList<Allele>();
|
||||||
for ( Allele a : alleles )
|
for ( Allele a : alleles )
|
||||||
if ( a.equals(allele) )
|
if ( a.equals(allele) )
|
||||||
|
|
@ -75,6 +81,8 @@ public class Genotype {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Allele getAllele(int i) {
|
public Allele getAllele(int i) {
|
||||||
|
if ( getType() == Type.UNAVAILABLE )
|
||||||
|
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||||
return alleles.get(i);
|
return alleles.get(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -89,10 +97,21 @@ public class Genotype {
|
||||||
NO_CALL,
|
NO_CALL,
|
||||||
HOM_REF,
|
HOM_REF,
|
||||||
HET,
|
HET,
|
||||||
HOM_VAR
|
HOM_VAR,
|
||||||
|
UNAVAILABLE
|
||||||
}
|
}
|
||||||
|
|
||||||
public Type getType() {
|
public Type getType() {
|
||||||
|
if ( type == null ) {
|
||||||
|
type = determineType();
|
||||||
|
}
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Type determineType() {
|
||||||
|
if ( alleles == null )
|
||||||
|
return Type.UNAVAILABLE;
|
||||||
|
|
||||||
Allele firstAllele = alleles.get(0);
|
Allele firstAllele = alleles.get(0);
|
||||||
|
|
||||||
if ( firstAllele.isNoCall() ) {
|
if ( firstAllele.isNoCall() ) {
|
||||||
|
|
@ -122,7 +141,8 @@ public class Genotype {
|
||||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
|
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
|
||||||
*/
|
*/
|
||||||
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
||||||
public boolean isCalled() { return getType() != Type.NO_CALL; }
|
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
|
||||||
|
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
|
||||||
|
|
||||||
//
|
//
|
||||||
// Useful methods for getting genotype likelihoods for a genotype object, if present
|
// Useful methods for getting genotype likelihoods for a genotype object, if present
|
||||||
|
|
@ -157,8 +177,8 @@ public class Genotype {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void validate() {
|
public void validate() {
|
||||||
if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles");
|
if ( alleles == null ) return;
|
||||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles");
|
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0");
|
||||||
|
|
||||||
int nNoCalls = 0;
|
int nNoCalls = 0;
|
||||||
for ( Allele allele : alleles ) {
|
for ( Allele allele : alleles ) {
|
||||||
|
|
@ -175,6 +195,9 @@ public class Genotype {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getGenotypeString(boolean ignoreRefState) {
|
public String getGenotypeString(boolean ignoreRefState) {
|
||||||
|
if ( alleles == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
// Notes:
|
// Notes:
|
||||||
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
|
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
|
||||||
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
|
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
|
||||||
|
|
|
||||||
|
|
@ -1206,9 +1206,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
|
|
||||||
if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName());
|
if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName());
|
||||||
|
|
||||||
for ( Allele gAllele : g.getAlleles() ) {
|
if ( g.isAvailable() ) {
|
||||||
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
|
for ( Allele gAllele : g.getAlleles() ) {
|
||||||
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
|
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
|
||||||
|
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,9 @@
|
||||||
package org.broadinstitute.sting;
|
package org.broadinstitute.sting;
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.Tribble;
|
import org.broad.tribble.Tribble;
|
||||||
|
import org.broad.tribble.index.Index;
|
||||||
import org.broad.tribble.index.IndexFactory;
|
import org.broad.tribble.index.IndexFactory;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||||
|
|
@ -64,10 +66,19 @@ public class WalkerTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile);
|
System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile);
|
||||||
Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(resultFile, indexFile, new VCFCodec()), "Index on disk from indexing on the fly not equal to the index created after the run completed");
|
Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec());
|
||||||
|
Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath());
|
||||||
|
|
||||||
|
if ( ! indexFromOutputFile.equals(dynamicIndex) ) {
|
||||||
|
Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n",
|
||||||
|
indexFromOutputFile.getProperties(),
|
||||||
|
dynamicIndex.getProperties()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s) {
|
public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s) {
|
||||||
List<String> md5s = new ArrayList<String>();
|
List<String> md5s = new ArrayList<String>();
|
||||||
for (int i = 0; i < resultFiles.size(); i++) {
|
for (int i = 0; i < resultFiles.size(); i++) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,229 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Basic unit test for DifferableReaders in reduced reads
|
||||||
|
*/
|
||||||
|
public class DiffEngineUnitTest extends BaseTest {
|
||||||
|
DiffEngine engine;
|
||||||
|
|
||||||
|
@BeforeClass(enabled = true)
|
||||||
|
public void createDiffEngine() {
|
||||||
|
engine = new DiffEngine();
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Difference testing routines
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class DifferenceTest extends TestDataProvider {
|
||||||
|
public DiffElement tree1, tree2;
|
||||||
|
public List<String> differences;
|
||||||
|
|
||||||
|
private DifferenceTest(String tree1, String tree2) {
|
||||||
|
this(tree1, tree2, Collections.<String>emptyList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private DifferenceTest(String tree1, String tree2, String difference) {
|
||||||
|
this(tree1, tree2, Arrays.asList(difference));
|
||||||
|
}
|
||||||
|
|
||||||
|
private DifferenceTest(String tree1, String tree2, List<String> differences) {
|
||||||
|
super(DifferenceTest.class);
|
||||||
|
this.tree1 = DiffNode.fromString(tree1);
|
||||||
|
this.tree2 = DiffNode.fromString(tree2);
|
||||||
|
this.differences = differences;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("tree1=%s tree2=%s diff=%s",
|
||||||
|
tree1.toOneLineString(), tree2.toOneLineString(), differences);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "trees")
|
||||||
|
public Object[][] createTrees() {
|
||||||
|
new DifferenceTest("A=X", "A=X");
|
||||||
|
new DifferenceTest("A=X", "A=Y", "A:X!=Y");
|
||||||
|
new DifferenceTest("A=X", "B=X", Arrays.asList("A:X!=MISSING", "B:MISSING!=X"));
|
||||||
|
new DifferenceTest("A=(X=1)", "B=(X=1)", Arrays.asList("A:(X=1)!=MISSING", "B:MISSING!=(X=1)"));
|
||||||
|
new DifferenceTest("A=(X=1)", "A=(X=1)");
|
||||||
|
new DifferenceTest("A=(X=1 Y=2)", "A=(X=1 Y=2)");
|
||||||
|
new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=3))");
|
||||||
|
new DifferenceTest("A=(X=1)", "A=(X=2)", "A.X:1!=2");
|
||||||
|
new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=4))", "A.B.Z:3!=4");
|
||||||
|
new DifferenceTest("A=(X=1)", "A=(X=1 Y=2)", "A.Y:MISSING!=2");
|
||||||
|
new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2)", "A.B:(Z=3)!=MISSING");
|
||||||
|
return DifferenceTest.getTests(DifferenceTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "trees")
|
||||||
|
public void testDiffs(DifferenceTest test) {
|
||||||
|
logger.warn("Test tree1: " + test.tree1.toOneLineString());
|
||||||
|
logger.warn("Test tree2: " + test.tree2.toOneLineString());
|
||||||
|
|
||||||
|
List<SpecificDifference> diffs = engine.diff(test.tree1, test.tree2);
|
||||||
|
logger.warn("Test expected diff : " + test.differences);
|
||||||
|
logger.warn("Observed diffs : " + diffs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Low-level routines for summarizing differences
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testLongestCommonPostfix() {
|
||||||
|
testLongestCommonPostfixHelper("A", "A", 1);
|
||||||
|
testLongestCommonPostfixHelper("A", "B", 0);
|
||||||
|
testLongestCommonPostfixHelper("A.B", "A.B", 2);
|
||||||
|
testLongestCommonPostfixHelper("A.B.C", "A.B.C", 3);
|
||||||
|
testLongestCommonPostfixHelper("A.B.C", "X.B.C", 2);
|
||||||
|
testLongestCommonPostfixHelper("A.B.C", "X.Y.C", 1);
|
||||||
|
testLongestCommonPostfixHelper("A.B.C", "X.Y.Z", 0);
|
||||||
|
testLongestCommonPostfixHelper("A.B.C", "A.X.C", 1);
|
||||||
|
testLongestCommonPostfixHelper("A.B.C", "A.X.Z", 0);
|
||||||
|
testLongestCommonPostfixHelper("A.B.C", "A.B.Z", 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testLongestCommonPostfixHelper(String p1, String p2, int expected) {
|
||||||
|
String[] parts1 = p1.split("\\.");
|
||||||
|
String[] parts2 = p2.split("\\.");
|
||||||
|
int obs = DiffEngine.longestCommonPostfix(parts1, parts2);
|
||||||
|
Assert.assertEquals(obs, expected, "p1=" + p1 + " p2=" + p2 + " failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dependsOnMethods = "testLongestCommonPostfix")
|
||||||
|
public void testSummarizePath() {
|
||||||
|
testSummarizePathHelper("A", "A", "A");
|
||||||
|
testSummarizePathHelper("A", "B", "*");
|
||||||
|
testSummarizePathHelper("A.B", "A.B", "A.B");
|
||||||
|
testSummarizePathHelper("A.B", "X.B", "*.B");
|
||||||
|
testSummarizePathHelper("A.B", "X.Y", "*.*");
|
||||||
|
testSummarizePathHelper("A.B.C", "A.B.C", "A.B.C");
|
||||||
|
testSummarizePathHelper("A.B.C", "X.B.C", "*.B.C");
|
||||||
|
testSummarizePathHelper("A.B.C", "X.Y.C", "*.*.C");
|
||||||
|
testSummarizePathHelper("A.B.C", "X.Y.Z", "*.*.*");
|
||||||
|
testSummarizePathHelper("A.B.C", "A.X.C", "*.*.C");
|
||||||
|
testSummarizePathHelper("A.B.C", "A.X.Z", "*.*.*");
|
||||||
|
testSummarizePathHelper("A.B.C", "A.B.Z", "*.*.*");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSummarizePathHelper(String p1, String p2, String expected) {
|
||||||
|
String[] parts1 = DiffEngine.diffNameToPath(p1);
|
||||||
|
String[] parts2 = DiffEngine.diffNameToPath(p2);
|
||||||
|
int obs = DiffEngine.longestCommonPostfix(parts1, parts2);
|
||||||
|
String path = DiffEngine.summarizedPath(parts2, obs);
|
||||||
|
Assert.assertEquals(path, expected, "p1=" + p1 + " p2=" + p2 + " failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// High-level difference summary
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class SummarizeDifferenceTest extends TestDataProvider {
|
||||||
|
List<String> diffs = new ArrayList<String>();
|
||||||
|
List<String> expecteds = new ArrayList<String>();
|
||||||
|
|
||||||
|
public SummarizeDifferenceTest() { super(SummarizeDifferenceTest.class); }
|
||||||
|
|
||||||
|
public SummarizeDifferenceTest addDiff(String... diffsToAdd) {
|
||||||
|
diffs.addAll(Arrays.asList(diffsToAdd));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SummarizeDifferenceTest addSummary(String... expectedSummary) {
|
||||||
|
expecteds.addAll(Arrays.asList(expectedSummary));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("diffs=%s => expected=%s", diffs, expecteds);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test() {
|
||||||
|
List<String[]> diffPaths = new ArrayList<String[]>(diffs.size());
|
||||||
|
for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); }
|
||||||
|
|
||||||
|
List<Difference> sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs);
|
||||||
|
|
||||||
|
Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs);
|
||||||
|
|
||||||
|
for ( int i = 0; i < sumDiffs.size(); i++ ) {
|
||||||
|
Difference sumDiff = sumDiffs.get(i);
|
||||||
|
String expected = expecteds.get(i);
|
||||||
|
String[] pathCount = expected.split(":");
|
||||||
|
String path = pathCount[0];
|
||||||
|
int count = Integer.valueOf(pathCount[1]);
|
||||||
|
Assert.assertEquals(sumDiff.getPath(), path, "Unexpected path at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs);
|
||||||
|
Assert.assertEquals(sumDiff.getCount(), count, "Unexpected counts at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "summaries")
|
||||||
|
public Object[][] createSummaries() {
|
||||||
|
new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A", "A", "A").addSummary("A:3");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A", "A", "A", "B").addSummary("A:3", "B:1");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B").addSummary("A:3", "B:2");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B", "C").addSummary("A:3", "B:2", "C:1");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A.X", "A.X").addSummary("A.X:2");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X").addSummary("*.X:3", "A.X:2", "B.X:1");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X", "B.X").addSummary("*.X:4", "A.X:2", "B.X:2");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A.B.C", "X.B.C").addSummary("*.B.C:2", "A.B.C:1", "X.B.C:1");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A.B.C", "X.Y.C", "X.Y.C").addSummary("*.*.C:3", "X.Y.C:2", "A.B.C:1");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "X.Y.C").addSummary("*.*.C:3", "A.B.C:1", "A.X.C:1", "X.Y.C:1");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C").addSummary("*.*.C:3", "*.X.C:2", "A.B.C:1", "A.X.C:1", "B.X.C:1");
|
||||||
|
new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C", "B.X.C").addSummary("*.*.C:4", "*.X.C:3", "B.X.C:2", "A.B.C:1", "A.X.C:1");
|
||||||
|
|
||||||
|
return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(enabled = true, dependsOnMethods = "testSummarizePath", dataProvider = "summaries")
|
||||||
|
public void testSummarizeDifferences(SummarizeDifferenceTest test) {
|
||||||
|
test.test();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,249 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Basic unit test for DifferableReaders in reduced reads
|
||||||
|
*/
|
||||||
|
public class DiffNodeUnitTest extends BaseTest {
|
||||||
|
// Data is:
|
||||||
|
// MY_ROOT
|
||||||
|
// fields: A=A, B=B
|
||||||
|
// nodes: C, D
|
||||||
|
// C: fields: E=E, nodes: none
|
||||||
|
// D: fields: F=F, G=G, nodes: none
|
||||||
|
static DiffNode MY_ROOT = DiffNode.rooted("MY_ROOT");
|
||||||
|
static DiffValue Value_A = new DiffValue("A", MY_ROOT, "A");
|
||||||
|
static DiffValue Value_B = new DiffValue("B", MY_ROOT, "B");
|
||||||
|
static DiffNode NODE_C = DiffNode.empty("C", MY_ROOT);
|
||||||
|
static DiffNode NODE_D = DiffNode.empty("D", MY_ROOT);
|
||||||
|
static DiffValue Value_E = new DiffValue("E", NODE_C, "E");
|
||||||
|
static DiffValue Value_F = new DiffValue("F", NODE_D, "F");
|
||||||
|
static DiffValue Value_G = new DiffValue("G", NODE_D, "G");
|
||||||
|
|
||||||
|
static {
|
||||||
|
MY_ROOT.add(Value_A);
|
||||||
|
MY_ROOT.add(Value_B);
|
||||||
|
MY_ROOT.add(NODE_C);
|
||||||
|
MY_ROOT.add(NODE_D);
|
||||||
|
NODE_C.add(Value_E);
|
||||||
|
NODE_D.add(Value_F);
|
||||||
|
NODE_D.add(Value_G);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Element testing routines
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class ElementTest extends TestDataProvider {
|
||||||
|
public DiffElement elt;
|
||||||
|
public String name;
|
||||||
|
public String fullName;
|
||||||
|
public DiffElement parent;
|
||||||
|
|
||||||
|
private ElementTest(DiffValue elt, DiffValue parent, String name, String fullName) {
|
||||||
|
this(elt.getBinding(), parent.getBinding(), name, fullName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ElementTest(DiffElement elt, DiffElement parent, String name, String fullName) {
|
||||||
|
super(ElementTest.class);
|
||||||
|
this.elt = elt;
|
||||||
|
this.name = name;
|
||||||
|
this.fullName = fullName;
|
||||||
|
this.parent = parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("ElementTest elt=%s name=%s fullName=%s parent=%s",
|
||||||
|
elt.toOneLineString(), name, fullName, parent.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "elementdata")
|
||||||
|
public Object[][] createElementData() {
|
||||||
|
new ElementTest(MY_ROOT.getBinding(), DiffElement.ROOT, "MY_ROOT", "MY_ROOT");
|
||||||
|
new ElementTest(NODE_C, MY_ROOT, "C", "MY_ROOT.C");
|
||||||
|
new ElementTest(NODE_D, MY_ROOT, "D", "MY_ROOT.D");
|
||||||
|
new ElementTest(Value_A, MY_ROOT, "A", "MY_ROOT.A");
|
||||||
|
new ElementTest(Value_B, MY_ROOT, "B", "MY_ROOT.B");
|
||||||
|
new ElementTest(Value_E, NODE_C, "E", "MY_ROOT.C.E");
|
||||||
|
new ElementTest(Value_F, NODE_D, "F", "MY_ROOT.D.F");
|
||||||
|
new ElementTest(Value_G, NODE_D, "G", "MY_ROOT.D.G");
|
||||||
|
return TestDataProvider.getTests(ElementTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "elementdata")
|
||||||
|
public void testElementMethods(ElementTest test) {
|
||||||
|
Assert.assertNotNull(test.elt.getName());
|
||||||
|
Assert.assertNotNull(test.elt.getParent());
|
||||||
|
Assert.assertEquals(test.elt.getName(), test.name);
|
||||||
|
Assert.assertEquals(test.elt.getParent(), test.parent);
|
||||||
|
Assert.assertEquals(test.elt.fullyQualifiedName(), test.fullName);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// DiffValue testing routines
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class LeafTest extends TestDataProvider {
|
||||||
|
public DiffValue diffvalue;
|
||||||
|
public Object value;
|
||||||
|
|
||||||
|
private LeafTest(DiffValue diffvalue, Object value) {
|
||||||
|
super(LeafTest.class);
|
||||||
|
this.diffvalue = diffvalue;
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("LeafTest diffvalue=%s value=%s", diffvalue.toOneLineString(), value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "leafdata")
|
||||||
|
public Object[][] createLeafData() {
|
||||||
|
new LeafTest(Value_A, "A");
|
||||||
|
new LeafTest(Value_B, "B");
|
||||||
|
new LeafTest(Value_E, "E");
|
||||||
|
new LeafTest(Value_F, "F");
|
||||||
|
new LeafTest(Value_G, "G");
|
||||||
|
return TestDataProvider.getTests(LeafTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "leafdata")
|
||||||
|
public void testLeafMethods(LeafTest test) {
|
||||||
|
Assert.assertNotNull(test.diffvalue.getValue());
|
||||||
|
Assert.assertEquals(test.diffvalue.getValue(), test.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Node testing routines
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class NodeTest extends TestDataProvider {
|
||||||
|
public DiffNode node;
|
||||||
|
public Set<String> fields;
|
||||||
|
public Set<String> subnodes;
|
||||||
|
public Set<String> allNames;
|
||||||
|
|
||||||
|
private NodeTest(DiffNode node, List<String> fields, List<String> subnodes) {
|
||||||
|
super(NodeTest.class);
|
||||||
|
this.node = node;
|
||||||
|
this.fields = new HashSet<String>(fields);
|
||||||
|
this.subnodes = new HashSet<String>(subnodes);
|
||||||
|
this.allNames = new HashSet<String>(fields);
|
||||||
|
allNames.addAll(subnodes);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("NodeTest node=%s fields=%s subnodes=%s",
|
||||||
|
node.toOneLineString(), fields, subnodes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "nodedata")
|
||||||
|
public Object[][] createData1() {
|
||||||
|
new NodeTest(MY_ROOT, Arrays.asList("A", "B"), Arrays.asList("C", "D"));
|
||||||
|
new NodeTest(NODE_C, Arrays.asList("E"), Collections.<String>emptyList());
|
||||||
|
new NodeTest(NODE_D, Arrays.asList("F", "G"), Collections.<String>emptyList());
|
||||||
|
return TestDataProvider.getTests(NodeTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "nodedata")
|
||||||
|
public void testNodeAccessors(NodeTest test) {
|
||||||
|
Assert.assertNotNull(test.node.getElements());
|
||||||
|
|
||||||
|
for ( String name : test.allNames ) {
|
||||||
|
DiffElement elt = test.node.getElement(name);
|
||||||
|
Assert.assertNotNull(elt, "Failed to find field " + elt + " in " + test.node);
|
||||||
|
Assert.assertEquals(elt.getName(), name);
|
||||||
|
Assert.assertEquals(elt.getValue().isAtomic(), test.fields.contains(name), "Failed atomic/compound expectation: " + test.node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: add routines are being implicitly tested by the creation of the data structures
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "nodedata")
|
||||||
|
public void testCounts(NodeTest test) {
|
||||||
|
Assert.assertEquals(test.node.getElements().size(), test.allNames.size());
|
||||||
|
Assert.assertEquals(test.node.getElementNames(), test.allNames);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// fromString testing routines
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class FromStringTest extends TestDataProvider {
|
||||||
|
public String string;
|
||||||
|
public DiffElement expected;
|
||||||
|
|
||||||
|
private FromStringTest(String string, DiffElement expected) {
|
||||||
|
super(FromStringTest.class);
|
||||||
|
this.string = string;
|
||||||
|
this.expected = expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("FromStringTest string=%s expected=%s", string, expected.toOneLineString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "fromstringdata")
|
||||||
|
public Object[][] createFromData() {
|
||||||
|
new FromStringTest("A=A", Value_A.getBinding());
|
||||||
|
new FromStringTest("B=B", Value_B.getBinding());
|
||||||
|
new FromStringTest("C=(E=E)", NODE_C.getBinding());
|
||||||
|
new FromStringTest("D=(F=F G=G)", NODE_D.getBinding());
|
||||||
|
return TestDataProvider.getTests(FromStringTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "fromstringdata")
|
||||||
|
public void parseFromString(FromStringTest test) {
|
||||||
|
logger.warn("Testing from string: " + test.string);
|
||||||
|
DiffElement elt = DiffNode.fromString(test.string);
|
||||||
|
Assert.assertEquals(elt.toOneLineString(), test.expected.toOneLineString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,143 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Basic unit test for DifferableReaders in reduced reads
|
||||||
|
*/
|
||||||
|
public class DiffableReaderUnitTest extends BaseTest {
|
||||||
|
DiffEngine engine;
|
||||||
|
|
||||||
|
File vcfFile = new File(testDir + "diffTestMaster.vcf");
|
||||||
|
File bamFile = new File(testDir + "exampleBAM.bam");
|
||||||
|
|
||||||
|
@BeforeClass(enabled = true)
|
||||||
|
public void createDiffEngine() {
|
||||||
|
engine = new DiffEngine();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testPluggableDiffableReaders() {
|
||||||
|
logger.warn("testPluggableDiffableReaders");
|
||||||
|
Map<String, DiffableReader> readers = engine.getReaders();
|
||||||
|
Assert.assertNotNull(readers);
|
||||||
|
Assert.assertTrue(readers.size() > 0);
|
||||||
|
Assert.assertNotNull(readers.get("VCF"));
|
||||||
|
for ( Map.Entry<String, DiffableReader> e : engine.getReaders().entrySet() ) {
|
||||||
|
logger.warn("Found diffable reader: " + e.getKey());
|
||||||
|
Assert.assertEquals(e.getValue().getName(), e.getKey());
|
||||||
|
Assert.assertEquals(e.getValue(), engine.getReader(e.getKey()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void testLeaf(DiffNode rec, String field, Object expected) {
|
||||||
|
DiffElement value = rec.getElement(field);
|
||||||
|
Assert.assertNotNull(value, "Expected to see leaf named " + field + " in rec " + rec);
|
||||||
|
Assert.assertEquals(value.getValue().getValue(), expected, "Expected to leaf named " + field + " to have value " + expected + " in rec " + rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders")
|
||||||
|
public void testVCF1() {
|
||||||
|
logger.warn("testVCF1");
|
||||||
|
DiffableReader vcfReader = engine.getReader("VCF");
|
||||||
|
Assert.assertTrue(vcfReader.canRead(vcfFile));
|
||||||
|
Assert.assertFalse(vcfReader.canRead(bamFile));
|
||||||
|
|
||||||
|
DiffElement diff = vcfReader.readFromFile(vcfFile, -1);
|
||||||
|
Assert.assertNotNull(diff);
|
||||||
|
|
||||||
|
Assert.assertEquals(diff.getName(), vcfFile.getName());
|
||||||
|
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
|
||||||
|
|
||||||
|
DiffNode node = diff.getValueAsNode();
|
||||||
|
Assert.assertEquals(node.getElements().size(), 10);
|
||||||
|
|
||||||
|
// chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03
|
||||||
|
DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode();
|
||||||
|
testLeaf(rec1, "CHROM", "chr1");
|
||||||
|
testLeaf(rec1, "POS", 2646);
|
||||||
|
testLeaf(rec1, "ID", "rs62635284");
|
||||||
|
testLeaf(rec1, "REF", Allele.create("G", true));
|
||||||
|
testLeaf(rec1, "ALT", new HashSet<Allele>(Arrays.asList(Allele.create("A"))));
|
||||||
|
testLeaf(rec1, "QUAL", 0.15);
|
||||||
|
testLeaf(rec1, "FILTER", Collections.<Object>emptySet());
|
||||||
|
testLeaf(rec1, "AC", "2");
|
||||||
|
testLeaf(rec1, "AF", "1.00");
|
||||||
|
testLeaf(rec1, "AN", "2");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders")
|
||||||
|
public void testBAM() {
|
||||||
|
logger.warn("testBAM");
|
||||||
|
DiffableReader bamReader = engine.getReader("BAM");
|
||||||
|
Assert.assertTrue(bamReader.canRead(bamFile));
|
||||||
|
Assert.assertFalse(bamReader.canRead(vcfFile));
|
||||||
|
|
||||||
|
DiffElement diff = bamReader.readFromFile(bamFile, -1);
|
||||||
|
Assert.assertNotNull(diff);
|
||||||
|
|
||||||
|
Assert.assertEquals(diff.getName(), bamFile.getName());
|
||||||
|
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
|
||||||
|
|
||||||
|
DiffNode node = diff.getValueAsNode();
|
||||||
|
Assert.assertEquals(node.getElements().size(), 33);
|
||||||
|
|
||||||
|
// 30PPJAAXX090125:1:42:512:1817#0 99 chr1 200 0 76M =
|
||||||
|
// 255 -130 ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC
|
||||||
|
// BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3:
|
||||||
|
// PG:Z:0 RG:Z:exampleBAM.bam SM:Z:exampleBAM.bam
|
||||||
|
|
||||||
|
DiffNode rec1 = node.getElement("30PPJAAXX090125:1:42:512:1817#0_1").getValueAsNode();
|
||||||
|
testLeaf(rec1, "NAME", "30PPJAAXX090125:1:42:512:1817#0");
|
||||||
|
testLeaf(rec1, "FLAGS", 99);
|
||||||
|
testLeaf(rec1, "RNAME", "chr1");
|
||||||
|
testLeaf(rec1, "POS", 200);
|
||||||
|
testLeaf(rec1, "MAPQ", 0);
|
||||||
|
testLeaf(rec1, "CIGAR", "76M");
|
||||||
|
testLeaf(rec1, "RNEXT", "chr1");
|
||||||
|
testLeaf(rec1, "PNEXT", 255);
|
||||||
|
testLeaf(rec1, "TLEN", -130);
|
||||||
|
testLeaf(rec1, "SEQ", "ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC");
|
||||||
|
testLeaf(rec1, "QUAL", "BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3:");
|
||||||
|
testLeaf(rec1, "PG", "0");
|
||||||
|
testLeaf(rec1, "RG", "exampleBAM.bam");
|
||||||
|
testLeaf(rec1, "SM", "exampleBAM.bam");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,95 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Basic unit test for the string form of a SpecificDifference built from a pair of DiffElement trees
|
||||||
|
*/
|
||||||
|
public class DifferenceUnitTest extends BaseTest {
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// testing routines
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class DifferenceTest extends TestDataProvider {
|
||||||
|
public DiffElement tree1, tree2;
|
||||||
|
public String difference;
|
||||||
|
|
||||||
|
private DifferenceTest(String tree1, String tree2, String difference) {
|
||||||
|
this(DiffNode.fromString(tree1), DiffNode.fromString(tree2), difference);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DifferenceTest(DiffElement tree1, DiffElement tree2, String difference) {
|
||||||
|
super(DifferenceTest.class);
|
||||||
|
this.tree1 = tree1;
|
||||||
|
this.tree2 = tree2;
|
||||||
|
this.difference = difference;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("tree1=%s tree2=%s diff=%s",
|
||||||
|
tree1 == null ? "null" : tree1.toOneLineString(),
|
||||||
|
tree2 == null ? "null" : tree2.toOneLineString(),
|
||||||
|
difference);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "data")
|
||||||
|
public Object[][] createTrees() {
|
||||||
|
new DifferenceTest("A=X", "A=Y", "A:X!=Y");
|
||||||
|
new DifferenceTest("A=Y", "A=X", "A:Y!=X");
|
||||||
|
new DifferenceTest(DiffNode.fromString("A=X"), null, "A:X!=MISSING");
|
||||||
|
new DifferenceTest(null, DiffNode.fromString("A=X"), "A:MISSING!=X");
|
||||||
|
return DifferenceTest.getTests(DifferenceTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "data")
|
||||||
|
public void testDiffToString(DifferenceTest test) {
|
||||||
|
logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString()));
|
||||||
|
logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString()));
|
||||||
|
logger.warn("Test expected diff : " + test.difference);
|
||||||
|
SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2);
|
||||||
|
logger.warn("Observed diffs : " + diff);
|
||||||
|
Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -23,7 +23,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest {
|
||||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1)
|
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1)
|
||||||
+ " -L chr20:556259-756570",
|
+ " -L chr20:556259-756570",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("e312b7d3854d5b2834a370659514a813"));
|
Arrays.asList("7f11f7f75d1526077f0173c7ed1fc6c4"));
|
||||||
executeTest("Merge MNP sites within genomic distance of 1 [TEST ONE]", spec);
|
executeTest("Merge MNP sites within genomic distance of 1 [TEST ONE]", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -33,7 +33,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest {
|
||||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10)
|
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10)
|
||||||
+ " -L chr20:556259-756570",
|
+ " -L chr20:556259-756570",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("681f50e45f1d697370d2c355df2e18bc"));
|
Arrays.asList("53dd312468296826bdd3c22387390c88"));
|
||||||
executeTest("Merge MNP sites within genomic distance of 10 [TEST TWO]", spec);
|
executeTest("Merge MNP sites within genomic distance of 10 [TEST TWO]", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -43,7 +43,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest {
|
||||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100)
|
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100)
|
||||||
+ " -L chr20:556259-756570",
|
+ " -L chr20:556259-756570",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("0bccb0ef928a108418246bec01098083"));
|
Arrays.asList("e26f92d2fb9f4eaeac7f9d8ee27410ee"));
|
||||||
executeTest("Merge MNP sites within genomic distance of 100 [TEST THREE]", spec);
|
executeTest("Merge MNP sites within genomic distance of 100 [TEST THREE]", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest
|
||||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1)
|
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1)
|
||||||
+ " -L chr20:556259-756570",
|
+ " -L chr20:556259-756570",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("e16f957d888054ae0518e25660295241"));
|
Arrays.asList("af5e1370822551c0c6f50f23447dc627"));
|
||||||
executeTest("Merge sites within genomic distance of 1 [TEST ONE]", spec);
|
executeTest("Merge sites within genomic distance of 1 [TEST ONE]", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -33,7 +33,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest
|
||||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10)
|
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10)
|
||||||
+ " -L chr20:556259-756570",
|
+ " -L chr20:556259-756570",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("122a482090677c7619c2105d44e00d11"));
|
Arrays.asList("dd8c44ae1ef059a7fe85399467e102eb"));
|
||||||
executeTest("Merge sites within genomic distance of 10 [TEST TWO]", spec);
|
executeTest("Merge sites within genomic distance of 10 [TEST TWO]", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -43,7 +43,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest
|
||||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100)
|
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100)
|
||||||
+ " -L chr20:556259-756570",
|
+ " -L chr20:556259-756570",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("bc6a8c8a42bb2601db98e88e9ad74748"));
|
Arrays.asList("f81fd72ecaa57b3215406fcea860bcc5"));
|
||||||
executeTest("Merge sites within genomic distance of 100 [TEST THREE]", spec);
|
executeTest("Merge sites within genomic distance of 100 [TEST THREE]", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
public void testCountCovariates1() {
|
public void testCountCovariates1() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" );
|
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" );
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f4f8a49bb5764d2a8f61e055f64dcce4");
|
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9c006f8e9fb5752b1c139f5a8cc7ea88");
|
||||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" );
|
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" );
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "570506533f079d738d70934dfe1c02cd" );
|
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e6b98af01c5a08e4954b79ec42db6fc3" );
|
||||||
|
|
||||||
for ( String parallelism : Arrays.asList("", " -nt 4")) {
|
for ( String parallelism : Arrays.asList("", " -nt 4")) {
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
|
|
@ -53,9 +53,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
public void testTableRecalibrator1() {
|
public void testTableRecalibrator1() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" );
|
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" );
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5");
|
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "6797d7ffa4ef6c48413719ba32696ccf");
|
||||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" );
|
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" );
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "064c4a7bdd23974c3a9c5f924540df76" );
|
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1f9d8944b73169b367cb83b0d22e5432" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -107,7 +107,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testTableRecalibratorMaxQ70() {
|
public void testTableRecalibratorMaxQ70() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5" );
|
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -133,12 +133,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCountCovariatesSolidIndelsRemoveRefBias() {
|
public void testCountCovariatesSolidIndelsRemoveRefBias() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "0a6cdb9611e5880ea6611205080aa267" );
|
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c9ea5f995e1e2b7a5688533e678dcedc" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -164,7 +162,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testTableRecalibratorSolidIndelsRemoveRefBias() {
|
public void testTableRecalibratorSolidIndelsRemoveRefBias() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9bc7e1ad223ba759fe5e8ddb4c07369c" );
|
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "993fae4270e7e1e15986f270acf247af" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -189,13 +187,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCountCovariatesVCF() {
|
public void testCountCovariatesVCF() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3700eaf567e4937f442fc777a226d6ad");
|
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16");
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -219,7 +214,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testCountCovariatesBED() {
|
public void testCountCovariatesBED() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "6803891a3398821fc8a37e19ea8e5a00");
|
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b460478d9683e827784e42bc352db8bb");
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -243,7 +238,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testCountCovariatesVCFPlusDBsnp() {
|
public void testCountCovariatesVCFPlusDBsnp() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f224c42fbc4026db973ccc91265ab5c7");
|
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d892bd60d8f679affda3c1e3af96c1");
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -268,69 +263,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testCountCovariatesNoReadGroups() {
|
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
|
||||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "c024e03f019aeceaf364fa58c8295ad8" );
|
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
|
||||||
String bam = entry.getKey();
|
|
||||||
String md5 = entry.getValue();
|
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
|
||||||
"-R " + b36KGReference +
|
|
||||||
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
|
||||||
" -T CountCovariates" +
|
|
||||||
" -I " + bam +
|
|
||||||
" -L 1:10,000,000-10,200,000" +
|
|
||||||
" -cov ReadGroupCovariate" +
|
|
||||||
" -cov QualityScoreCovariate" +
|
|
||||||
" -cov CycleCovariate" +
|
|
||||||
" -cov DinucCovariate" +
|
|
||||||
" --default_read_group DefaultReadGroup" +
|
|
||||||
" --default_platform illumina" +
|
|
||||||
" --solid_recal_mode SET_Q_ZERO" +
|
|
||||||
" -recalFile %s",
|
|
||||||
1, // just one output file
|
|
||||||
Arrays.asList(md5));
|
|
||||||
List<File> result = executeTest("testCountCovariatesNoReadGroups", spec).getFirst();
|
|
||||||
paramsFilesNoReadGroupTest.put(bam, result.get(0).getAbsolutePath());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testTableRecalibratorNoReadGroups() {
|
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
|
||||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "1eefbe7ac0376fc1ed1392d85242171e" );
|
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
|
||||||
String bam = entry.getKey();
|
|
||||||
String md5 = entry.getValue();
|
|
||||||
String paramsFile = paramsFilesNoReadGroupTest.get(bam);
|
|
||||||
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
|
|
||||||
if ( paramsFile != null ) {
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
|
||||||
"-R " + b36KGReference +
|
|
||||||
" -T TableRecalibration" +
|
|
||||||
" -I " + bam +
|
|
||||||
" -L 1:10,100,000-10,300,000" +
|
|
||||||
" -o %s" +
|
|
||||||
" --no_pg_tag" +
|
|
||||||
" --solid_recal_mode SET_Q_ZERO" +
|
|
||||||
" --default_read_group DefaultReadGroup" +
|
|
||||||
" --default_platform illumina" +
|
|
||||||
" -recalFile " + paramsFile,
|
|
||||||
1, // just one output file
|
|
||||||
Arrays.asList(md5));
|
|
||||||
executeTest("testTableRecalibratorNoReadGroups", spec);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCountCovariatesNoIndex() {
|
public void testCountCovariatesNoIndex() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "cfc31bb6f51436d1c3b34f62bb801dc8" );
|
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "284ccac1f8fe485e52c86333cac7c2d4" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -356,7 +292,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testTableRecalibratorNoIndex() {
|
public void testTableRecalibratorNoIndex() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "83b848a16034c2fb423d1bb0f5be7784" );
|
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "c167799c2d9cab815d7c9b23337f162e" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -380,11 +316,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCountCovariatesFailWithoutDBSNP() {
|
public void testCountCovariatesFailWithoutDBSNP() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "");
|
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "");
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
|
||||||
|
|
@ -1,46 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
|
||||||
import org.testng.annotations.Test;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
public class BatchMergeIntegrationTest extends WalkerTest {
|
|
||||||
@Test
|
|
||||||
public void testBatchMerge1() {
|
|
||||||
String bam = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
|
||||||
String alleles = validationDataLocation + "batch.merge.alleles.vcf";
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
|
||||||
"-T UnifiedGenotyper -NO_HEADER -BTI alleles -stand_call_conf 0.0 -glm BOTH -G none -nsl -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -o %s -R " + b37KGReference
|
|
||||||
+ " -B:alleles,VCF " + alleles
|
|
||||||
+ " -I " + bam,
|
|
||||||
1,
|
|
||||||
Arrays.asList("f4ed8f4ef2cba96823c06e90e9d0de35"));
|
|
||||||
executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testVariantsToVCFUsingGeliInput() {
|
public void testVariantsToVCFUsingGeliInput() {
|
||||||
List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
md5.add("815b82fff92aab41c209eedce2d7e7d9");
|
md5.add("4accae035d271b35ee2ec58f403c68c6");
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-R " + b36KGReference +
|
"-R " + b36KGReference +
|
||||||
|
|
@ -38,7 +38,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGenotypesToVCFUsingGeliInput() {
|
public void testGenotypesToVCFUsingGeliInput() {
|
||||||
List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
md5.add("22336ee9c12aa222ce29c3c5babca7d0");
|
md5.add("71e8c98d7c3a73b6287ecc339086fe03");
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-R " + b36KGReference +
|
"-R " + b36KGReference +
|
||||||
|
|
@ -56,7 +56,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGenotypesToVCFUsingHapMapInput() {
|
public void testGenotypesToVCFUsingHapMapInput() {
|
||||||
List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
md5.add("9bedaa7670b86a07be5191898c3727cf");
|
md5.add("f343085305e80c7a2493422e4eaad983");
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-R " + b36KGReference +
|
"-R " + b36KGReference +
|
||||||
|
|
@ -73,7 +73,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGenotypesToVCFUsingVCFInput() {
|
public void testGenotypesToVCFUsingVCFInput() {
|
||||||
List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
md5.add("cc215edec9ca28e5c79ab1b67506f9f7");
|
md5.add("86f02e2e764ba35854cff2aa05a1fdd8");
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-R " + b36KGReference +
|
"-R " + b36KGReference +
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class VCFIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReadingAndWritingWitHNoChanges() {
|
||||||
|
|
||||||
|
String md5ofInputVCF = "a990ba187a69ca44cb9bc2bb44d00447";
|
||||||
|
String testVCF = validationDataLocation + "vcf4.1.example.vcf";
|
||||||
|
|
||||||
|
String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s ";
|
||||||
|
|
||||||
|
String test1 = baseCommand + "-T VariantAnnotator -BTI variant -B:variant,vcf " + testVCF;
|
||||||
|
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF));
|
||||||
|
List<File> result = executeTest("Test Variant Annotator with no changes", spec1).getFirst();
|
||||||
|
|
||||||
|
String test2 = baseCommand + "-T VariantsToVCF -B:variant,vcf " + result.get(0).getAbsolutePath();
|
||||||
|
WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF));
|
||||||
|
executeTest("Test Variants To VCF from new output", spec2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -4,13 +4,13 @@ import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||||
|
|
||||||
import scala.io.Source._
|
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
||||||
import org.broadinstitute.sting.queue.extensions.picard._
|
import org.broadinstitute.sting.queue.extensions.picard._
|
||||||
import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord}
|
import net.sf.samtools.{SAMFileReader}
|
||||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.queue.qscripts.utils.Utils
|
||||||
|
|
||||||
class DataProcessingPipeline extends QScript {
|
class DataProcessingPipeline extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
@ -103,18 +103,6 @@ class DataProcessingPipeline extends QScript {
|
||||||
val ds: String)
|
val ds: String)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
// Utility function to check if there are multiple samples in a BAM file (currently we can't deal with that).
// Walks the read groups in order, remembering the first non-empty sample name seen, and
// reports true as soon as a read group carries a different sample name.
def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
  var seenSample: String = ""
  val groups = readGroups.iterator()
  while (groups.hasNext) {
    val currentSample = groups.next().getSample
    if (seenSample.isEmpty) {
      // nothing recorded yet: adopt this read group's sample as the reference
      seenSample = currentSample
    } else if (seenSample != currentSample) {
      return true
    }
  }
  false
}
|
|
||||||
|
|
||||||
// Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
|
// Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
|
||||||
// It uses the sample information on the header of the input BAM files.
|
// It uses the sample information on the header of the input BAM files.
|
||||||
//
|
//
|
||||||
|
|
@ -135,7 +123,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
|
|
||||||
// only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
|
// only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
|
||||||
// with PrintReads to separate the samples. Tell user to do it himself!
|
// with PrintReads to separate the samples. Tell user to do it himself!
|
||||||
assert(!hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
assert(!Utils.hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
||||||
|
|
||||||
// Fill out the sample table with the readgroups in this file
|
// Fill out the sample table with the readgroups in this file
|
||||||
for (rg <- readGroups) {
|
for (rg <- readGroups) {
|
||||||
|
|
@ -147,20 +135,23 @@ class DataProcessingPipeline extends QScript {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
println("\n\n*** DEBUG ***\n")
|
||||||
// Creating one file for each sample in the dataset
|
// Creating one file for each sample in the dataset
|
||||||
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||||
for ((sample, flist) <- sampleTable) {
|
for ((sample, flist) <- sampleTable) {
|
||||||
|
|
||||||
|
println(sample + ":")
|
||||||
|
for (f <- flist)
|
||||||
|
println (f)
|
||||||
|
println()
|
||||||
|
|
||||||
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".bam")
|
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".bam")
|
||||||
sampleBamFiles(sample) = sampleFileName
|
sampleBamFiles(sample) = sampleFileName
|
||||||
add(joinBams(flist, sampleFileName))
|
add(joinBams(flist, sampleFileName))
|
||||||
}
|
}
|
||||||
return sampleBamFiles.toMap
|
println("*** DEBUG ***\n\n")
|
||||||
}
|
|
||||||
|
|
||||||
// Checks how many contigs are in the dataset. Uses the BAM file header information.
|
return sampleBamFiles.toMap
|
||||||
def getNumberOfContigs(bamFile: File): Int = {
|
|
||||||
val samReader = new SAMFileReader(new File(bamFile))
|
|
||||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rebuilds the Read Group string to give BWA
|
// Rebuilds the Read Group string to give BWA
|
||||||
|
|
@ -206,17 +197,6 @@ class DataProcessingPipeline extends QScript {
|
||||||
return realignedBams
|
return realignedBams
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads a BAM LIST file and creates a scala list with all the files.
// If the given path itself ends in "bam" it is returned as a one-element list and is
// not opened. Otherwise every line of the file becomes one File entry, in file order.
def createListFromFile(in: File): List[File] = {
  if (in.toString.endsWith("bam"))
    return List(in)

  val source = fromFile(in)
  try {
    // getLines is lazy: force it into a List before the source is closed
    source.getLines.map(line => new File(line)).toList
  } finally {
    // release the underlying file handle even if reading fails
    source.close()
  }
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Main script
|
* Main script
|
||||||
|
|
@ -226,17 +206,14 @@ class DataProcessingPipeline extends QScript {
|
||||||
def script = {
|
def script = {
|
||||||
|
|
||||||
// keep a record of the number of contigs in the first bam file in the list
|
// keep a record of the number of contigs in the first bam file in the list
|
||||||
val bams = createListFromFile(input)
|
val bams = Utils.createListFromFile(input)
|
||||||
nContigs = getNumberOfContigs(bams(0))
|
nContigs = Utils.getNumberOfContigs(bams(0))
|
||||||
|
|
||||||
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}
|
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}
|
||||||
|
|
||||||
// Generate a BAM file per sample joining all per lane files if necessary
|
// Generate a BAM file per sample joining all per lane files if necessary
|
||||||
val sampleBamFiles: Map[String, File] = createSampleFiles(bams, realignedBams)
|
val sampleBamFiles: Map[String, File] = createSampleFiles(bams, realignedBams)
|
||||||
|
|
||||||
|
|
||||||
println("nContigs: " + nContigs)
|
|
||||||
|
|
||||||
// Final output list of processed bam files
|
// Final output list of processed bam files
|
||||||
var cohortList: List[File] = List()
|
var cohortList: List[File] = List()
|
||||||
|
|
||||||
|
|
@ -244,6 +221,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
println("\nFound the following samples: ")
|
println("\nFound the following samples: ")
|
||||||
for ((sample, file) <- sampleBamFiles)
|
for ((sample, file) <- sampleBamFiles)
|
||||||
println("\t" + sample + " -> " + file)
|
println("\t" + sample + " -> " + file)
|
||||||
|
println("\n")
|
||||||
|
|
||||||
// If this is a 'knowns only' indel realignment run, do it only once for all samples.
|
// If this is a 'knowns only' indel realignment run, do it only once for all samples.
|
||||||
val globalIntervals = new File(outputDir + projectName + ".intervals")
|
val globalIntervals = new File(outputDir + projectName + ".intervals")
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.queue.qscripts
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
import net.sf.samtools.SAMFileReader
|
import net.sf.samtools.SAMFileReader
|
||||||
|
import io.Source._
|
||||||
|
import org.broadinstitute.sting.queue.qscripts.utils.Utils
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -32,26 +34,25 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
val queueLogDir: String = ".qlog/"
|
val queueLogDir: String = ".qlog/"
|
||||||
var nContigs: Int = 0
|
var nContigs: Int = 0
|
||||||
|
|
||||||
def getNumberOfContigs(bamFile: File): Int = {
|
|
||||||
val samReader = new SAMFileReader(new File(bamFile))
|
|
||||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
|
||||||
}
|
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
|
|
||||||
nContigs = getNumberOfContigs(input)
|
val bamList = Utils.createListFromFile(input)
|
||||||
|
nContigs = Utils.getNumberOfContigs(bamList(0))
|
||||||
|
|
||||||
val recalFile1: File = swapExt(input, ".bam", "recal1.csv")
|
for (bam <- bamList) {
|
||||||
val recalFile2: File = swapExt(input, ".bam", "recal2.csv")
|
|
||||||
val recalBam: File = swapExt(input, ".bam", "recal.bam")
|
val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv")
|
||||||
val path1: String = "before"
|
val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv")
|
||||||
val path2: String = "after"
|
val recalBam: File = swapExt(bam, ".bam", ".recal.bam")
|
||||||
|
val path1: String = bam + "before"
|
||||||
add(cov(input, recalFile1),
|
val path2: String = bam + "after"
|
||||||
recal(input, recalFile1, recalBam),
|
|
||||||
cov(recalBam, recalFile2),
|
add(cov(bam, recalFile1),
|
||||||
analyzeCovariates(recalFile1, path1),
|
recal(bam, recalFile1, recalBam),
|
||||||
analyzeCovariates(recalFile2, path2))
|
cov(recalBam, recalFile2),
|
||||||
|
analyzeCovariates(recalFile1, path1),
|
||||||
|
analyzeCovariates(recalFile2, path2))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.broadinstitute.sting.queue.qscripts.utils
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
import io.Source._
|
||||||
|
import net.sf.samtools.{SAMReadGroupRecord, SAMFileReader}
|
||||||
|
|
||||||
|
import collection.JavaConversions._
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: carneiro
|
||||||
|
* Date: 7/14/11
|
||||||
|
* Time: 4:57 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
|
||||||
|
object Utils {
  // Helper routines shared by the Queue qscripts: BAM list parsing and BAM header inspection.

  /**
   * Takes a bam list file and produces a scala list with each file, allowing the bam list
   * to have empty lines and comment lines (lines starting with #).
   *
   * Each line is trimmed before it is inspected, so whitespace-only lines and comment lines
   * with leading whitespace are skipped as well.
   *
   * @param in either a single BAM file (name ending in ".bam") or a text file with one BAM path per line
   * @return the BAM files, in the order in which they appear in the list
   */
  def createListFromFile(in: File): List[File] = {
    // If the file provided ends with .bam, it is not a bam list: we treat it as a single file
    // and return a list with only this file.
    if (in.toString.endsWith(".bam")) {
      List(in)
    } else {
      val source = fromFile(in)
      try {
        val paths = source.getLines().map(_.trim).filter(line => !line.isEmpty && !line.startsWith("#"))
        // getLines() is lazy: materialize the list before the source is closed below.
        paths.map(path => new File(path)).toList
      } finally {
        source.close()
      }
    }
  }

  /**
   * Returns the number of contigs in the BAM file header.
   *
   * @param bamFile the BAM file whose header sequence dictionary is inspected
   * @return the number of sequences listed in the header's sequence dictionary
   */
  def getNumberOfContigs(bamFile: File): Int = {
    // bamFile is already a File; pass it straight to the reader instead of re-wrapping it.
    val samReader = new SAMFileReader(bamFile)
    try {
      samReader.getFileHeader.getSequenceDictionary.getSequences.size()
    } finally {
      // Release the underlying file handle once the header has been read.
      samReader.close()
    }
  }

  /**
   * Check if there are multiple samples in a BAM file.
   *
   * @param readGroups the read groups taken from a BAM file header
   * @return true if at least two read groups carry different sample names, false otherwise
   *         (including when the list is empty)
   */
  def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
    val samples = readGroups.map(_.getSample)
    // Compare every sample against the first one. Scala's != is null-safe, so read groups
    // without a sample do not blow up and an empty sample name is treated as a real value.
    samples.headOption.exists(first => samples.exists(_ != first))
  }

}
|
||||||
Loading…
Reference in New Issue