Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
bfbca8b194
|
|
@ -55,10 +55,14 @@ public class TableFeature implements Feature {
|
|||
}
|
||||
|
||||
public List<String> getAllValues() {
|
||||
return getValuesTo(values.size()-1);
|
||||
return getValuesTo(values.size());
|
||||
}
|
||||
|
||||
public List<String> getValuesTo(int columnPosition) {
|
||||
return values.subList(0,columnPosition);
|
||||
}
|
||||
|
||||
public List<String> getHeader() {
|
||||
return keys;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -41,8 +42,8 @@ import java.util.*;
|
|||
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
|
||||
|
||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -142,5 +143,5 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot
|
|||
// public String getIndelBases()
|
||||
public List<String> getKeyNames() { return Arrays.asList("AD"); }
|
||||
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFCompoundHeaderLine.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
|
||||
}
|
||||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
|
|
@ -200,8 +201,8 @@ public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation {
|
|||
1,
|
||||
VCFHeaderLineType.Integer,
|
||||
"Total read depth per sample, including MQ0"),
|
||||
new VCFFormatHeaderLine(getKeyNames().get(1),
|
||||
VCFCompoundHeaderLine.UNBOUNDED,
|
||||
new VCFFormatHeaderLine(getKeyNames().get(1),
|
||||
VCFHeaderLineCount.UNBOUNDED,
|
||||
VCFHeaderLineType.Float,
|
||||
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -65,5 +66,5 @@ public class SampleList implements InfoFieldAnnotation {
|
|||
|
||||
public List<String> getKeyNames() { return Arrays.asList("Samples"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFInfoHeaderLine.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.BlockCompressedInputStream;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 1:09 PM
|
||||
*
|
||||
* Class implementing diffnode reader for VCF
|
||||
*/
|
||||
public class BAMDiffableReader implements DiffableReader {
|
||||
private final static int MAX_RECORDS_TO_READ = 1000;
|
||||
@Override
|
||||
public String getName() { return "BAM"; }
|
||||
|
||||
@Override
|
||||
public DiffElement readFromFile(File file) {
|
||||
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||
|
||||
DiffNode root = DiffNode.rooted(file.getName());
|
||||
SAMRecordIterator iterator = reader.iterator();
|
||||
|
||||
int count = 0;
|
||||
while ( iterator.hasNext() ) {
|
||||
if ( count++ > MAX_RECORDS_TO_READ )
|
||||
break;
|
||||
final SAMRecord record = iterator.next();
|
||||
|
||||
// name is the read name + first of pair
|
||||
String name = record.getReadName().replace('.', '_');
|
||||
if ( record.getReadPairedFlag() ) {
|
||||
name += record.getFirstOfPairFlag() ? "_1" : "_2";
|
||||
}
|
||||
|
||||
DiffNode readRoot = DiffNode.empty(name, root);
|
||||
|
||||
// add fields
|
||||
readRoot.add("NAME", record.getReadName());
|
||||
readRoot.add("FLAGS", record.getFlags());
|
||||
readRoot.add("RNAME", record.getReferenceName());
|
||||
readRoot.add("POS", record.getAlignmentStart());
|
||||
readRoot.add("MAPQ", record.getMappingQuality());
|
||||
readRoot.add("CIGAR", record.getCigarString());
|
||||
readRoot.add("RNEXT", record.getMateReferenceName());
|
||||
readRoot.add("PNEXT", record.getMateAlignmentStart());
|
||||
readRoot.add("TLEN", record.getInferredInsertSize());
|
||||
readRoot.add("SEQ", record.getReadString());
|
||||
readRoot.add("QUAL", record.getBaseQualityString());
|
||||
|
||||
for ( SAMRecord.SAMTagAndValue xt : record.getAttributes() ) {
|
||||
readRoot.add(xt.tag, xt.value);
|
||||
}
|
||||
|
||||
// add record to root
|
||||
if ( ! root.hasElement(name) )
|
||||
// protect ourselves from malformed files
|
||||
root.add(readRoot);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
||||
return root.getBinding();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRead(File file) {
|
||||
final byte[] BAM_MAGIC = "BAM\1".getBytes();
|
||||
final byte[] buffer = new byte[BAM_MAGIC.length];
|
||||
try {
|
||||
FileInputStream fstream = new FileInputStream(file);
|
||||
new BlockCompressedInputStream(fstream).read(buffer,0,BAM_MAGIC.length);
|
||||
return Arrays.equals(buffer, BAM_MAGIC);
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
} catch ( net.sf.samtools.FileTruncatedException e ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import com.google.java.contract.*;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:55 PM
|
||||
*
|
||||
* An interface that must be implemented to allow us to calculate differences
|
||||
* between structured objects
|
||||
*/
|
||||
@Invariant({
|
||||
"name != null",
|
||||
"value != null",
|
||||
"parent != null || name.equals(\"ROOT\")",
|
||||
"value == null || value.getBinding() == this"})
|
||||
public class DiffElement {
|
||||
public final static DiffElement ROOT = new DiffElement();
|
||||
|
||||
final private String name;
|
||||
final private DiffElement parent;
|
||||
final private DiffValue value;
|
||||
|
||||
/**
|
||||
* For ROOT only
|
||||
*/
|
||||
private DiffElement() {
|
||||
this.name = "ROOT";
|
||||
this.parent = null;
|
||||
this.value = new DiffValue(this, "ROOT");
|
||||
}
|
||||
|
||||
@Requires({"name != null", "parent != null", "value != null"})
|
||||
public DiffElement(String name, DiffElement parent, DiffValue value) {
|
||||
if ( name.equals("ROOT") ) throw new IllegalArgumentException("Cannot use reserved name ROOT");
|
||||
this.name = name;
|
||||
this.parent = parent;
|
||||
this.value = value;
|
||||
this.value.setBinding(this);
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public DiffElement getParent() {
|
||||
return parent;
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public DiffValue getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public boolean isRoot() { return this == ROOT; }
|
||||
|
||||
@Ensures({"result != null"})
|
||||
@Override
|
||||
public String toString() {
|
||||
return getName() + "=" + getValue().toString();
|
||||
}
|
||||
|
||||
public String toString(int offset) {
|
||||
return (offset > 0 ? Utils.dupString(' ', offset) : 0) + getName() + "=" + getValue().toString(offset);
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public final String fullyQualifiedName() {
|
||||
if ( isRoot() )
|
||||
return "";
|
||||
else if ( parent.isRoot() )
|
||||
return name;
|
||||
else
|
||||
return parent.fullyQualifiedName() + "." + name;
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public String toOneLineString() {
|
||||
return getName() + "=" + getValue().toOneLineString();
|
||||
}
|
||||
|
||||
@Ensures({"result != null"})
|
||||
public DiffNode getValueAsNode() {
|
||||
if ( getValue().isCompound() )
|
||||
return (DiffNode)getValue();
|
||||
else
|
||||
throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,423 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:51 PM
|
||||
* A generic engine for comparing tree-structured objects
|
||||
*/
|
||||
public class DiffEngine {
|
||||
final protected static Logger logger = Logger.getLogger(DiffEngine.class);
|
||||
|
||||
private final Map<String, DiffableReader> readers = new HashMap<String, DiffableReader>();
|
||||
|
||||
public DiffEngine() {
|
||||
loadDiffableReaders();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// difference calculation
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public List<Difference> diff(DiffElement master, DiffElement test) {
|
||||
DiffValue masterValue = master.getValue();
|
||||
DiffValue testValue = test.getValue();
|
||||
|
||||
if ( masterValue.isCompound() && masterValue.isCompound() ) {
|
||||
return diff(master.getValueAsNode(), test.getValueAsNode());
|
||||
} else if ( masterValue.isAtomic() && testValue.isAtomic() ) {
|
||||
return diff(masterValue, testValue);
|
||||
} else {
|
||||
// structural difference in types. one is node, other is leaf
|
||||
return Arrays.asList(new Difference(master, test));
|
||||
}
|
||||
}
|
||||
|
||||
public List<Difference> diff(DiffNode master, DiffNode test) {
|
||||
Set<String> allNames = new HashSet<String>(master.getElementNames());
|
||||
allNames.addAll(test.getElementNames());
|
||||
List<Difference> diffs = new ArrayList<Difference>();
|
||||
|
||||
for ( String name : allNames ) {
|
||||
DiffElement masterElt = master.getElement(name);
|
||||
DiffElement testElt = test.getElement(name);
|
||||
if ( masterElt == null && testElt == null ) {
|
||||
throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
|
||||
} else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
|
||||
// todo -- should one of these be a special MISSING item?
|
||||
diffs.add(new Difference(masterElt, testElt));
|
||||
} else {
|
||||
diffs.addAll(diff(masterElt, testElt));
|
||||
}
|
||||
}
|
||||
|
||||
return diffs;
|
||||
}
|
||||
|
||||
public List<Difference> diff(DiffValue master, DiffValue test) {
|
||||
if ( master.getValue().equals(test.getValue()) ) {
|
||||
return Collections.emptyList();
|
||||
} else {
|
||||
return Arrays.asList(new Difference(master.getBinding(), test.getBinding()));
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Summarizing differences
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Emits a summary of the diffs to out. Suppose you have the following three differences:
|
||||
*
|
||||
* A.X.Z:1!=2
|
||||
* A.Y.Z:3!=4
|
||||
* B.X.Z:5!=6
|
||||
*
|
||||
* The above is the itemized list of the differences. The summary looks for common differences
|
||||
* in the name hierarchy, counts those shared elements, and emits the differences that occur
|
||||
* in order of decreasing counts.
|
||||
*
|
||||
* So, in the above example, what are the shared elements?
|
||||
*
|
||||
* A.X.Z and B.X.Z share X.Z, so there's a *.X.Z with count 2
|
||||
* A.X.Z, A.Y.Z, and B.X.Z all share *.*.Z, with count 3
|
||||
* Each of A.X.Z, A.Y.Z, and B.X.Z are individually unique, with count 1
|
||||
*
|
||||
* So we would emit the following summary:
|
||||
*
|
||||
* *.*.Z: 3
|
||||
* *.X.Z: 2
|
||||
* A.X.Z: 1 [specific difference: 1!=2]
|
||||
* A.Y.Z: 1 [specific difference: 3!=4]
|
||||
* B.X.Z: 1 [specific difference: 5!=6]
|
||||
*
|
||||
* The algorithm to accomplish this calculation is relatively simple. Start with all of the
|
||||
* concrete differences. For each pair of differences A1.A2....AN and B1.B2....BN:
|
||||
*
|
||||
* find the longest common subsequence Si.Si+1...SN where Ai = Bi = Si
|
||||
* If i == 0, then there's no shared substructure
|
||||
* If i > 0, then generate the summarized value X = *.*...Si.Si+1...SN
|
||||
* if X is a known summary, increment it's count, otherwise set its count to 1
|
||||
*
|
||||
* Not that only pairs of the same length are considered as potentially equivalent
|
||||
*
|
||||
* @param params determines how we display the items
|
||||
* @param diffs
|
||||
*/
|
||||
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
|
||||
printSummaryReport(summarizeDifferences(diffs), params );
|
||||
}
|
||||
|
||||
public List<SummarizedDifference> summarizeDifferences(List<Difference> diffs) {
|
||||
List<String[]> diffPaths = new ArrayList<String[]>(diffs.size());
|
||||
|
||||
for ( Difference diff1 : diffs ) {
|
||||
diffPaths.add(diffNameToPath(diff1.getFullyQualifiedName()));
|
||||
}
|
||||
|
||||
return summarizedDifferencesOfPaths(diffPaths);
|
||||
}
|
||||
|
||||
final protected static String[] diffNameToPath(String diffName) {
|
||||
return diffName.split("\\.");
|
||||
}
|
||||
|
||||
protected List<SummarizedDifference> summarizedDifferencesOfPaths(List<String[]> diffPaths) {
|
||||
Map<String, SummarizedDifference> summaries = new HashMap<String, SummarizedDifference>();
|
||||
|
||||
// create the initial set of differences
|
||||
for ( int i = 0; i < diffPaths.size(); i++ ) {
|
||||
for ( int j = 0; j <= i; j++ ) {
|
||||
String[] diffPath1 = diffPaths.get(i);
|
||||
String[] diffPath2 = diffPaths.get(j);
|
||||
if ( diffPath1.length == diffPath2.length ) {
|
||||
int lcp = longestCommonPostfix(diffPath1, diffPath2);
|
||||
String path = lcp > 0 ? summarizedPath(diffPath2, lcp) : Utils.join(".", diffPath2);
|
||||
addSummary(summaries, path, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// count differences
|
||||
for ( String[] diffPath : diffPaths ) {
|
||||
for ( SummarizedDifference sumDiff : summaries.values() ) {
|
||||
if ( sumDiff.matches(diffPath) )
|
||||
addSummary(summaries, sumDiff.getPath(), false);
|
||||
}
|
||||
}
|
||||
|
||||
List<SummarizedDifference> sortedSummaries = new ArrayList<SummarizedDifference>(summaries.values());
|
||||
Collections.sort(sortedSummaries);
|
||||
return sortedSummaries;
|
||||
}
|
||||
|
||||
private static void addSummary(Map<String, SummarizedDifference> summaries, String path, boolean onlyCatalog) {
|
||||
if ( summaries.containsKey(path) ) {
|
||||
if ( ! onlyCatalog )
|
||||
summaries.get(path).incCount();
|
||||
} else {
|
||||
SummarizedDifference sumDiff = new SummarizedDifference(path);
|
||||
summaries.put(sumDiff.getPath(), sumDiff);
|
||||
}
|
||||
}
|
||||
|
||||
protected void printSummaryReport(List<SummarizedDifference> sortedSummaries, SummaryReportParams params ) {
|
||||
GATKReport report = new GATKReport();
|
||||
final String tableName = "diffences";
|
||||
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information");
|
||||
GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("Difference", true);
|
||||
table.addColumn("NumberOfOccurrences", 0);
|
||||
|
||||
int count = 0, count1 = 0;
|
||||
for ( SummarizedDifference diff : sortedSummaries ) {
|
||||
if ( diff.getCount() < params.minSumDiffToShow )
|
||||
// in order, so break as soon as the count is too low
|
||||
break;
|
||||
|
||||
if ( params.maxItemsToDisplay != 0 && count++ > params.maxItemsToDisplay )
|
||||
break;
|
||||
|
||||
if ( diff.getCount() == 1 ) {
|
||||
count1++;
|
||||
if ( params.maxCountOneItems != 0 && count1 > params.maxCountOneItems )
|
||||
break;
|
||||
}
|
||||
|
||||
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
|
||||
}
|
||||
|
||||
table.write(params.out);
|
||||
}
|
||||
|
||||
protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) {
|
||||
int i = 0;
|
||||
for ( ; i < diffPath1.length; i++ ) {
|
||||
int j = diffPath1.length - i - 1;
|
||||
if ( ! diffPath1[j].equals(diffPath2[j]) )
|
||||
break;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/**
|
||||
* parts is [A B C D]
|
||||
* commonPostfixLength: how many parts are shared at the end, suppose its 2
|
||||
* We want to create a string *.*.C.D
|
||||
*
|
||||
* @param parts
|
||||
* @param commonPostfixLength
|
||||
* @return
|
||||
*/
|
||||
protected static String summarizedPath(String[] parts, int commonPostfixLength) {
|
||||
int stop = parts.length - commonPostfixLength;
|
||||
if ( stop > 0 ) parts = parts.clone();
|
||||
for ( int i = 0; i < stop; i++ ) {
|
||||
parts[i] = "*";
|
||||
}
|
||||
return Utils.join(".", parts);
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO -- all of the algorithms above should use SummarizedDifference instead
|
||||
* TODO -- of some SummarizedDifferences and some low-level String[]
|
||||
*/
|
||||
public static class SummarizedDifference implements Comparable<SummarizedDifference> {
|
||||
final String path; // X.Y.Z
|
||||
final String[] parts;
|
||||
int count = 0;
|
||||
|
||||
public SummarizedDifference(String path) {
|
||||
this.path = path;
|
||||
this.parts = diffNameToPath(path);
|
||||
}
|
||||
|
||||
public void incCount() { count++; }
|
||||
|
||||
public int getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* The fully qualified path object A.B.C etc
|
||||
* @return
|
||||
*/
|
||||
public String getPath() {
|
||||
return path;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the length of the parts of this summary
|
||||
*/
|
||||
public int length() {
|
||||
return this.parts.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the string parts matches this summary. Matches are
|
||||
* must be equal() everywhere where this summary isn't *.
|
||||
* @param otherParts
|
||||
* @return
|
||||
*/
|
||||
public boolean matches(String[] otherParts) {
|
||||
if ( otherParts.length != length() )
|
||||
return false;
|
||||
|
||||
// TODO optimization: can start at right most non-star element
|
||||
for ( int i = 0; i < length(); i++ ) {
|
||||
String part = parts[i];
|
||||
if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s:%d", getPath(), getCount());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(SummarizedDifference other) {
|
||||
// sort first highest to lowest count, then by lowest to highest path
|
||||
int countCmp = Integer.valueOf(count).compareTo(other.count);
|
||||
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// plugin manager
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public void loadDiffableReaders() {
|
||||
List<Class<? extends DiffableReader>> drClasses = new PluginManager<DiffableReader>( DiffableReader.class ).getPlugins();
|
||||
|
||||
logger.info("Loading diffable modules:");
|
||||
for (Class<? extends DiffableReader> drClass : drClasses ) {
|
||||
logger.info("\t" + drClass.getSimpleName());
|
||||
|
||||
try {
|
||||
DiffableReader dr = drClass.newInstance();
|
||||
readers.put(dr.getName(), dr);
|
||||
} catch (InstantiationException e) {
|
||||
throw new ReviewedStingException("Unable to instantiate module '" + drClass.getSimpleName() + "'");
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new ReviewedStingException("Illegal access error when trying to instantiate '" + drClass.getSimpleName() + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected Map<String, DiffableReader> getReaders() {
|
||||
return readers;
|
||||
}
|
||||
|
||||
protected DiffableReader getReader(String name) {
|
||||
return readers.get(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a reader appropriate for this file, or null if no such reader exists
|
||||
* @param file
|
||||
* @return
|
||||
*/
|
||||
public DiffableReader findReaderForFile(File file) {
|
||||
for ( DiffableReader reader : readers.values() )
|
||||
if (reader.canRead(file) )
|
||||
return reader;
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if reader appropriate for this file, or false if no such reader exists
|
||||
* @param file
|
||||
* @return
|
||||
*/
|
||||
public boolean canRead(File file) {
|
||||
return findReaderForFile(file) != null;
|
||||
}
|
||||
|
||||
public DiffElement createDiffableFromFile(File file) {
|
||||
DiffableReader reader = findReaderForFile(file);
|
||||
if ( reader == null )
|
||||
throw new UserException("Unsupported file type: " + file);
|
||||
else
|
||||
return reader.readFromFile(file);
|
||||
}
|
||||
|
||||
public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {
|
||||
DiffEngine diffEngine = new DiffEngine();
|
||||
|
||||
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static class SummaryReportParams {
|
||||
PrintStream out = System.out;
|
||||
int maxItemsToDisplay = 0;
|
||||
int maxCountOneItems = 0;
|
||||
int minSumDiffToShow = 0;
|
||||
|
||||
public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) {
|
||||
this.out = out;
|
||||
this.maxItemsToDisplay = maxItemsToDisplay;
|
||||
this.maxCountOneItems = maxCountOneItems;
|
||||
this.minSumDiffToShow = minSumDiffToShow;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,239 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:55 PM
|
||||
*
|
||||
* An interface that must be implemented to allow us to calculate differences
|
||||
* between structured objects
|
||||
*/
|
||||
public class DiffNode extends DiffValue {
|
||||
private Map<String, DiffElement> getElementMap() {
|
||||
return (Map<String, DiffElement>)super.getValue();
|
||||
}
|
||||
private static Map<String, DiffElement> emptyElements() { return new HashMap<String, DiffElement>(); }
|
||||
|
||||
private DiffNode(Map<String, DiffElement> elements) {
|
||||
super(elements);
|
||||
}
|
||||
|
||||
private DiffNode(DiffElement binding, Map<String, DiffElement> elements) {
|
||||
super(binding, elements);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
public static DiffNode rooted(String name) {
|
||||
return empty(name, DiffElement.ROOT);
|
||||
}
|
||||
|
||||
public static DiffNode empty(String name, DiffElement parent) {
|
||||
DiffNode df = new DiffNode(emptyElements());
|
||||
DiffElement elt = new DiffElement(name, parent, df);
|
||||
df.setBinding(elt);
|
||||
return df;
|
||||
}
|
||||
|
||||
public static DiffNode empty(String name, DiffValue parent) {
|
||||
return empty(name, parent.getBinding());
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// accessors
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public boolean isAtomic() { return false; }
|
||||
|
||||
public Collection<String> getElementNames() {
|
||||
return getElementMap().keySet();
|
||||
}
|
||||
|
||||
public Collection<DiffElement> getElements() {
|
||||
return getElementMap().values();
|
||||
}
|
||||
|
||||
private Collection<DiffElement> getElements(boolean atomicOnly) {
|
||||
List<DiffElement> elts = new ArrayList<DiffElement>();
|
||||
for ( DiffElement elt : getElements() )
|
||||
if ( (atomicOnly && elt.getValue().isAtomic()) || (! atomicOnly && elt.getValue().isCompound()))
|
||||
elts.add(elt);
|
||||
return elts;
|
||||
}
|
||||
|
||||
public Collection<DiffElement> getAtomicElements() {
|
||||
return getElements(true);
|
||||
}
|
||||
|
||||
public Collection<DiffElement> getCompoundElements() {
|
||||
return getElements(false);
|
||||
}
|
||||
|
||||
public DiffElement getElement(String name) {
|
||||
for ( DiffElement elt : getElements() )
|
||||
if ( elt.getName().equals(name) )
|
||||
return elt;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if name is bound in this node
|
||||
* @param name
|
||||
* @return
|
||||
*/
|
||||
public boolean hasElement(String name) {
|
||||
return getElement(name) != null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// add
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@Requires("elt != null")
|
||||
public void add(DiffElement elt) {
|
||||
if ( getElementMap().containsKey(elt.getName()) )
|
||||
throw new IllegalArgumentException("Attempting to rebind already existing binding: " + elt + " node=" + this);
|
||||
getElementMap().put(elt.getName(), elt);
|
||||
}
|
||||
|
||||
@Requires("elt != null")
|
||||
public void add(DiffValue elt) {
|
||||
add(elt.getBinding());
|
||||
}
|
||||
|
||||
@Requires("elts != null")
|
||||
public void add(Collection<DiffElement> elts) {
|
||||
for ( DiffElement e : elts )
|
||||
add(e);
|
||||
}
|
||||
|
||||
public void add(String name, Object value) {
|
||||
add(new DiffElement(name, this.getBinding(), new DiffValue(value)));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// toString
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return toString(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(int offset) {
|
||||
String off = offset > 0 ? Utils.dupString(' ', offset) : "";
|
||||
StringBuilder b = new StringBuilder();
|
||||
|
||||
b.append("(").append("\n");
|
||||
Collection<DiffElement> atomicElts = getAtomicElements();
|
||||
for ( DiffElement elt : atomicElts ) {
|
||||
b.append(elt.toString(offset + 2)).append('\n');
|
||||
}
|
||||
|
||||
for ( DiffElement elt : getCompoundElements() ) {
|
||||
b.append(elt.toString(offset + 4)).append('\n');
|
||||
}
|
||||
b.append(off).append(")").append("\n");
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toOneLineString() {
|
||||
StringBuilder b = new StringBuilder();
|
||||
|
||||
b.append('(');
|
||||
List<String> parts = new ArrayList<String>();
|
||||
for ( DiffElement elt : getElements() )
|
||||
parts.add(elt.toOneLineString());
|
||||
b.append(Utils.join(" ", parts));
|
||||
b.append(')');
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// fromString and toOneLineString
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public static DiffElement fromString(String tree) {
|
||||
return fromString(tree, DiffElement.ROOT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Doesn't support full tree structure parsing
|
||||
* @param tree
|
||||
* @param parent
|
||||
* @return
|
||||
*/
|
||||
private static DiffElement fromString(String tree, DiffElement parent) {
|
||||
// X=(A=A B=B C=(D=D))
|
||||
String[] parts = tree.split("=", 2);
|
||||
if ( parts.length != 2 )
|
||||
throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts);
|
||||
String name = parts[0];
|
||||
String value = parts[1];
|
||||
|
||||
if ( value.length() == 0 )
|
||||
throw new ReviewedStingException("Illegal tree structure: " + value + " at " + tree);
|
||||
|
||||
if ( value.charAt(0) == '(' ) {
|
||||
if ( ! value.endsWith(")") )
|
||||
throw new ReviewedStingException("Illegal tree structure. Missing ): " + value + " at " + tree);
|
||||
String subtree = value.substring(1, value.length()-1);
|
||||
DiffNode rec = DiffNode.empty(name, parent);
|
||||
String[] subParts = subtree.split(" ");
|
||||
for ( String subPart : subParts ) {
|
||||
rec.add(fromString(subPart, rec.getBinding()));
|
||||
}
|
||||
return rec.getBinding();
|
||||
} else {
|
||||
return new DiffValue(name, parent, value).getBinding();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.apache.xmlbeans.impl.tool.Diff;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Compares two record-oriented files, itemizing specific difference between equivalent
|
||||
* records in the two files. Reports both itemized and summarized differences.
|
||||
* @author Mark DePristo
|
||||
* @version 0.1
|
||||
*/
|
||||
@Requires(value={})
|
||||
public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
||||
@Output(doc="File to which results should be written",required=true)
|
||||
protected PrintStream out;
|
||||
|
||||
@Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false)
|
||||
int MAX_RECORDS = 0;
|
||||
|
||||
@Argument(fullName="maxCount1Records", shortName="M1", doc="Max. number of records occuring exactly once in the file to process", required=false)
|
||||
int MAX_COUNT1_RECORDS = 0;
|
||||
|
||||
@Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
|
||||
int minCountForDiff = 1;
|
||||
|
||||
@Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false)
|
||||
boolean showItemizedDifferences = false;
|
||||
|
||||
@Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true)
|
||||
File masterFile;
|
||||
|
||||
@Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true)
|
||||
File testFile;
|
||||
|
||||
final DiffEngine diffEngine = new DiffEngine();
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer counter, Integer sum) {
|
||||
return counter + sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer sum) {
|
||||
out.printf("Reading master file %s%n", masterFile);
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
|
||||
out.printf("Reading test file %s%n", testFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile);
|
||||
|
||||
// out.printf("Master diff objects%n");
|
||||
// out.println(master.toString());
|
||||
// out.printf("Test diff objects%n");
|
||||
// out.println(test.toString());
|
||||
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
if ( showItemizedDifferences ) {
|
||||
out.printf("Itemized results%n");
|
||||
for ( Difference diff : diffs )
|
||||
out.printf("DIFF: %s%n", diff.toString());
|
||||
}
|
||||
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_RECORDS, MAX_COUNT1_RECORDS, minCountForDiff);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:55 PM
|
||||
*
|
||||
* An interface that must be implemented to allow us to calculate differences
|
||||
* between structured objects
|
||||
*/
|
||||
public class DiffValue {
|
||||
private DiffElement binding = null;
|
||||
final private Object value;
|
||||
|
||||
public DiffValue(Object value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public DiffValue(DiffElement binding, Object value) {
|
||||
this.binding = binding;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public DiffValue(DiffValue parent, Object value) {
|
||||
this(parent.getBinding(), value);
|
||||
}
|
||||
|
||||
public DiffValue(String name, DiffElement parent, Object value) {
|
||||
this.binding = new DiffElement(name, parent, this);
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public DiffValue(String name, DiffValue parent, Object value) {
|
||||
this(name, parent.getBinding(), value);
|
||||
}
|
||||
|
||||
public DiffElement getBinding() {
|
||||
return binding;
|
||||
}
|
||||
|
||||
protected void setBinding(DiffElement binding) {
|
||||
this.binding = binding;
|
||||
}
|
||||
|
||||
public Object getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return getValue().toString();
|
||||
}
|
||||
|
||||
public String toString(int offset) {
|
||||
return toString();
|
||||
}
|
||||
|
||||
public String toOneLineString() {
|
||||
return getValue().toString();
|
||||
}
|
||||
|
||||
public boolean isAtomic() { return true; }
|
||||
public boolean isCompound() { return ! isAtomic(); }
|
||||
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 1:09 PM
|
||||
*
|
||||
* Interface for readers creating diffable objects from a file
|
||||
*/
|
||||
public interface DiffableReader {
|
||||
@Ensures("result != null")
|
||||
public String getName();
|
||||
|
||||
@Ensures("result != null")
|
||||
@Requires("file != null")
|
||||
public DiffElement readFromFile(File file);
|
||||
|
||||
@Requires("file != null")
|
||||
public boolean canRead(File file);
|
||||
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 12:53 PM
|
||||
*
|
||||
* Represents a specific difference between two specific DiffElements
|
||||
*/
|
||||
public class Difference {
|
||||
DiffElement master, test;
|
||||
|
||||
public Difference(DiffElement master, DiffElement test) {
|
||||
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
|
||||
this.master = master;
|
||||
this.test = test;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s:%s!=%s",
|
||||
getFullyQualifiedName(),
|
||||
getOneLineString(master),
|
||||
getOneLineString(test));
|
||||
}
|
||||
|
||||
public String getFullyQualifiedName() {
|
||||
return (master == null ? test : master).fullyQualifiedName();
|
||||
}
|
||||
|
||||
private static String getOneLineString(DiffElement elt) {
|
||||
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diffengine;
|
||||
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 7/4/11
|
||||
* Time: 1:09 PM
|
||||
*
|
||||
* Class implementing diffnode reader for VCF
|
||||
*/
|
||||
public class VCFDiffableReader implements DiffableReader {
|
||||
@Override
|
||||
public String getName() { return "VCF"; }
|
||||
|
||||
@Override
|
||||
public DiffElement readFromFile(File file) {
|
||||
DiffNode root = DiffNode.rooted(file.getName());
|
||||
try {
|
||||
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
|
||||
VCFCodec vcfCodec = new VCFCodec();
|
||||
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
|
||||
|
||||
String line = lineReader.readLine();
|
||||
while ( line != null ) {
|
||||
VariantContext vc = (VariantContext)vcfCodec.decode(line);
|
||||
String name = vc.getChr() + ":" + vc.getStart();
|
||||
DiffNode vcRoot = DiffNode.empty(name, root);
|
||||
|
||||
// add fields
|
||||
vcRoot.add("CHROM", vc.getChr());
|
||||
vcRoot.add("POS", vc.getStart());
|
||||
vcRoot.add("ID", vc.hasID() ? vc.getID() : VCFConstants.MISSING_VALUE_v4);
|
||||
vcRoot.add("REF", vc.getReference());
|
||||
vcRoot.add("ALT", vc.getAlternateAlleles());
|
||||
vcRoot.add("QUAL", vc.hasNegLog10PError() ? vc.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4);
|
||||
vcRoot.add("FILTER", vc.getFilters());
|
||||
|
||||
// add info fields
|
||||
for (Map.Entry<String, Object> attribute : vc.getAttributes().entrySet()) {
|
||||
if ( ! attribute.getKey().startsWith("_") && ! attribute.getKey().equals(VariantContext.ID_KEY))
|
||||
vcRoot.add(attribute.getKey(), attribute.getValue());
|
||||
}
|
||||
|
||||
for (Genotype g : vc.getGenotypes().values() ) {
|
||||
DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
|
||||
gRoot.add("GT", g.getGenotypeString());
|
||||
gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 );
|
||||
|
||||
for (Map.Entry<String, Object> attribute : g.getAttributes().entrySet()) {
|
||||
if ( ! attribute.getKey().startsWith("_") )
|
||||
gRoot.add(attribute.getKey(), attribute.getValue());
|
||||
}
|
||||
|
||||
vcRoot.add(gRoot);
|
||||
}
|
||||
|
||||
root.add(vcRoot);
|
||||
line = lineReader.readLine();
|
||||
}
|
||||
|
||||
lineReader.close();
|
||||
} catch ( IOException e ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return root.getBinding();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRead(File file) {
|
||||
try {
|
||||
final String VCF4_HEADER = "##fileformat=VCFv4";
|
||||
char[] buff = new char[VCF4_HEADER.length()];
|
||||
new FileReader(file).read(buff, 0, VCF4_HEADER.length());
|
||||
String firstLine = new String(buff);
|
||||
return firstLine.startsWith(VCF4_HEADER);
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -158,7 +157,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
}
|
||||
|
||||
// FORMAT and INFO fields
|
||||
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
|
||||
headerInfo.addAll(getSupportedHeaderStrings());
|
||||
|
||||
// FILTER fields
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
|
||||
|
|
@ -167,6 +166,20 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
return headerInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
|
||||
* @return a set of VCF format lines
|
||||
*/
|
||||
private static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute at a given locus.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -358,16 +358,8 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
mWriter.write(key);
|
||||
|
||||
if ( !entry.getValue().equals("") ) {
|
||||
int numVals = 1;
|
||||
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
|
||||
if ( metaData != null )
|
||||
numVals = metaData.getCount();
|
||||
|
||||
// take care of unbounded encoding
|
||||
if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
|
||||
numVals = 1;
|
||||
|
||||
if ( numVals > 0 ) {
|
||||
if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) {
|
||||
mWriter.write("=");
|
||||
mWriter.write(entry.getValue());
|
||||
}
|
||||
|
|
@ -423,7 +415,7 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
|
||||
if ( metaData != null ) {
|
||||
int numInFormatField = metaData.getCount();
|
||||
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
|
||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||
// For example, if Number=2, the string has to be ".,."
|
||||
|
|
|
|||
|
|
@ -0,0 +1,28 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* A class representing a key=value entry for ALT fields in the VCF header
|
||||
*/
|
||||
public class VCFAltHeaderLine extends VCFSimpleHeaderLine {
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFAltHeaderLine(String name, String description) {
|
||||
super(name, description, SupportedHeaderLineType.ALT);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
*
|
||||
* @param line the header line
|
||||
* @param version the vcf header version
|
||||
*/
|
||||
protected VCFAltHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super(line, version, SupportedHeaderLineType.ALT);
|
||||
}
|
||||
}
|
||||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -43,26 +45,43 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
|
||||
// the field types
|
||||
private String name;
|
||||
private int count;
|
||||
private int count = -1;
|
||||
private VCFHeaderLineCount countType;
|
||||
private String description;
|
||||
private VCFHeaderLineType type;
|
||||
|
||||
// access methods
|
||||
public String getName() { return name; }
|
||||
public int getCount() { return count; }
|
||||
public String getDescription() { return description; }
|
||||
public VCFHeaderLineType getType() { return type; }
|
||||
public VCFHeaderLineCount getCountType() { return countType; }
|
||||
public int getCount() {
|
||||
if ( countType != VCFHeaderLineCount.INTEGER )
|
||||
throw new ReviewedStingException("Asking for header line count when type is not an integer");
|
||||
return count;
|
||||
}
|
||||
|
||||
//
|
||||
public void setNumberToUnbounded() { this.count = UNBOUNDED; }
|
||||
// utility method
|
||||
public int getCount(int numAltAlleles) {
|
||||
int myCount;
|
||||
switch ( countType ) {
|
||||
case INTEGER: myCount = count; break;
|
||||
case UNBOUNDED: myCount = -1; break;
|
||||
case A: myCount = numAltAlleles; break;
|
||||
case G: myCount = ((numAltAlleles + 1) * (numAltAlleles + 2) / 2); break;
|
||||
default: throw new ReviewedStingException("Unknown count type: " + countType);
|
||||
}
|
||||
return myCount;
|
||||
}
|
||||
|
||||
public void setNumberToUnbounded() {
|
||||
countType = VCFHeaderLineCount.UNBOUNDED;
|
||||
count = -1;
|
||||
}
|
||||
|
||||
// our type of line, i.e. format, info, etc
|
||||
private final SupportedHeaderLineType lineType;
|
||||
|
||||
// line numerical values are allowed to be unbounded (or unknown), which is
|
||||
// marked with a dot (.)
|
||||
public static final int UNBOUNDED = -1; // the value we store internally for unbounded types
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
*
|
||||
|
|
@ -70,10 +89,12 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
* @param count the count for this header line
|
||||
* @param type the type for this header line
|
||||
* @param description the description for this header line
|
||||
* @param lineType the header line type
|
||||
*/
|
||||
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
this.name = name;
|
||||
this.countType = VCFHeaderLineCount.INTEGER;
|
||||
this.count = count;
|
||||
this.type = type;
|
||||
this.description = description;
|
||||
|
|
@ -81,20 +102,53 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
validate();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param count the count type for this header line
|
||||
* @param type the type for this header line
|
||||
* @param description the description for this header line
|
||||
* @param lineType the header line type
|
||||
*/
|
||||
protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
this.name = name;
|
||||
this.countType = count;
|
||||
this.type = type;
|
||||
this.description = description;
|
||||
this.lineType = lineType;
|
||||
validate();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
*
|
||||
* @param line the header line
|
||||
* @param version the VCF header version
|
||||
* @param lineType the header line type
|
||||
*
|
||||
*/
|
||||
protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
|
||||
name = mapping.get("ID");
|
||||
count = (version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) ?
|
||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
|
||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
|
||||
count = -1;
|
||||
final String numberStr = mapping.get("Number");
|
||||
if ( numberStr.equals(VCFConstants.PER_ALLELE_COUNT) ) {
|
||||
countType = VCFHeaderLineCount.A;
|
||||
} else if ( numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT) ) {
|
||||
countType = VCFHeaderLineCount.G;
|
||||
} else if ( ((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) &&
|
||||
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) ||
|
||||
((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) &&
|
||||
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3)) ) {
|
||||
countType = VCFHeaderLineCount.UNBOUNDED;
|
||||
} else {
|
||||
countType = VCFHeaderLineCount.INTEGER;
|
||||
count = Integer.valueOf(numberStr);
|
||||
|
||||
}
|
||||
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
||||
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
|
||||
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
|
||||
|
|
@ -121,7 +175,15 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
protected String toStringEncoding() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
|
||||
Object number;
|
||||
switch ( countType ) {
|
||||
case A: number = VCFConstants.PER_ALLELE_COUNT; break;
|
||||
case G: number = VCFConstants.PER_GENOTYPE_COUNT; break;
|
||||
case UNBOUNDED: number = VCFConstants.UNBOUNDED_ENCODING_v4; break;
|
||||
case INTEGER:
|
||||
default: number = count;
|
||||
}
|
||||
map.put("Number", number);
|
||||
map.put("Type", type);
|
||||
map.put("Description", description);
|
||||
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
|
||||
|
|
@ -136,15 +198,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
if ( !(o instanceof VCFCompoundHeaderLine) )
|
||||
return false;
|
||||
VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o;
|
||||
return name.equals(other.name) &&
|
||||
count == other.count &&
|
||||
description.equals(other.description) &&
|
||||
type == other.type &&
|
||||
lineType == other.lineType;
|
||||
return equalsExcludingDescription(other) &&
|
||||
description.equals(other.description);
|
||||
}
|
||||
|
||||
public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
|
||||
return count == other.count &&
|
||||
countType == other.countType &&
|
||||
type == other.type &&
|
||||
lineType == other.lineType &&
|
||||
name.equals(other.name);
|
||||
|
|
|
|||
|
|
@ -99,6 +99,8 @@ public final class VCFConstants {
|
|||
public static final String MISSING_DEPTH_v3 = "-1";
|
||||
public static final String UNBOUNDED_ENCODING_v4 = ".";
|
||||
public static final String UNBOUNDED_ENCODING_v3 = "-1";
|
||||
public static final String PER_ALLELE_COUNT = "A";
|
||||
public static final String PER_GENOTYPE_COUNT = "G";
|
||||
public static final String EMPTY_ALLELE = ".";
|
||||
public static final String EMPTY_GENOTYPE = "./.";
|
||||
public static final double MAX_GENOTYPE_QUAL = 99.0;
|
||||
|
|
|
|||
|
|
@ -1,19 +1,10 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* A class representing a key=value entry for FILTER fields in the VCF header
|
||||
*/
|
||||
public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
||||
|
||||
private String name;
|
||||
private String description;
|
||||
|
||||
public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
|
|
@ -22,12 +13,7 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
|
|||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFFilterHeaderLine(String name, String description) {
|
||||
super("FILTER", "");
|
||||
this.name = name;
|
||||
this.description = description;
|
||||
|
||||
if ( name == null || description == null )
|
||||
throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
|
||||
super(name, description, SupportedHeaderLineType.FILTER);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -37,34 +23,6 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
|
|||
* @param version the vcf header version
|
||||
*/
|
||||
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super("FILTER", "");
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
|
||||
name = mapping.get("ID");
|
||||
description = mapping.get("Description");
|
||||
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
|
||||
description = UNBOUND_DESCRIPTION;
|
||||
}
|
||||
|
||||
protected String toStringEncoding() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Description", description);
|
||||
return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFFilterHeaderLine) )
|
||||
return false;
|
||||
VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
|
||||
return name.equals(other.name) &&
|
||||
description.equals(other.description);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
super(line, version, SupportedHeaderLineType.FILTER);
|
||||
}
|
||||
}
|
||||
|
|
@ -16,6 +16,10 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
|
|||
throw new IllegalArgumentException("Flag is an unsupported type for format fields");
|
||||
}
|
||||
|
||||
public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
|
||||
super(name, count, type, description, SupportedHeaderLineType.FORMAT);
|
||||
}
|
||||
|
||||
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super(line, version, SupportedHeaderLineType.FORMAT);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
/**
|
||||
* the count encodings we use for fields in VCF header lines
|
||||
*/
|
||||
public enum VCFHeaderLineCount {
|
||||
INTEGER, A, G, UNBOUNDED;
|
||||
}
|
||||
|
|
@ -13,6 +13,10 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
|
|||
super(name, count, type, description, SupportedHeaderLineType.INFO);
|
||||
}
|
||||
|
||||
public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
|
||||
super(name, count, type, description, SupportedHeaderLineType.INFO);
|
||||
}
|
||||
|
||||
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super(line, version, SupportedHeaderLineType.INFO);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* A class representing a key=value entry for simple VCF header types
|
||||
*/
|
||||
public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
||||
|
||||
public enum SupportedHeaderLineType {
|
||||
FILTER, ALT;
|
||||
}
|
||||
|
||||
private String name;
|
||||
private String description;
|
||||
|
||||
// our type of line, i.e. filter, alt, etc
|
||||
private final SupportedHeaderLineType lineType;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param description the description for this header line
|
||||
* @param lineType the header line type
|
||||
*/
|
||||
public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
this.lineType = lineType;
|
||||
this.name = name;
|
||||
this.description = description;
|
||||
|
||||
if ( name == null || description == null )
|
||||
throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
*
|
||||
* @param line the header line
|
||||
* @param version the vcf header version
|
||||
* @param lineType the header line type
|
||||
*/
|
||||
protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
this.lineType = lineType;
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
|
||||
name = mapping.get("ID");
|
||||
description = mapping.get("Description");
|
||||
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
|
||||
description = UNBOUND_DESCRIPTION;
|
||||
}
|
||||
|
||||
protected String toStringEncoding() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Description", description);
|
||||
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFSimpleHeaderLine) )
|
||||
return false;
|
||||
VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o;
|
||||
return name.equals(other.name) &&
|
||||
description.equals(other.description);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
}
|
||||
|
|
@ -180,19 +180,4 @@ public class VCFUtils {
|
|||
|
||||
return new HashSet<VCFHeaderLine>(map.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
|
||||
* @return a set of VCF format lines
|
||||
*/
|
||||
public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -108,7 +108,7 @@ public class Allele implements Comparable<Allele> {
|
|||
this.bases = bases;
|
||||
|
||||
if ( ! acceptableAlleleBases(bases) )
|
||||
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
|
||||
throw new IllegalArgumentException("Unexpected base in allele bases \'" + new String(bases)+"\'");
|
||||
}
|
||||
|
||||
private Allele(String bases, boolean isRef) {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.apache.commons.io.FileUtils;
|
|||
import org.apache.log4j.*;
|
||||
import org.apache.log4j.spi.LoggingEvent;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
|
||||
|
|
@ -334,11 +335,14 @@ public abstract class BaseTest {
|
|||
|
||||
if (parameterize || expectedMD5.equals("")) {
|
||||
// Don't assert
|
||||
} else {
|
||||
Assert.assertEquals(filemd5sum, expectedMD5, name + " Mismatching MD5s");
|
||||
} else if ( filemd5sum.equals(expectedMD5) ) {
|
||||
System.out.println(String.format(" => %s PASSED", name));
|
||||
} else {
|
||||
Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum));
|
||||
}
|
||||
|
||||
|
||||
|
||||
return filemd5sum;
|
||||
}
|
||||
|
||||
|
|
@ -381,7 +385,12 @@ public abstract class BaseTest {
|
|||
System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File);
|
||||
System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File);
|
||||
|
||||
// todo -- add support for simple inline display of the first N differences for text file
|
||||
// inline differences
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0);
|
||||
boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params);
|
||||
if ( success )
|
||||
System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n",
|
||||
pathToExpectedMD5File, pathToFileMD5File);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -34,76 +34,76 @@ import java.util.Arrays;
|
|||
* Tests CombineVariants
|
||||
*/
|
||||
public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||
// public static String baseTestString(String args) {
|
||||
// return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
|
||||
// }
|
||||
//
|
||||
// public void test1InOut(String file, String md5, boolean vcf3) {
|
||||
// test1InOut(file, md5, "", vcf3);
|
||||
// }
|
||||
//
|
||||
// public void test1InOut(String file, String md5, String args, boolean vcf3) {
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
|
||||
// 1,
|
||||
// Arrays.asList(md5));
|
||||
// executeTest("testInOut1--" + file, spec);
|
||||
// }
|
||||
//
|
||||
// public void combine2(String file1, String file2, String args, String md5, boolean vcf3) {
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
|
||||
// 1,
|
||||
// Arrays.asList(md5));
|
||||
// executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
// }
|
||||
//
|
||||
// public void combineSites(String args, String md5) {
|
||||
// String file1 = "1000G_omni2.5.b37.sites.vcf";
|
||||
// String file2 = "hapmap_3.3.b37.sites.vcf";
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
|
||||
// + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1
|
||||
// + " -B:hm3,VCF " + validationDataLocation + file2 + args,
|
||||
// 1,
|
||||
// Arrays.asList(md5));
|
||||
// executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
// }
|
||||
//
|
||||
//
|
||||
// @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2117fff6e0d182cd20be508e9661829c", true); }
|
||||
// @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2cfaf7af3dd119df08b8a9c1f72e2f93", " -setKey foo", true); }
|
||||
// @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1474ac0fde2ce42a3c24f1c97eab333e", " -setKey null", true); }
|
||||
// @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "7fc66df048a0ab08cf507906e1d4a308", false); } // official project VCF files in tabix format
|
||||
//
|
||||
// @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ec9715f53dbf4531570557c212822f12", false); }
|
||||
// @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f1072be5f5c6ee810276d9ca6537224d", false); }
|
||||
//
|
||||
// @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "b77a1eec725201d9d8e74ee0c45638d3", false); } // official project VCF files in tabix format
|
||||
// @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "802977fdfd2f4905b501bb06800f60af", false); } // official project VCF files in tabix format
|
||||
// @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a67157287dd2b24b5cdf7ebf8fcbbe9a", false); }
|
||||
//
|
||||
// @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e1f4718a179f1196538a33863da04f53", false); }
|
||||
//
|
||||
// @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "b3783384b7c8e877b971033e90beba48", true); }
|
||||
//
|
||||
// @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "902e541c87caa72134db6293fc46f0ad"); }
|
||||
// @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "f339ad4bb5863b58b9c919ce7d040bb9"); }
|
||||
//
|
||||
// @Test public void threeWayWithRefs() {
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
|
||||
// " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
|
||||
// " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
|
||||
// " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
|
||||
// " -setKey centerSet" +
|
||||
// " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
|
||||
// " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
// " -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
// 1,
|
||||
// Arrays.asList("a07995587b855f3214fb71940bf23c0f"));
|
||||
// executeTest("threeWayWithRefs", spec);
|
||||
// }
|
||||
public static String baseTestString(String args) {
|
||||
return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
|
||||
}
|
||||
|
||||
public void test1InOut(String file, String md5, boolean vcf3) {
|
||||
test1InOut(file, md5, "", vcf3);
|
||||
}
|
||||
|
||||
public void test1InOut(String file, String md5, String args, boolean vcf3) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("testInOut1--" + file, spec);
|
||||
}
|
||||
|
||||
public void combine2(String file1, String file2, String args, String md5, boolean vcf3) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
}
|
||||
|
||||
public void combineSites(String args, String md5) {
|
||||
String file1 = "1000G_omni2.5.b37.sites.vcf";
|
||||
String file2 = "hapmap_3.3.b37.sites.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
|
||||
+ " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1
|
||||
+ " -B:hm3,VCF " + validationDataLocation + file2 + args,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2117fff6e0d182cd20be508e9661829c", true); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2cfaf7af3dd119df08b8a9c1f72e2f93", " -setKey foo", true); }
|
||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1474ac0fde2ce42a3c24f1c97eab333e", " -setKey null", true); }
|
||||
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "7fc66df048a0ab08cf507906e1d4a308", false); } // official project VCF files in tabix format
|
||||
|
||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ec9715f53dbf4531570557c212822f12", false); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f1072be5f5c6ee810276d9ca6537224d", false); }
|
||||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "b77a1eec725201d9d8e74ee0c45638d3", false); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "802977fdfd2f4905b501bb06800f60af", false); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a67157287dd2b24b5cdf7ebf8fcbbe9a", false); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e1f4718a179f1196538a33863da04f53", false); }
|
||||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "b3783384b7c8e877b971033e90beba48", true); }
|
||||
|
||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "902e541c87caa72134db6293fc46f0ad"); }
|
||||
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "f339ad4bb5863b58b9c919ce7d040bb9"); }
|
||||
|
||||
@Test public void threeWayWithRefs() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
|
||||
" -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
|
||||
" -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
|
||||
" -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
|
||||
" -setKey centerSet" +
|
||||
" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
|
||||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("a07995587b855f3214fb71940bf23c0f"));
|
||||
executeTest("threeWayWithRefs", spec);
|
||||
}
|
||||
|
||||
|
||||
// complex examples with filtering, indels, and multiple alleles
|
||||
|
|
@ -119,7 +119,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
@Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); }
|
||||
// @Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); }
|
||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "0db9ef50fe54b60426474273d7c7fa99"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "d20acb3d53ba0a02ce92d540ebeda2a9"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "8d1b3d120515f8b56b5a0d10bc5da713"); }
|
||||
|
|
|
|||
|
|
@ -138,30 +138,32 @@ class QGraph extends Logging {
|
|||
validate()
|
||||
|
||||
if (running && numMissingValues == 0) {
|
||||
logger.info("Generating scatter gather jobs.")
|
||||
val scatterGathers = jobGraph.edgeSet.filter(edge => scatterGatherable(edge))
|
||||
if (!scatterGathers.isEmpty) {
|
||||
logger.info("Generating scatter gather jobs.")
|
||||
|
||||
var addedFunctions = List.empty[QFunction]
|
||||
for (scatterGather <- scatterGathers) {
|
||||
val functions = scatterGather.asInstanceOf[FunctionEdge]
|
||||
.function.asInstanceOf[ScatterGatherableFunction]
|
||||
.generateFunctions()
|
||||
addedFunctions ++= functions
|
||||
var addedFunctions = List.empty[QFunction]
|
||||
for (scatterGather <- scatterGathers) {
|
||||
val functions = scatterGather.asInstanceOf[FunctionEdge]
|
||||
.function.asInstanceOf[ScatterGatherableFunction]
|
||||
.generateFunctions()
|
||||
addedFunctions ++= functions
|
||||
}
|
||||
|
||||
logger.info("Removing original jobs.")
|
||||
this.jobGraph.removeAllEdges(scatterGathers)
|
||||
prune()
|
||||
|
||||
logger.info("Adding scatter gather jobs.")
|
||||
addedFunctions.foreach(function => if (running) this.add(function))
|
||||
|
||||
logger.info("Regenerating graph.")
|
||||
fill
|
||||
val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile
|
||||
if (scatterGatherDotFile != null)
|
||||
renderToDot(scatterGatherDotFile)
|
||||
validate()
|
||||
}
|
||||
|
||||
logger.info("Removing original jobs.")
|
||||
this.jobGraph.removeAllEdges(scatterGathers)
|
||||
prune()
|
||||
|
||||
logger.info("Adding scatter gather jobs.")
|
||||
addedFunctions.foreach(function => if (running) this.add(function))
|
||||
|
||||
logger.info("Regenerating graph.")
|
||||
fill
|
||||
val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile
|
||||
if (scatterGatherDotFile != null)
|
||||
renderToDot(scatterGatherDotFile)
|
||||
validate()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -286,11 +286,11 @@ object Lsf706JobRunner extends Logging {
|
|||
// LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct)
|
||||
// LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist)
|
||||
logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(runner.jobId))
|
||||
val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate)
|
||||
if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) {
|
||||
val unknownStatusMillis = (System.currentTimeMillis - runner.lastStatusUpdate)
|
||||
if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) {
|
||||
// Unknown status has been returned for a while now.
|
||||
runner.updateStatus(RunnerStatus.FAILED)
|
||||
logger.error("Unable to read LSF status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description))
|
||||
logger.error("Unable to read LSF status for %0.2f minutes: job id %d: %s".format(unknownStatusMillis/(60 * 1000D), runner.jobId, runner.function.description))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue