Merge branch 'master' of ssh://chartl@tin.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Christopher Hartl 2011-07-12 18:33:30 -04:00
commit 61dad4f090
61 changed files with 2632 additions and 378 deletions

View File

@ -981,6 +981,7 @@
<delete dir="out"/>
<delete dir="${build.dir}"/>
<delete dir="${lib.dir}"/>
<delete dir="dump"/>
<delete dir="staging"/>
<delete dir="${dist.dir}"/>
<delete dir="pipelinetests"/>

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.util.QualityUtil;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.QualityUtils;
/**
* Filter out mapping quality zero reads.
*
* @author ebanks
* @version 0.1
*/
public class MappingQualityUnavailableReadFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
}
}

View File

@ -24,17 +24,16 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
/**
* Filter out zero mapping quality reads.
* Filter out mapping quality zero reads.
*
* @author hanna
* @version 0.1
*/
public class ZeroMappingQualityReadFilter extends ReadFilter {
public class MappingQualityZeroReadFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == 0);
}

View File

@ -62,5 +62,5 @@ public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAn
public List<String> getKeyNames() { return Arrays.asList("AB"); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), -1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); }
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@ -41,8 +42,8 @@ import java.util.*;
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -142,5 +143,5 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot
// public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList("AD"); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFCompoundHeaderLine.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@ -21,7 +22,7 @@ public class MappingQualityRankSumTest extends RankSumTest {
protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
for ( final PileupElement p : pileup ) {
if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 and 255 are special mapping qualities used as a code by aligners
if ( isUsableBase(p) ) {
if ( p.getBase() == ref ) {
refQuals.add((double)p.getMappingQual());
} else if ( p.getBase() == alt ) {
@ -34,7 +35,7 @@ public class MappingQualityRankSumTest extends RankSumTest {
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
for (final PileupElement p: pileup) {
if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() < 254) {
if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
// retrieve likelihood information corresponding to this read
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
// by design, first element in LinkedHashMap was ref allele
@ -54,8 +55,6 @@ public class MappingQualityRankSumTest extends RankSumTest {
refQuals.add((double)p.getMappingQual());
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
altQuals.add((double)p.getMappingQual());
}
}
}

View File

@ -47,5 +47,5 @@ public class NBaseCount implements InfoFieldAnnotation {
public List<String> getKeyNames() { return Arrays.asList("PercentNBaseSolid"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 4, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 1, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); }
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -38,8 +39,10 @@ public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotatio
pileup = context.getBasePileup();
if (pileup != null) {
for (PileupElement p : pileup )
qualities[index++] = p.getRead().getMappingQuality();
for (PileupElement p : pileup ) {
if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
qualities[index++] = p.getMappingQual();
}
}
}

View File

@ -106,6 +106,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnota
protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
protected static boolean isUsableBase( final PileupElement p ) {
return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed quality score here
return !( p.isDeletion() ||
p.getMappingQual() == 0 ||
p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
((int)p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE ); // need the unBAQed quality score here
}
}

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@ -200,8 +201,8 @@ public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation {
1,
VCFHeaderLineType.Integer,
"Total read depth per sample, including MQ0"),
new VCFFormatHeaderLine(getKeyNames().get(1),
VCFCompoundHeaderLine.UNBOUNDED,
new VCFFormatHeaderLine(getKeyNames().get(1),
VCFHeaderLineCount.UNBOUNDED,
VCFHeaderLineType.Float,
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -65,5 +66,5 @@ public class SampleList implements InfoFieldAnnotation {
public List<String> getKeyNames() { return Arrays.asList("Samples"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFInfoHeaderLine.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
}

View File

@ -0,0 +1,121 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
import net.sf.samtools.*;
import net.sf.samtools.util.BlockCompressedInputStream;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.zip.GZIPInputStream;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 1:09 PM
*
* Class implementing diffnode reader for VCF
*/
public class BAMDiffableReader implements DiffableReader {
@Override
public String getName() { return "BAM"; }
@Override
public DiffElement readFromFile(File file, int maxElementsToRead) {
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
DiffNode root = DiffNode.rooted(file.getName());
SAMRecordIterator iterator = reader.iterator();
int count = 0;
while ( iterator.hasNext() ) {
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break;
final SAMRecord record = iterator.next();
// name is the read name + first of pair
String name = record.getReadName().replace('.', '_');
if ( record.getReadPairedFlag() ) {
name += record.getFirstOfPairFlag() ? "_1" : "_2";
}
DiffNode readRoot = DiffNode.empty(name, root);
// add fields
readRoot.add("NAME", record.getReadName());
readRoot.add("FLAGS", record.getFlags());
readRoot.add("RNAME", record.getReferenceName());
readRoot.add("POS", record.getAlignmentStart());
readRoot.add("MAPQ", record.getMappingQuality());
readRoot.add("CIGAR", record.getCigarString());
readRoot.add("RNEXT", record.getMateReferenceName());
readRoot.add("PNEXT", record.getMateAlignmentStart());
readRoot.add("TLEN", record.getInferredInsertSize());
readRoot.add("SEQ", record.getReadString());
readRoot.add("QUAL", record.getBaseQualityString());
for ( SAMRecord.SAMTagAndValue xt : record.getAttributes() ) {
readRoot.add(xt.tag, xt.value);
}
// add record to root
if ( ! root.hasElement(name) )
// protect ourselves from malformed files
root.add(readRoot);
}
reader.close();
return root.getBinding();
}
@Override
public boolean canRead(File file) {
final byte[] BAM_MAGIC = "BAM\1".getBytes();
final byte[] buffer = new byte[BAM_MAGIC.length];
try {
FileInputStream fstream = new FileInputStream(file);
new BlockCompressedInputStream(fstream).read(buffer,0,BAM_MAGIC.length);
return Arrays.equals(buffer, BAM_MAGIC);
} catch ( IOException e ) {
return false;
} catch ( net.sf.samtools.FileTruncatedException e ) {
return false;
}
}
}

View File

@ -0,0 +1,122 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
import com.google.java.contract.*;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 12:55 PM
*
* An interface that must be implemented to allow us to calculate differences
* between structured objects
*/
@Invariant({
"name != null",
"value != null",
"parent != null || name.equals(\"ROOT\")",
"value == null || value.getBinding() == this"})
public class DiffElement {
public final static DiffElement ROOT = new DiffElement();
final private String name;
final private DiffElement parent;
final private DiffValue value;
/**
* For ROOT only
*/
private DiffElement() {
this.name = "ROOT";
this.parent = null;
this.value = new DiffValue(this, "ROOT");
}
@Requires({"name != null", "parent != null", "value != null"})
public DiffElement(String name, DiffElement parent, DiffValue value) {
if ( name.equals("ROOT") ) throw new IllegalArgumentException("Cannot use reserved name ROOT");
this.name = name;
this.parent = parent;
this.value = value;
this.value.setBinding(this);
}
@Ensures({"result != null"})
public String getName() {
return name;
}
public DiffElement getParent() {
return parent;
}
@Ensures({"result != null"})
public DiffValue getValue() {
return value;
}
public boolean isRoot() { return this == ROOT; }
@Ensures({"result != null"})
@Override
public String toString() {
return getName() + "=" + getValue().toString();
}
public String toString(int offset) {
return (offset > 0 ? Utils.dupString(' ', offset) : 0) + getName() + "=" + getValue().toString(offset);
}
@Ensures({"result != null"})
public final String fullyQualifiedName() {
if ( isRoot() )
return "";
else if ( parent.isRoot() )
return name;
else
return parent.fullyQualifiedName() + "." + name;
}
@Ensures({"result != null"})
public String toOneLineString() {
return getName() + "=" + getValue().toOneLineString();
}
@Ensures({"result != null"})
public DiffNode getValueAsNode() {
if ( getValue().isCompound() )
return (DiffNode)getValue();
else
throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
}
public int size() {
return 1 + getValue().size();
}
}

View File

@ -0,0 +1,360 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.PrintStream;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 12:51 PM
* A generic engine for comparing tree-structured objects
*/
public class DiffEngine {
final protected static Logger logger = Logger.getLogger(DiffEngine.class);
private final Map<String, DiffableReader> readers = new HashMap<String, DiffableReader>();
public DiffEngine() {
loadDiffableReaders();
}
// --------------------------------------------------------------------------------
//
// difference calculation
//
// --------------------------------------------------------------------------------
public List<SpecificDifference> diff(DiffElement master, DiffElement test) {
DiffValue masterValue = master.getValue();
DiffValue testValue = test.getValue();
if ( masterValue.isCompound() && masterValue.isCompound() ) {
return diff(master.getValueAsNode(), test.getValueAsNode());
} else if ( masterValue.isAtomic() && testValue.isAtomic() ) {
return diff(masterValue, testValue);
} else {
// structural difference in types. one is node, other is leaf
return Arrays.asList(new SpecificDifference(master, test));
}
}
public List<SpecificDifference> diff(DiffNode master, DiffNode test) {
Set<String> allNames = new HashSet<String>(master.getElementNames());
allNames.addAll(test.getElementNames());
List<SpecificDifference> diffs = new ArrayList<SpecificDifference>();
for ( String name : allNames ) {
DiffElement masterElt = master.getElement(name);
DiffElement testElt = test.getElement(name);
if ( masterElt == null && testElt == null ) {
throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
} else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
// todo -- should one of these be a special MISSING item?
diffs.add(new SpecificDifference(masterElt, testElt));
} else {
diffs.addAll(diff(masterElt, testElt));
}
}
return diffs;
}
public List<SpecificDifference> diff(DiffValue master, DiffValue test) {
if ( master.getValue().equals(test.getValue()) ) {
return Collections.emptyList();
} else {
return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding()));
}
}
// --------------------------------------------------------------------------------
//
// Summarizing differences
//
// --------------------------------------------------------------------------------
/**
* Emits a summary of the diffs to out. Suppose you have the following three differences:
*
* A.X.Z:1!=2
* A.Y.Z:3!=4
* B.X.Z:5!=6
*
* The above is the itemized list of the differences. The summary looks for common differences
* in the name hierarchy, counts those shared elements, and emits the differences that occur
* in order of decreasing counts.
*
* So, in the above example, what are the shared elements?
*
* A.X.Z and B.X.Z share X.Z, so there's a *.X.Z with count 2
* A.X.Z, A.Y.Z, and B.X.Z all share *.*.Z, with count 3
* Each of A.X.Z, A.Y.Z, and B.X.Z are individually unique, with count 1
*
* So we would emit the following summary:
*
* *.*.Z: 3
* *.X.Z: 2
* A.X.Z: 1 [specific difference: 1!=2]
* A.Y.Z: 1 [specific difference: 3!=4]
* B.X.Z: 1 [specific difference: 5!=6]
*
* The algorithm to accomplish this calculation is relatively simple. Start with all of the
* concrete differences. For each pair of differences A1.A2....AN and B1.B2....BN:
*
* find the longest common subsequence Si.Si+1...SN where Ai = Bi = Si
* If i == 0, then there's no shared substructure
* If i > 0, then generate the summarized value X = *.*...Si.Si+1...SN
* if X is a known summary, increment it's count, otherwise set its count to 1
*
* Not that only pairs of the same length are considered as potentially equivalent
*
* @param params determines how we display the items
* @param diffs
*/
public void reportSummarizedDifferences(List<SpecificDifference> diffs, SummaryReportParams params ) {
printSummaryReport(summarizeDifferences(diffs), params );
}
public List<Difference> summarizeDifferences(List<SpecificDifference> diffs) {
return summarizedDifferencesOfPaths(diffs);
}
final protected static String[] diffNameToPath(String diffName) {
return diffName.split("\\.");
}
protected List<Difference> summarizedDifferencesOfPathsFromString(List<String> singletonDiffs) {
List<Difference> diffs = new ArrayList<Difference>();
for ( String diff : singletonDiffs ) {
diffs.add(new Difference(diff));
}
return summarizedDifferencesOfPaths(diffs);
}
protected List<Difference> summarizedDifferencesOfPaths(List<? extends Difference> singletonDiffs) {
Map<String, Difference> summaries = new HashMap<String, Difference>();
// create the initial set of differences
for ( int i = 0; i < singletonDiffs.size(); i++ ) {
for ( int j = 0; j <= i; j++ ) {
Difference diffPath1 = singletonDiffs.get(i);
Difference diffPath2 = singletonDiffs.get(j);
if ( diffPath1.length() == diffPath2.length() ) {
int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts());
String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath();
addSummary(summaries, path, true);
}
}
}
// count differences
for ( Difference diffPath : singletonDiffs ) {
for ( Difference sumDiff : summaries.values() ) {
if ( sumDiff.matches(diffPath.getParts()) )
addSummary(summaries, sumDiff.getPath(), false);
}
}
List<Difference> sortedSummaries = new ArrayList<Difference>(summaries.values());
Collections.sort(sortedSummaries);
return sortedSummaries;
}
private static void addSummary(Map<String, Difference> summaries, String path, boolean onlyCatalog) {
if ( summaries.containsKey(path) ) {
if ( ! onlyCatalog )
summaries.get(path).incCount();
} else {
Difference sumDiff = new Difference(path);
summaries.put(sumDiff.getPath(), sumDiff);
}
}
protected void printSummaryReport(List<Difference> sortedSummaries, SummaryReportParams params ) {
GATKReport report = new GATKReport();
final String tableName = "diffences";
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information");
GATKReportTable table = report.getTable(tableName);
table.addPrimaryKey("Difference", true);
table.addColumn("NumberOfOccurrences", 0);
int count = 0, count1 = 0;
for ( Difference diff : sortedSummaries ) {
if ( diff.getCount() < params.minSumDiffToShow )
// in order, so break as soon as the count is too low
break;
if ( params.maxItemsToDisplay != 0 && count++ > params.maxItemsToDisplay )
break;
if ( diff.getCount() == 1 ) {
count1++;
if ( params.maxCountOneItems != 0 && count1 > params.maxCountOneItems )
break;
}
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
}
table.write(params.out);
}
protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) {
int i = 0;
for ( ; i < diffPath1.length; i++ ) {
int j = diffPath1.length - i - 1;
if ( ! diffPath1[j].equals(diffPath2[j]) )
break;
}
return i;
}
/**
* parts is [A B C D]
* commonPostfixLength: how many parts are shared at the end, suppose its 2
* We want to create a string *.*.C.D
*
* @param parts
* @param commonPostfixLength
* @return
*/
protected static String summarizedPath(String[] parts, int commonPostfixLength) {
int stop = parts.length - commonPostfixLength;
if ( stop > 0 ) parts = parts.clone();
for ( int i = 0; i < stop; i++ ) {
parts[i] = "*";
}
return Utils.join(".", parts);
}
// --------------------------------------------------------------------------------
//
// plugin manager
//
// --------------------------------------------------------------------------------
public void loadDiffableReaders() {
List<Class<? extends DiffableReader>> drClasses = new PluginManager<DiffableReader>( DiffableReader.class ).getPlugins();
logger.info("Loading diffable modules:");
for (Class<? extends DiffableReader> drClass : drClasses ) {
logger.info("\t" + drClass.getSimpleName());
try {
DiffableReader dr = drClass.newInstance();
readers.put(dr.getName(), dr);
} catch (InstantiationException e) {
throw new ReviewedStingException("Unable to instantiate module '" + drClass.getSimpleName() + "'");
} catch (IllegalAccessException e) {
throw new ReviewedStingException("Illegal access error when trying to instantiate '" + drClass.getSimpleName() + "'");
}
}
}
protected Map<String, DiffableReader> getReaders() {
return readers;
}
protected DiffableReader getReader(String name) {
return readers.get(name);
}
/**
* Returns a reader appropriate for this file, or null if no such reader exists
* @param file
* @return
*/
public DiffableReader findReaderForFile(File file) {
for ( DiffableReader reader : readers.values() )
if (reader.canRead(file) )
return reader;
return null;
}
/**
* Returns true if reader appropriate for this file, or false if no such reader exists
* @param file
* @return
*/
public boolean canRead(File file) {
return findReaderForFile(file) != null;
}
public DiffElement createDiffableFromFile(File file) {
return createDiffableFromFile(file, -1);
}
public DiffElement createDiffableFromFile(File file, int maxElementsToRead) {
DiffableReader reader = findReaderForFile(file);
if ( reader == null )
throw new UserException("Unsupported file type: " + file);
else
return reader.readFromFile(file, maxElementsToRead);
}
public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {
DiffEngine diffEngine = new DiffEngine();
if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
DiffElement master = diffEngine.createDiffableFromFile(masterFile);
DiffElement test = diffEngine.createDiffableFromFile(testFile);
List<SpecificDifference> diffs = diffEngine.diff(master, test);
diffEngine.reportSummarizedDifferences(diffs, params);
return true;
} else {
return false;
}
}
public static class SummaryReportParams {
PrintStream out = System.out;
int maxItemsToDisplay = 0;
int maxCountOneItems = 0;
int minSumDiffToShow = 0;
public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) {
this.out = out;
this.maxItemsToDisplay = maxItemsToDisplay;
this.maxCountOneItems = maxCountOneItems;
this.minSumDiffToShow = minSumDiffToShow;
}
}
}

View File

@ -0,0 +1,248 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 12:55 PM
*
* An interface that must be implemented to allow us to calculate differences
* between structured objects
*/
public class DiffNode extends DiffValue {
private Map<String, DiffElement> getElementMap() {
return (Map<String, DiffElement>)super.getValue();
}
private static Map<String, DiffElement> emptyElements() { return new HashMap<String, DiffElement>(); }
private DiffNode(Map<String, DiffElement> elements) {
super(elements);
}
private DiffNode(DiffElement binding, Map<String, DiffElement> elements) {
super(binding, elements);
}
// ---------------------------------------------------------------------------
//
// constructors
//
// ---------------------------------------------------------------------------
public static DiffNode rooted(String name) {
return empty(name, DiffElement.ROOT);
}
public static DiffNode empty(String name, DiffElement parent) {
DiffNode df = new DiffNode(emptyElements());
DiffElement elt = new DiffElement(name, parent, df);
df.setBinding(elt);
return df;
}
public static DiffNode empty(String name, DiffValue parent) {
return empty(name, parent.getBinding());
}
// ---------------------------------------------------------------------------
//
// accessors
//
// ---------------------------------------------------------------------------
@Override
public boolean isAtomic() { return false; }
public Collection<String> getElementNames() {
return getElementMap().keySet();
}
public Collection<DiffElement> getElements() {
return getElementMap().values();
}
private Collection<DiffElement> getElements(boolean atomicOnly) {
List<DiffElement> elts = new ArrayList<DiffElement>();
for ( DiffElement elt : getElements() )
if ( (atomicOnly && elt.getValue().isAtomic()) || (! atomicOnly && elt.getValue().isCompound()))
elts.add(elt);
return elts;
}
public Collection<DiffElement> getAtomicElements() {
return getElements(true);
}
public Collection<DiffElement> getCompoundElements() {
return getElements(false);
}
/**
* Returns the element bound to name, or null if no such binding exists
* @param name
* @return
*/
public DiffElement getElement(String name) {
return getElementMap().get(name);
}
/**
* Returns true if name is bound in this node
* @param name
* @return
*/
public boolean hasElement(String name) {
return getElement(name) != null;
}
// ---------------------------------------------------------------------------
//
// add
//
// ---------------------------------------------------------------------------
@Requires("elt != null")
public void add(DiffElement elt) {
if ( getElementMap().containsKey(elt.getName()) )
throw new IllegalArgumentException("Attempting to rebind already existing binding: " + elt + " node=" + this);
getElementMap().put(elt.getName(), elt);
}
@Requires("elt != null")
public void add(DiffValue elt) {
add(elt.getBinding());
}
@Requires("elts != null")
public void add(Collection<DiffElement> elts) {
for ( DiffElement e : elts )
add(e);
}
public void add(String name, Object value) {
add(new DiffElement(name, this.getBinding(), new DiffValue(value)));
}
public int size() {
int count = 0;
for ( DiffElement value : getElements() )
count += value.size();
return count;
}
// ---------------------------------------------------------------------------
//
// toString
//
// ---------------------------------------------------------------------------
@Override
public String toString() {
return toString(0);
}
@Override
public String toString(int offset) {
String off = offset > 0 ? Utils.dupString(' ', offset) : "";
StringBuilder b = new StringBuilder();
b.append("(").append("\n");
Collection<DiffElement> atomicElts = getAtomicElements();
for ( DiffElement elt : atomicElts ) {
b.append(elt.toString(offset + 2)).append('\n');
}
for ( DiffElement elt : getCompoundElements() ) {
b.append(elt.toString(offset + 4)).append('\n');
}
b.append(off).append(")").append("\n");
return b.toString();
}
@Override
public String toOneLineString() {
StringBuilder b = new StringBuilder();
b.append('(');
List<String> parts = new ArrayList<String>();
for ( DiffElement elt : getElements() )
parts.add(elt.toOneLineString());
b.append(Utils.join(" ", parts));
b.append(')');
return b.toString();
}
// --------------------------------------------------------------------------------
//
// fromString and toOneLineString
//
// --------------------------------------------------------------------------------
public static DiffElement fromString(String tree) {
return fromString(tree, DiffElement.ROOT);
}
/**
* Doesn't support full tree structure parsing
* @param tree
* @param parent
* @return
*/
private static DiffElement fromString(String tree, DiffElement parent) {
// X=(A=A B=B C=(D=D))
String[] parts = tree.split("=", 2);
if ( parts.length != 2 )
throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts);
String name = parts[0];
String value = parts[1];
if ( value.length() == 0 )
throw new ReviewedStingException("Illegal tree structure: " + value + " at " + tree);
if ( value.charAt(0) == '(' ) {
if ( ! value.endsWith(")") )
throw new ReviewedStingException("Illegal tree structure. Missing ): " + value + " at " + tree);
String subtree = value.substring(1, value.length()-1);
DiffNode rec = DiffNode.empty(name, parent);
String[] subParts = subtree.split(" ");
for ( String subPart : subParts ) {
rec.add(fromString(subPart, rec.getBinding()));
}
return rec.getBinding();
} else {
return new DiffValue(name, parent, value).getBinding();
}
}
}

View File

@ -0,0 +1,117 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import java.io.File;
import java.io.PrintStream;
import java.util.List;
/**
* Compares two record-oriented files, itemizing specific difference between equivalent
* records in the two files. Reports both itemized and summarized differences.
* @author Mark DePristo
* @version 0.1
*/
@Requires(value={})
public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
@Output(doc="File to which results should be written",required=true)
protected PrintStream out;
@Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false)
int MAX_OBJECTS_TO_READ = -1;
@Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false)
int MAX_DIFFS = 0;
@Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false)
int MAX_COUNT1_DIFFS = 0;
@Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
int minCountForDiff = 1;
@Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false)
boolean showItemizedDifferences = false;
@Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true)
File masterFile;
@Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true)
File testFile;
final DiffEngine diffEngine = new DiffEngine();
@Override
public void initialize() {
}
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return 0;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer counter, Integer sum) {
return counter + sum;
}
@Override
public void onTraversalDone(Integer sum) {
out.printf("Reading master file %s%n", masterFile);
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
out.printf(" Read %d objects%n", master.size());
out.printf("Reading test file %s%n", testFile);
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
out.printf(" Read %d objects%n", test.size());
// out.printf("Master diff objects%n");
// out.println(master.toString());
// out.printf("Test diff objects%n");
// out.println(test.toString());
List<SpecificDifference> diffs = diffEngine.diff(master, test);
if ( showItemizedDifferences ) {
out.printf("Itemized results%n");
for ( SpecificDifference diff : diffs )
out.printf("DIFF: %s%n", diff.toString());
}
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
diffEngine.reportSummarizedDifferences(diffs, params);
}
}

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.broadinstitute.sting.utils.Utils;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 12:55 PM
*
* An interface that must be implemented to allow us to calculate differences
* between structured objects
*/
public class DiffValue {
private DiffElement binding = null;
final private Object value;
public DiffValue(Object value) {
this.value = value;
}
public DiffValue(DiffElement binding, Object value) {
this.binding = binding;
this.value = value;
}
public DiffValue(DiffValue parent, Object value) {
this(parent.getBinding(), value);
}
public DiffValue(String name, DiffElement parent, Object value) {
this.binding = new DiffElement(name, parent, this);
this.value = value;
}
public DiffValue(String name, DiffValue parent, Object value) {
this(name, parent.getBinding(), value);
}
public DiffElement getBinding() {
return binding;
}
protected void setBinding(DiffElement binding) {
this.binding = binding;
}
public Object getValue() {
return value;
}
public String toString() {
return getValue().toString();
}
public String toString(int offset) {
return toString();
}
public String toOneLineString() {
return getValue().toString();
}
public boolean isAtomic() { return true; }
public boolean isCompound() { return ! isAtomic(); }
public int size() { return 1; }
}

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import java.io.File;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 1:09 PM
*
* Interface for readers creating diffable objects from a file
*/
public interface DiffableReader {
@Ensures("result != null")
public String getName();
@Ensures("result != null")
@Requires("file != null")
public DiffElement readFromFile(File file, int maxElementsToRead);
@Requires("file != null")
public boolean canRead(File file);
}

View File

@ -0,0 +1,95 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
public class Difference implements Comparable<Difference> {
final String path; // X.Y.Z
final String[] parts;
int count = 0;
public Difference(String path) {
this.path = path;
this.parts = DiffEngine.diffNameToPath(path);
}
public String[] getParts() {
return parts;
}
public void incCount() { count++; }
public int getCount() {
return count;
}
/**
* The fully qualified path object A.B.C etc
* @return
*/
public String getPath() {
return path;
}
/**
* @return the length of the parts of this summary
*/
public int length() {
return this.parts.length;
}
/**
* Returns true if the string parts matches this summary. Matches are
* must be equal() everywhere where this summary isn't *.
* @param otherParts
* @return
*/
public boolean matches(String[] otherParts) {
if ( otherParts.length != length() )
return false;
// TODO optimization: can start at right most non-star element
for ( int i = 0; i < length(); i++ ) {
String part = parts[i];
if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
return false;
}
return true;
}
@Override
public String toString() {
return String.format("%s:%d", getPath(), getCount());
}
@Override
public int compareTo(Difference other) {
// sort first highest to lowest count, then by lowest to highest path
int countCmp = Integer.valueOf(count).compareTo(other.count);
return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
}
}

View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 12:53 PM
*
* Represents a specific difference between two specific DiffElements
*/
public class SpecificDifference extends Difference {
DiffElement master, test;
public SpecificDifference(DiffElement master, DiffElement test) {
super(createName(master, test));
if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
this.master = master;
this.test = test;
}
public String toString() {
return String.format("%s:%s!=%s",
getPath(),
getOneLineString(master),
getOneLineString(test));
}
private static String createName(DiffElement master, DiffElement test) {
return (master == null ? test : master).fullyQualifiedName();
}
private static String getOneLineString(DiffElement elt) {
return elt == null ? "MISSING" : elt.getValue().toOneLineString();
}
}

View File

@ -0,0 +1,125 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
import java.util.Arrays;
import java.util.Map;
import java.util.zip.GZIPInputStream;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 7/4/11
* Time: 1:09 PM
*
* Class implementing diffnode reader for VCF
*/
public class VCFDiffableReader implements DiffableReader {
@Override
public String getName() { return "VCF"; }
@Override
public DiffElement readFromFile(File file, int maxElementsToRead) {
DiffNode root = DiffNode.rooted(file.getName());
try {
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
VCFCodec vcfCodec = new VCFCodec();
// must be read as state is stored in reader itself
vcfCodec.readHeader(lineReader);
String line = lineReader.readLine();
int count = 0;
while ( line != null ) {
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break;
VariantContext vc = (VariantContext)vcfCodec.decode(line);
String name = vc.getChr() + ":" + vc.getStart();
DiffNode vcRoot = DiffNode.empty(name, root);
// add fields
vcRoot.add("CHROM", vc.getChr());
vcRoot.add("POS", vc.getStart());
vcRoot.add("ID", vc.hasID() ? vc.getID() : VCFConstants.MISSING_VALUE_v4);
vcRoot.add("REF", vc.getReference());
vcRoot.add("ALT", vc.getAlternateAlleles());
vcRoot.add("QUAL", vc.hasNegLog10PError() ? vc.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4);
vcRoot.add("FILTER", vc.getFilters());
// add info fields
for (Map.Entry<String, Object> attribute : vc.getAttributes().entrySet()) {
if ( ! attribute.getKey().startsWith("_") && ! attribute.getKey().equals(VariantContext.ID_KEY))
vcRoot.add(attribute.getKey(), attribute.getValue());
}
for (Genotype g : vc.getGenotypes().values() ) {
DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
gRoot.add("GT", g.getGenotypeString());
gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 );
for (Map.Entry<String, Object> attribute : g.getAttributes().entrySet()) {
if ( ! attribute.getKey().startsWith("_") )
gRoot.add(attribute.getKey(), attribute.getValue());
}
vcRoot.add(gRoot);
}
root.add(vcRoot);
line = lineReader.readLine();
}
lineReader.close();
} catch ( IOException e ) {
return null;
}
return root.getBinding();
}
@Override
public boolean canRead(File file) {
try {
final String VCF4_HEADER = "##fileformat=VCFv4";
char[] buff = new char[VCF4_HEADER.length()];
new FileReader(file).read(buff, 0, VCF4_HEADER.length());
String firstLine = new String(buff);
return firstLine.startsWith(VCF4_HEADER);
} catch ( IOException e ) {
return false;
}
}
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
@ -37,7 +38,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import java.util.*;
import java.io.PrintStream;
@ -48,7 +48,7 @@ import java.io.PrintStream;
* multi-sample data. The user can choose from several different incorporated calculation models.
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@ReadFilters( {BadMateFilter.class} )
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} )
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.REFERENCE)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
@ -158,7 +158,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
}
// FORMAT and INFO fields
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
headerInfo.addAll(getSupportedHeaderStrings());
// FILTER fields
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
@ -167,6 +167,20 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
return headerInfo;
}
/**
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
* @return a set of VCF format lines
*/
private static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
return result;
}
/**
* Compute at a given locus.
*

View File

@ -634,17 +634,27 @@ public class UnifiedGenotyperEngine {
if (vcInput == null)
return null;
if (vcInput.isSNP() && ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
return GenotypeLikelihoodsCalculationModel.Model.SNP;
// todo - no support to genotype MNP's yet
if (vcInput.isMNP())
return null;
if (vcInput.isSNP()) {
if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
return GenotypeLikelihoodsCalculationModel.Model.SNP;
else
// ignore SNP's if user chose INDEL mode
return null;
}
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL))
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
} else {
}
else {
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)
return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
}
}
}
return null;
}

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadCigarFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
@ -50,7 +50,7 @@ import java.io.PrintStream;
/**
* Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
*/
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, BadCigarFilter.class})
@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, BadCigarFilter.class})
@Reference(window=@Window(start=-1,stop=50))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@By(DataSource.REFERENCE)

View File

@ -72,7 +72,7 @@ import java.util.*;
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
*/
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, PlatformUnitFilter.class})
@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class})
public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
// @Output
// PrintStream out;

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
@ -58,7 +58,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
@Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
@By(DataSource.READS)
@ReadFilters({ZeroMappingQualityReadFilter.class})
@ReadFilters({MappingQualityZeroReadFilter.class})
// Filter out all reads with zero mapping quality
public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, PhasingStats> {

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broad.tribble.bed.BEDCodec;
import org.broad.tribble.dbsnp.DbSNPCodec;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.commandline.Gather;
@ -34,7 +35,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.*;
@ -75,7 +76,7 @@ import java.util.Map;
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
@ReadFilters( {MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
@PartitionBy(PartitionType.LOCUS)
public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {

View File

@ -9,9 +9,13 @@ import net.sf.samtools.SAMUtils;
* @author Kiran Garimella
*/
public class QualityUtils {
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double MIN_REASONABLE_ERROR = 0.0001;
public final static byte MAX_REASONABLE_Q_SCORE = 40;
public final static byte MIN_USABLE_Q_SCORE = 6;
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
/**
* Private constructor. No instantiating this class!

View File

@ -123,12 +123,10 @@ public class StandardVCFWriter implements VCFWriter {
try {
// the file format field needs to be written first
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString() + "\n");
for ( VCFHeaderLine line : mHeader.getMetaData() ) {
if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) ||
line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) )
if ( VCFHeaderVersion.isFormatString(line.getKey()) )
continue;
// are the records filtered (so we know what to put in the FILTER column of passing records) ?
@ -358,16 +356,8 @@ public class StandardVCFWriter implements VCFWriter {
mWriter.write(key);
if ( !entry.getValue().equals("") ) {
int numVals = 1;
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
if ( metaData != null )
numVals = metaData.getCount();
// take care of unbounded encoding
if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
numVals = 1;
if ( numVals > 0 ) {
if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) {
mWriter.write("=");
mWriter.write(entry.getValue());
}
@ -423,7 +413,7 @@ public class StandardVCFWriter implements VCFWriter {
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
if ( metaData != null ) {
int numInFormatField = metaData.getCount();
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
// For example, if Number=2, the string has to be ".,."

View File

@ -0,0 +1,28 @@
package org.broadinstitute.sting.utils.codecs.vcf;
/**
* @author ebanks
* A class representing a key=value entry for ALT fields in the VCF header
*/
public class VCFAltHeaderLine extends VCFSimpleHeaderLine {
/**
* create a VCF filter header line
*
* @param name the name for this header line
* @param description the description for this header line
*/
public VCFAltHeaderLine(String name, String description) {
super(name, description, SupportedHeaderLineType.ALT);
}
/**
* create a VCF info header line
*
* @param line the header line
* @param version the vcf header version
*/
protected VCFAltHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.ALT);
}
}

View File

@ -24,6 +24,8 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
@ -43,26 +45,43 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
// the field types
private String name;
private int count;
private int count = -1;
private VCFHeaderLineCount countType;
private String description;
private VCFHeaderLineType type;
// access methods
public String getName() { return name; }
public int getCount() { return count; }
public String getDescription() { return description; }
public VCFHeaderLineType getType() { return type; }
public VCFHeaderLineCount getCountType() { return countType; }
public int getCount() {
if ( countType != VCFHeaderLineCount.INTEGER )
throw new ReviewedStingException("Asking for header line count when type is not an integer");
return count;
}
//
public void setNumberToUnbounded() { this.count = UNBOUNDED; }
// utility method
public int getCount(int numAltAlleles) {
int myCount;
switch ( countType ) {
case INTEGER: myCount = count; break;
case UNBOUNDED: myCount = -1; break;
case A: myCount = numAltAlleles; break;
case G: myCount = ((numAltAlleles + 1) * (numAltAlleles + 2) / 2); break;
default: throw new ReviewedStingException("Unknown count type: " + countType);
}
return myCount;
}
public void setNumberToUnbounded() {
countType = VCFHeaderLineCount.UNBOUNDED;
count = -1;
}
// our type of line, i.e. format, info, etc
private final SupportedHeaderLineType lineType;
// line numerical values are allowed to be unbounded (or unknown), which is
// marked with a dot (.)
public static final int UNBOUNDED = -1; // the value we store internally for unbounded types
/**
* create a VCF format header line
*
@ -70,10 +89,12 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
* @param count the count for this header line
* @param type the type for this header line
* @param description the description for this header line
* @param lineType the header line type
*/
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
this.name = name;
this.countType = VCFHeaderLineCount.INTEGER;
this.count = count;
this.type = type;
this.description = description;
@ -81,20 +102,53 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
validate();
}
/**
* create a VCF format header line
*
* @param name the name for this header line
* @param count the count type for this header line
* @param type the type for this header line
* @param description the description for this header line
* @param lineType the header line type
*/
protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
this.name = name;
this.countType = count;
this.type = type;
this.description = description;
this.lineType = lineType;
validate();
}
/**
* create a VCF format header line
*
* @param line the header line
* @param version the VCF header version
* @param lineType the header line type
*
*/
protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
name = mapping.get("ID");
count = (version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) ?
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
count = -1;
final String numberStr = mapping.get("Number");
if ( numberStr.equals(VCFConstants.PER_ALLELE_COUNT) ) {
countType = VCFHeaderLineCount.A;
} else if ( numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT) ) {
countType = VCFHeaderLineCount.G;
} else if ( ((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) &&
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) ||
((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) &&
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3)) ) {
countType = VCFHeaderLineCount.UNBOUNDED;
} else {
countType = VCFHeaderLineCount.INTEGER;
count = Integer.valueOf(numberStr);
}
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
@ -121,7 +175,15 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
protected String toStringEncoding() {
Map<String,Object> map = new LinkedHashMap<String,Object>();
map.put("ID", name);
map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
Object number;
switch ( countType ) {
case A: number = VCFConstants.PER_ALLELE_COUNT; break;
case G: number = VCFConstants.PER_GENOTYPE_COUNT; break;
case UNBOUNDED: number = VCFConstants.UNBOUNDED_ENCODING_v4; break;
case INTEGER:
default: number = count;
}
map.put("Number", number);
map.put("Type", type);
map.put("Description", description);
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
@ -136,15 +198,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
if ( !(o instanceof VCFCompoundHeaderLine) )
return false;
VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o;
return name.equals(other.name) &&
count == other.count &&
description.equals(other.description) &&
type == other.type &&
lineType == other.lineType;
return equalsExcludingDescription(other) &&
description.equals(other.description);
}
public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
return count == other.count &&
countType == other.countType &&
type == other.type &&
lineType == other.lineType &&
name.equals(other.name);

View File

@ -99,6 +99,8 @@ public final class VCFConstants {
public static final String MISSING_DEPTH_v3 = "-1";
public static final String UNBOUNDED_ENCODING_v4 = ".";
public static final String UNBOUNDED_ENCODING_v3 = "-1";
public static final String PER_ALLELE_COUNT = "A";
public static final String PER_GENOTYPE_COUNT = "G";
public static final String EMPTY_ALLELE = ".";
public static final String EMPTY_GENOTYPE = "./.";
public static final double MAX_GENOTYPE_QUAL = 99.0;

View File

@ -1,19 +1,10 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* @author ebanks
* A class representing a key=value entry for FILTER fields in the VCF header
*/
public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
private String name;
private String description;
public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
/**
* create a VCF filter header line
@ -22,12 +13,7 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
* @param description the description for this header line
*/
public VCFFilterHeaderLine(String name, String description) {
super("FILTER", "");
this.name = name;
this.description = description;
if ( name == null || description == null )
throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
super(name, description, SupportedHeaderLineType.FILTER);
}
/**
@ -37,34 +23,6 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
* @param version the vcf header version
*/
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
super("FILTER", "");
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
name = mapping.get("ID");
description = mapping.get("Description");
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
description = UNBOUND_DESCRIPTION;
}
protected String toStringEncoding() {
Map<String,Object> map = new LinkedHashMap<String,Object>();
map.put("ID", name);
map.put("Description", description);
return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
}
public boolean equals(Object o) {
if ( !(o instanceof VCFFilterHeaderLine) )
return false;
VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
return name.equals(other.name) &&
description.equals(other.description);
}
public String getName() {
return name;
}
public String getDescription() {
return description;
super(line, version, SupportedHeaderLineType.FILTER);
}
}

View File

@ -16,6 +16,10 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
throw new IllegalArgumentException("Flag is an unsupported type for format fields");
}
public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
super(name, count, type, description, SupportedHeaderLineType.FORMAT);
}
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.FORMAT);
}

View File

@ -0,0 +1,8 @@
package org.broadinstitute.sting.utils.codecs.vcf;
/**
* the count encodings we use for fields in VCF header lines
*/
public enum VCFHeaderLineCount {
INTEGER, A, G, UNBOUNDED;
}

View File

@ -13,6 +13,10 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
super(name, count, type, description, SupportedHeaderLineType.INFO);
}
public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
super(name, count, type, description, SupportedHeaderLineType.INFO);
}
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.INFO);
}

View File

@ -0,0 +1,81 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* @author ebanks
* A class representing a key=value entry for simple VCF header types
*/
public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
public enum SupportedHeaderLineType {
FILTER, ALT;
}
private String name;
private String description;
// our type of line, i.e. filter, alt, etc
private final SupportedHeaderLineType lineType;
/**
* create a VCF filter header line
*
* @param name the name for this header line
* @param description the description for this header line
* @param lineType the header line type
*/
public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
this.lineType = lineType;
this.name = name;
this.description = description;
if ( name == null || description == null )
throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
}
/**
* create a VCF info header line
*
* @param line the header line
* @param version the vcf header version
* @param lineType the header line type
*/
protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
this.lineType = lineType;
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
name = mapping.get("ID");
description = mapping.get("Description");
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
description = UNBOUND_DESCRIPTION;
}
protected String toStringEncoding() {
Map<String,Object> map = new LinkedHashMap<String,Object>();
map.put("ID", name);
map.put("Description", description);
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
}
public boolean equals(Object o) {
if ( !(o instanceof VCFSimpleHeaderLine) )
return false;
VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o;
return name.equals(other.name) &&
description.equals(other.description);
}
public String getName() {
return name;
}
public String getDescription() {
return description;
}
}

View File

@ -180,19 +180,4 @@ public class VCFUtils {
return new HashSet<VCFHeaderLine>(map.values());
}
/**
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
* @return a set of VCF format lines
*/
public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
return result;
}
}

View File

@ -867,7 +867,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
for ( String name : sampleNames ) {
if ( map.containsKey(name) ) throw new IllegalArgumentException("Duplicate names detected in requested samples " + sampleNames);
map.put(name, getGenotype(name));
final Genotype g = getGenotype(name);
if ( g != null ) {
map.put(name, g);
}
}
return map;

View File

@ -4,6 +4,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.log4j.*;
import org.apache.log4j.spi.LoggingEvent;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
@ -334,11 +335,14 @@ public abstract class BaseTest {
if (parameterize || expectedMD5.equals("")) {
// Don't assert
} else {
Assert.assertEquals(filemd5sum, expectedMD5, name + " Mismatching MD5s");
} else if ( filemd5sum.equals(expectedMD5) ) {
System.out.println(String.format(" => %s PASSED", name));
} else {
Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum));
}
return filemd5sum;
}
@ -381,7 +385,12 @@ public abstract class BaseTest {
System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File);
System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File);
// todo -- add support for simple inline display of the first N differences for text file
// inline differences
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0);
boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params);
if ( success )
System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n",
pathToExpectedMD5File, pathToFileMD5File);
}
}

View File

@ -15,7 +15,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347"));
Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c"));
executeTest("test file has annotations, not asking for annotations, #1", spec);
}
@ -23,7 +23,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("1de8e943fbf55246ebd19efa32f22a58"));
Arrays.asList("964f1016ec9a3c55333f62dd834c14d6"));
executeTest("test file has annotations, not asking for annotations, #2", spec);
}
@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("93c110e45fd4aedb044a8a5501e23336"));
Arrays.asList("8e7de435105499cd71ffc099e268a83e"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("f5cb45910ed719f46159f9f71acaecf4"));
Arrays.asList("64b6804cb1e27826e3a47089349be581"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -47,7 +47,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4b48e7d095ef73e3151542ea976ecd89"));
Arrays.asList("42ccee09fa9f8c58f4a0d4f1139c094f"));
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}
@ -55,7 +55,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("28dfbfd178aca071b948cd3dc2365357"));
Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a330a5bc3ee72a51dbeb7e6c97a0db99"));
Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("3a31d1ef471acfb881a2dec7963fe3f4"));
Arrays.asList("09f8e840770a9411ff77508e0ed0837f"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("a63fd8ff7bafbd46b7f009144a7c2ad1"));
Arrays.asList("78d2c19f8107d865970dbaf3e12edd92"));
executeTest("test overwriting header", spec);
}
@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("36378f1245bb99d902fbfe147605bc42"));
Arrays.asList("16e3a1403fc376320d7c69492cad9345"));
executeTest("not passing it any reads", spec);
}
@ -95,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("0257a1cc3c703535b2d3c5046bf88ab7"));
Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -103,7 +103,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("2d7c73489dcf0db433bebdf79a068764"));
Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688"));
executeTest("getting DB tag with HM3", spec);
}
@ -111,13 +111,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1,
Arrays.asList("2f6efd08d818faa1eb0631844437c64a"));
Arrays.asList("e9c0d832dc6b4ed06c955060f830c140"));
executeTest("using expression", spec);
}
@Test
public void testTabixAnnotations() {
final String MD5 = "6c7a6a1c0027bf82656542a9b2671a35";
final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";
for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -A HomopolymerRun -B:variant,VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1,

View File

@ -29,7 +29,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
*/
String[] md5WithDashSArg = {"3d3b61a83c1189108eabb2df04218099"};
String[] md5WithDashSArg = {"efba4ce1641cfa2ef88a64395f2ebce8"};
WalkerTestSpec specWithSArg = new WalkerTestSpec(
"-T GenomicAnnotator -R " + b36KGReference +
" -B:variant,vcf3 /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf" +
@ -58,7 +58,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("caa562160733aa638e1ba413ede209ae")
Arrays.asList("772fc3f43b70770ec6c6acbb8bbbd4c0")
);
executeTest("testGenomicAnnotatorOnIndels", testOnIndels);
}
@ -76,7 +76,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("a4cf76f08fa90284b6988a464b6e0c17")
Arrays.asList("081ade7f3d2d3c5f19cb1e8651a626f3")
);
executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels);
}

View File

@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"-B:beagleR2,BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
"-B:beagleProbs,BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
"-B:beaglePhased,BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
"-o %s -NO_HEADER", 1, Arrays.asList("6bccee48ad2f06ba5a8c774fed444478"));
"-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4"));
executeTest("test BeagleOutputToVCF", spec);
}
@ -60,7 +60,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"-T ProduceBeagleInput -B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
"-B:validation,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta -NO_HEADER ",2,
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","223fb977e8db567dcaf632c6ee51f294"));
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166"));
executeTest("test BeagleInputWithBootstrap",spec);
}
@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"-B:beagleR2,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
"-B:beagleProbs,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
"-B:beaglePhased,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
"-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("24b88ef8cdf6e347daab491f0256be5a"));
"-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965"));
executeTest("testBeagleChangesSitesToRef",spec);
}

View File

@ -0,0 +1,229 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// our package
package org.broadinstitute.sting.gatk.walkers.diffengine;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Basic unit test for DifferableReaders in reduced reads
*/
public class DiffEngineUnitTest extends BaseTest {
DiffEngine engine;
@BeforeClass(enabled = true)
public void createDiffEngine() {
engine = new DiffEngine();
}
// --------------------------------------------------------------------------------
//
// Difference testing routines
//
// --------------------------------------------------------------------------------
private class DifferenceTest extends TestDataProvider {
public DiffElement tree1, tree2;
public List<String> differences;
private DifferenceTest(String tree1, String tree2) {
this(tree1, tree2, Collections.<String>emptyList());
}
private DifferenceTest(String tree1, String tree2, String difference) {
this(tree1, tree2, Arrays.asList(difference));
}
private DifferenceTest(String tree1, String tree2, List<String> differences) {
super(DifferenceTest.class);
this.tree1 = DiffNode.fromString(tree1);
this.tree2 = DiffNode.fromString(tree2);
this.differences = differences;
}
public String toString() {
return String.format("tree1=%s tree2=%s diff=%s",
tree1.toOneLineString(), tree2.toOneLineString(), differences);
}
}
@DataProvider(name = "trees")
public Object[][] createTrees() {
new DifferenceTest("A=X", "A=X");
new DifferenceTest("A=X", "A=Y", "A:X!=Y");
new DifferenceTest("A=X", "B=X", Arrays.asList("A:X!=MISSING", "B:MISSING!=X"));
new DifferenceTest("A=(X=1)", "B=(X=1)", Arrays.asList("A:(X=1)!=MISSING", "B:MISSING!=(X=1)"));
new DifferenceTest("A=(X=1)", "A=(X=1)");
new DifferenceTest("A=(X=1 Y=2)", "A=(X=1 Y=2)");
new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=3))");
new DifferenceTest("A=(X=1)", "A=(X=2)", "A.X:1!=2");
new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=4))", "A.B.Z:3!=4");
new DifferenceTest("A=(X=1)", "A=(X=1 Y=2)", "A.Y:MISSING!=2");
new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2)", "A.B:(Z=3)!=MISSING");
return DifferenceTest.getTests(DifferenceTest.class);
}
@Test(enabled = true, dataProvider = "trees")
public void testDiffs(DifferenceTest test) {
logger.warn("Test tree1: " + test.tree1.toOneLineString());
logger.warn("Test tree2: " + test.tree2.toOneLineString());
List<SpecificDifference> diffs = engine.diff(test.tree1, test.tree2);
logger.warn("Test expected diff : " + test.differences);
logger.warn("Observed diffs : " + diffs);
}
// --------------------------------------------------------------------------------
//
// Low-level routines for summarizing differences
//
// --------------------------------------------------------------------------------
@Test(enabled = true)
public void testLongestCommonPostfix() {
testLongestCommonPostfixHelper("A", "A", 1);
testLongestCommonPostfixHelper("A", "B", 0);
testLongestCommonPostfixHelper("A.B", "A.B", 2);
testLongestCommonPostfixHelper("A.B.C", "A.B.C", 3);
testLongestCommonPostfixHelper("A.B.C", "X.B.C", 2);
testLongestCommonPostfixHelper("A.B.C", "X.Y.C", 1);
testLongestCommonPostfixHelper("A.B.C", "X.Y.Z", 0);
testLongestCommonPostfixHelper("A.B.C", "A.X.C", 1);
testLongestCommonPostfixHelper("A.B.C", "A.X.Z", 0);
testLongestCommonPostfixHelper("A.B.C", "A.B.Z", 0);
}
public void testLongestCommonPostfixHelper(String p1, String p2, int expected) {
String[] parts1 = p1.split("\\.");
String[] parts2 = p2.split("\\.");
int obs = DiffEngine.longestCommonPostfix(parts1, parts2);
Assert.assertEquals(obs, expected, "p1=" + p1 + " p2=" + p2 + " failed");
}
@Test(enabled = true, dependsOnMethods = "testLongestCommonPostfix")
public void testSummarizePath() {
testSummarizePathHelper("A", "A", "A");
testSummarizePathHelper("A", "B", "*");
testSummarizePathHelper("A.B", "A.B", "A.B");
testSummarizePathHelper("A.B", "X.B", "*.B");
testSummarizePathHelper("A.B", "X.Y", "*.*");
testSummarizePathHelper("A.B.C", "A.B.C", "A.B.C");
testSummarizePathHelper("A.B.C", "X.B.C", "*.B.C");
testSummarizePathHelper("A.B.C", "X.Y.C", "*.*.C");
testSummarizePathHelper("A.B.C", "X.Y.Z", "*.*.*");
testSummarizePathHelper("A.B.C", "A.X.C", "*.*.C");
testSummarizePathHelper("A.B.C", "A.X.Z", "*.*.*");
testSummarizePathHelper("A.B.C", "A.B.Z", "*.*.*");
}
public void testSummarizePathHelper(String p1, String p2, String expected) {
String[] parts1 = DiffEngine.diffNameToPath(p1);
String[] parts2 = DiffEngine.diffNameToPath(p2);
int obs = DiffEngine.longestCommonPostfix(parts1, parts2);
String path = DiffEngine.summarizedPath(parts2, obs);
Assert.assertEquals(path, expected, "p1=" + p1 + " p2=" + p2 + " failed");
}
// --------------------------------------------------------------------------------
//
// High-level difference summary
//
// --------------------------------------------------------------------------------
private class SummarizeDifferenceTest extends TestDataProvider {
List<String> diffs = new ArrayList<String>();
List<String> expecteds = new ArrayList<String>();
public SummarizeDifferenceTest() { super(SummarizeDifferenceTest.class); }
public SummarizeDifferenceTest addDiff(String... diffsToAdd) {
diffs.addAll(Arrays.asList(diffsToAdd));
return this;
}
public SummarizeDifferenceTest addSummary(String... expectedSummary) {
expecteds.addAll(Arrays.asList(expectedSummary));
return this;
}
public String toString() {
return String.format("diffs=%s => expected=%s", diffs, expecteds);
}
public void test() {
List<String[]> diffPaths = new ArrayList<String[]>(diffs.size());
for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); }
List<Difference> sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs);
Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs);
for ( int i = 0; i < sumDiffs.size(); i++ ) {
Difference sumDiff = sumDiffs.get(i);
String expected = expecteds.get(i);
String[] pathCount = expected.split(":");
String path = pathCount[0];
int count = Integer.valueOf(pathCount[1]);
Assert.assertEquals(sumDiff.getPath(), path, "Unexpected path at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs);
Assert.assertEquals(sumDiff.getCount(), count, "Unexpected counts at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs);
}
}
}
@DataProvider(name = "summaries")
public Object[][] createSummaries() {
new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2");
new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1");
new SummarizeDifferenceTest().addDiff("A", "A", "A").addSummary("A:3");
new SummarizeDifferenceTest().addDiff("A", "A", "A", "B").addSummary("A:3", "B:1");
new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B").addSummary("A:3", "B:2");
new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B", "C").addSummary("A:3", "B:2", "C:1");
new SummarizeDifferenceTest().addDiff("A.X", "A.X").addSummary("A.X:2");
new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X").addSummary("*.X:3", "A.X:2", "B.X:1");
new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X", "B.X").addSummary("*.X:4", "A.X:2", "B.X:2");
new SummarizeDifferenceTest().addDiff("A.B.C", "X.B.C").addSummary("*.B.C:2", "A.B.C:1", "X.B.C:1");
new SummarizeDifferenceTest().addDiff("A.B.C", "X.Y.C", "X.Y.C").addSummary("*.*.C:3", "X.Y.C:2", "A.B.C:1");
new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "X.Y.C").addSummary("*.*.C:3", "A.B.C:1", "A.X.C:1", "X.Y.C:1");
new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C").addSummary("*.*.C:3", "*.X.C:2", "A.B.C:1", "A.X.C:1", "B.X.C:1");
new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C", "B.X.C").addSummary("*.*.C:4", "*.X.C:3", "B.X.C:2", "A.B.C:1", "A.X.C:1");
return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class);
}
@Test(enabled = true, dependsOnMethods = "testSummarizePath", dataProvider = "summaries")
public void testSummarizeDifferences(SummarizeDifferenceTest test) {
test.test();
}
}

View File

@ -0,0 +1,249 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// our package
package org.broadinstitute.sting.gatk.walkers.diffengine;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Basic unit test for DifferableReaders in reduced reads
*/
public class DiffNodeUnitTest extends BaseTest {
// Data is:
// MY_ROOT
// fields: A=A, B=B
// nodes: C, D
// C: fields: E=E, nodes: none
// D: fields: F=F, G=G, nodes: none
static DiffNode MY_ROOT = DiffNode.rooted("MY_ROOT");
static DiffValue Value_A = new DiffValue("A", MY_ROOT, "A");
static DiffValue Value_B = new DiffValue("B", MY_ROOT, "B");
static DiffNode NODE_C = DiffNode.empty("C", MY_ROOT);
static DiffNode NODE_D = DiffNode.empty("D", MY_ROOT);
static DiffValue Value_E = new DiffValue("E", NODE_C, "E");
static DiffValue Value_F = new DiffValue("F", NODE_D, "F");
static DiffValue Value_G = new DiffValue("G", NODE_D, "G");
static {
MY_ROOT.add(Value_A);
MY_ROOT.add(Value_B);
MY_ROOT.add(NODE_C);
MY_ROOT.add(NODE_D);
NODE_C.add(Value_E);
NODE_D.add(Value_F);
NODE_D.add(Value_G);
}
// --------------------------------------------------------------------------------
//
// Element testing routines
//
// --------------------------------------------------------------------------------
private class ElementTest extends TestDataProvider {
public DiffElement elt;
public String name;
public String fullName;
public DiffElement parent;
private ElementTest(DiffValue elt, DiffValue parent, String name, String fullName) {
this(elt.getBinding(), parent.getBinding(), name, fullName);
}
private ElementTest(DiffElement elt, DiffElement parent, String name, String fullName) {
super(ElementTest.class);
this.elt = elt;
this.name = name;
this.fullName = fullName;
this.parent = parent;
}
public String toString() {
return String.format("ElementTest elt=%s name=%s fullName=%s parent=%s",
elt.toOneLineString(), name, fullName, parent.getName());
}
}
@DataProvider(name = "elementdata")
public Object[][] createElementData() {
new ElementTest(MY_ROOT.getBinding(), DiffElement.ROOT, "MY_ROOT", "MY_ROOT");
new ElementTest(NODE_C, MY_ROOT, "C", "MY_ROOT.C");
new ElementTest(NODE_D, MY_ROOT, "D", "MY_ROOT.D");
new ElementTest(Value_A, MY_ROOT, "A", "MY_ROOT.A");
new ElementTest(Value_B, MY_ROOT, "B", "MY_ROOT.B");
new ElementTest(Value_E, NODE_C, "E", "MY_ROOT.C.E");
new ElementTest(Value_F, NODE_D, "F", "MY_ROOT.D.F");
new ElementTest(Value_G, NODE_D, "G", "MY_ROOT.D.G");
return TestDataProvider.getTests(ElementTest.class);
}
@Test(enabled = true, dataProvider = "elementdata")
public void testElementMethods(ElementTest test) {
Assert.assertNotNull(test.elt.getName());
Assert.assertNotNull(test.elt.getParent());
Assert.assertEquals(test.elt.getName(), test.name);
Assert.assertEquals(test.elt.getParent(), test.parent);
Assert.assertEquals(test.elt.fullyQualifiedName(), test.fullName);
}
// --------------------------------------------------------------------------------
//
// DiffValue testing routines
//
// --------------------------------------------------------------------------------
private class LeafTest extends TestDataProvider {
public DiffValue diffvalue;
public Object value;
private LeafTest(DiffValue diffvalue, Object value) {
super(LeafTest.class);
this.diffvalue = diffvalue;
this.value = value;
}
public String toString() {
return String.format("LeafTest diffvalue=%s value=%s", diffvalue.toOneLineString(), value);
}
}
@DataProvider(name = "leafdata")
public Object[][] createLeafData() {
new LeafTest(Value_A, "A");
new LeafTest(Value_B, "B");
new LeafTest(Value_E, "E");
new LeafTest(Value_F, "F");
new LeafTest(Value_G, "G");
return TestDataProvider.getTests(LeafTest.class);
}
@Test(enabled = true, dataProvider = "leafdata")
public void testLeafMethods(LeafTest test) {
Assert.assertNotNull(test.diffvalue.getValue());
Assert.assertEquals(test.diffvalue.getValue(), test.value);
}
// --------------------------------------------------------------------------------
//
// Node testing routines
//
// --------------------------------------------------------------------------------
private class NodeTest extends TestDataProvider {
public DiffNode node;
public Set<String> fields;
public Set<String> subnodes;
public Set<String> allNames;
private NodeTest(DiffNode node, List<String> fields, List<String> subnodes) {
super(NodeTest.class);
this.node = node;
this.fields = new HashSet<String>(fields);
this.subnodes = new HashSet<String>(subnodes);
this.allNames = new HashSet<String>(fields);
allNames.addAll(subnodes);
}
public String toString() {
return String.format("NodeTest node=%s fields=%s subnodes=%s",
node.toOneLineString(), fields, subnodes);
}
}
@DataProvider(name = "nodedata")
public Object[][] createData1() {
new NodeTest(MY_ROOT, Arrays.asList("A", "B"), Arrays.asList("C", "D"));
new NodeTest(NODE_C, Arrays.asList("E"), Collections.<String>emptyList());
new NodeTest(NODE_D, Arrays.asList("F", "G"), Collections.<String>emptyList());
return TestDataProvider.getTests(NodeTest.class);
}
@Test(enabled = true, dataProvider = "nodedata")
public void testNodeAccessors(NodeTest test) {
Assert.assertNotNull(test.node.getElements());
for ( String name : test.allNames ) {
DiffElement elt = test.node.getElement(name);
Assert.assertNotNull(elt, "Failed to find field " + elt + " in " + test.node);
Assert.assertEquals(elt.getName(), name);
Assert.assertEquals(elt.getValue().isAtomic(), test.fields.contains(name), "Failed atomic/compound expectation: " + test.node);
}
}
// NOTE: add routines are being implicitly tested by the creation of the data structures
@Test(enabled = true, dataProvider = "nodedata")
public void testCounts(NodeTest test) {
Assert.assertEquals(test.node.getElements().size(), test.allNames.size());
Assert.assertEquals(test.node.getElementNames(), test.allNames);
}
// --------------------------------------------------------------------------------
//
// fromString testing routines
//
// --------------------------------------------------------------------------------
private class FromStringTest extends TestDataProvider {
public String string;
public DiffElement expected;
private FromStringTest(String string, DiffElement expected) {
super(FromStringTest.class);
this.string = string;
this.expected = expected;
}
public String toString() {
return String.format("FromStringTest string=%s expected=%s", string, expected.toOneLineString());
}
}
@DataProvider(name = "fromstringdata")
public Object[][] createFromData() {
new FromStringTest("A=A", Value_A.getBinding());
new FromStringTest("B=B", Value_B.getBinding());
new FromStringTest("C=(E=E)", NODE_C.getBinding());
new FromStringTest("D=(F=F G=G)", NODE_D.getBinding());
return TestDataProvider.getTests(FromStringTest.class);
}
@Test(enabled = true, dataProvider = "fromstringdata")
public void parseFromString(FromStringTest test) {
logger.warn("Testing from string: " + test.string);
DiffElement elt = DiffNode.fromString(test.string);
Assert.assertEquals(elt.toOneLineString(), test.expected.toOneLineString());
}
}

View File

@ -0,0 +1,143 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// our package
package org.broadinstitute.sting.gatk.walkers.diffengine;
// the imports for unit testing.
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.File;
import java.util.*;
/**
* Basic unit test for DifferableReaders in reduced reads
*/
public class DiffableReaderUnitTest extends BaseTest {
DiffEngine engine;
File vcfFile = new File(testDir + "diffTestMaster.vcf");
File bamFile = new File(testDir + "exampleBAM.bam");
@BeforeClass(enabled = true)
public void createDiffEngine() {
engine = new DiffEngine();
}
@Test(enabled = true)
public void testPluggableDiffableReaders() {
logger.warn("testPluggableDiffableReaders");
Map<String, DiffableReader> readers = engine.getReaders();
Assert.assertNotNull(readers);
Assert.assertTrue(readers.size() > 0);
Assert.assertNotNull(readers.get("VCF"));
for ( Map.Entry<String, DiffableReader> e : engine.getReaders().entrySet() ) {
logger.warn("Found diffable reader: " + e.getKey());
Assert.assertEquals(e.getValue().getName(), e.getKey());
Assert.assertEquals(e.getValue(), engine.getReader(e.getKey()));
}
}
private static void testLeaf(DiffNode rec, String field, Object expected) {
DiffElement value = rec.getElement(field);
Assert.assertNotNull(value, "Expected to see leaf named " + field + " in rec " + rec);
Assert.assertEquals(value.getValue().getValue(), expected, "Expected to leaf named " + field + " to have value " + expected + " in rec " + rec);
}
@Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders")
public void testVCF1() {
logger.warn("testVCF1");
DiffableReader vcfReader = engine.getReader("VCF");
Assert.assertTrue(vcfReader.canRead(vcfFile));
Assert.assertFalse(vcfReader.canRead(bamFile));
DiffElement diff = vcfReader.readFromFile(vcfFile, -1);
Assert.assertNotNull(diff);
Assert.assertEquals(diff.getName(), vcfFile.getName());
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
DiffNode node = diff.getValueAsNode();
Assert.assertEquals(node.getElements().size(), 9);
// chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03
DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode();
testLeaf(rec1, "CHROM", "chr1");
testLeaf(rec1, "POS", 2646);
testLeaf(rec1, "ID", "rs62635284");
testLeaf(rec1, "REF", Allele.create("G", true));
testLeaf(rec1, "ALT", new HashSet<Allele>(Arrays.asList(Allele.create("A"))));
testLeaf(rec1, "QUAL", 0.15);
testLeaf(rec1, "FILTER", Collections.<Object>emptySet());
testLeaf(rec1, "AC", "2");
testLeaf(rec1, "AF", "1.00");
testLeaf(rec1, "AN", "2");
}
@Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders")
public void testBAM() {
logger.warn("testBAM");
DiffableReader bamReader = engine.getReader("BAM");
Assert.assertTrue(bamReader.canRead(bamFile));
Assert.assertFalse(bamReader.canRead(vcfFile));
DiffElement diff = bamReader.readFromFile(bamFile, -1);
Assert.assertNotNull(diff);
Assert.assertEquals(diff.getName(), bamFile.getName());
Assert.assertSame(diff.getParent(), DiffElement.ROOT);
DiffNode node = diff.getValueAsNode();
Assert.assertEquals(node.getElements().size(), 33);
// 30PPJAAXX090125:1:42:512:1817#0 99 chr1 200 0 76M =
// 255 -130 ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC
// BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3:
// PG:Z:0 RG:Z:exampleBAM.bam SM:Z:exampleBAM.bam
DiffNode rec1 = node.getElement("30PPJAAXX090125:1:42:512:1817#0_1").getValueAsNode();
testLeaf(rec1, "NAME", "30PPJAAXX090125:1:42:512:1817#0");
testLeaf(rec1, "FLAGS", 99);
testLeaf(rec1, "RNAME", "chr1");
testLeaf(rec1, "POS", 200);
testLeaf(rec1, "MAPQ", 0);
testLeaf(rec1, "CIGAR", "76M");
testLeaf(rec1, "RNEXT", "chr1");
testLeaf(rec1, "PNEXT", 255);
testLeaf(rec1, "TLEN", -130);
testLeaf(rec1, "SEQ", "ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC");
testLeaf(rec1, "QUAL", "BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3:");
testLeaf(rec1, "PG", "0");
testLeaf(rec1, "RG", "exampleBAM.bam");
testLeaf(rec1, "SM", "exampleBAM.bam");
}
}

View File

@ -0,0 +1,95 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// our package
package org.broadinstitute.sting.gatk.walkers.diffengine;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
* Basic unit test for DifferableReaders in reduced reads
*/
public class DifferenceUnitTest extends BaseTest {
// --------------------------------------------------------------------------------
//
// testing routines
//
// --------------------------------------------------------------------------------
private class DifferenceTest extends TestDataProvider {
public DiffElement tree1, tree2;
public String difference;
private DifferenceTest(String tree1, String tree2, String difference) {
this(DiffNode.fromString(tree1), DiffNode.fromString(tree2), difference);
}
private DifferenceTest(DiffElement tree1, DiffElement tree2, String difference) {
super(DifferenceTest.class);
this.tree1 = tree1;
this.tree2 = tree2;
this.difference = difference;
}
public String toString() {
return String.format("tree1=%s tree2=%s diff=%s",
tree1 == null ? "null" : tree1.toOneLineString(),
tree2 == null ? "null" : tree2.toOneLineString(),
difference);
}
}
@DataProvider(name = "data")
public Object[][] createTrees() {
new DifferenceTest("A=X", "A=Y", "A:X!=Y");
new DifferenceTest("A=Y", "A=X", "A:Y!=X");
new DifferenceTest(DiffNode.fromString("A=X"), null, "A:X!=MISSING");
new DifferenceTest(null, DiffNode.fromString("A=X"), "A:MISSING!=X");
return DifferenceTest.getTests(DifferenceTest.class);
}
@Test(enabled = true, dataProvider = "data")
public void testDiffToString(DifferenceTest test) {
logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString()));
logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString()));
logger.warn("Test expected diff : " + test.difference);
SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2);
logger.warn("Observed diffs : " + diff);
Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference );
}
}

View File

@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347"));
Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c"));
executeTest("test no action", spec);
}
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -window 10 -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("ada5540bb3d9b6eb8f1337ba01e90a94"));
Arrays.asList("27b13f179bb4920615dff3a32730d845"));
executeTest("test clustered SNPs", spec);
}
@ -32,17 +32,17 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testMasks() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae"));
Arrays.asList("578f9e774784c25871678e6464fd212b"));
executeTest("test mask all", spec1);
WalkerTestSpec spec2 = new WalkerTestSpec(
baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b64baabe905a5d197cc1ab594147d3d5"));
Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f"));
executeTest("test mask some", spec2);
WalkerTestSpec spec3 = new WalkerTestSpec(
baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("0eff92fe72024d535c44b98e1e9e1993"));
Arrays.asList("5939f80d14b32d88587373532d7b90e5"));
executeTest("test mask extend", spec3);
}
@ -50,7 +50,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("7a40795147cbfa92941489d7239aad92"));
Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368"));
executeTest("test filter #1", spec);
}
@ -58,7 +58,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("e9dd4991b1e325847c77d053dfe8ee54"));
Arrays.asList("c95845e817da7352b9b72bc9794f18fb"));
executeTest("test filter #2", spec);
}
@ -66,7 +66,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("9ded2cce63b8d97550079047051d80a3"));
Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530"));
executeTest("test filter with separate names #2", spec);
}
@ -74,12 +74,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testGenotypeFilters() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("6696e3f65a62ce912230d47cdb0c129b"));
Arrays.asList("96b61e4543a73fe725e433f007260039"));
executeTest("test genotype filter #1", spec1);
WalkerTestSpec spec2 = new WalkerTestSpec(
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9"));
Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e"));
executeTest("test genotype filter #2", spec2);
}
@ -87,7 +87,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testDeletions() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo -B:variant,VCF " + validationDataLocation + "twoDeletions.vcf", 1,
Arrays.asList("e63b58be33c9126ad6cc55489aac539b"));
Arrays.asList("569546fd798afa0e65c5b61b440d07ac"));
executeTest("test deletions", spec);
}
}

View File

@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0"));
Arrays.asList("c97829259463d04b0159591bb6fb44af"));
executeTest("test MultiSample Pilot1", spec);
}
@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("edeb1db288a24baff59575ceedd94243"));
Arrays.asList("2b69667f4770e8c0c894066b7f27e440"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("581990130d90071b084024f4cd7caf91"));
Arrays.asList("b77fe007c2a97fcd59dfd5eef94d8b95"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("d120db27d694a6da32367cc4fb5770fa"));
Arrays.asList("ee8a5e63ddd470726a749e69c0c20f60"));
executeTest("test SingleSample Pilot2", spec);
}
@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4";
private final static String COMPRESSED_OUTPUT_MD5 = "ef31654a2b85b9b2d3bba4f4a75a17b6";
@Test
public void testCompressedOutput() {
@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "a29615dd37222a11b8dadd341b53e43c";
String md5 = "46868a9c4134651c54535fb46b408aee";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -138,9 +138,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testCallingParameters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" );
e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" );
e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" );
e.put( "--min_base_quality_score 26", "5043c9a101e691602eb7a3f9704bdf20" );
e.put( "--min_mapping_quality_score 26", "71a833eb8fd93ee62ae0d5a430f27940" );
e.put( "--p_nonref_model GRID_SEARCH", "ddf443e9dcadef367476b26b4d52c134" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -153,9 +153,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" );
e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" );
e.put( "-sites_only", "eaad6ceb71ab94290650a70bea5ab951" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "05bf7db8a3d19ef4a3d14772c90b732f" );
e.put( "--output_mode EMIT_ALL_SITES", "e4b86740468d7369f0156550855586c7" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -169,12 +169,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("64be99183c100caed4aa5f8bad64c7e9"));
Arrays.asList("71a833eb8fd93ee62ae0d5a430f27940"));
executeTest("test confidence 1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("e76ca54232d02f0d92730e1affeb804e"));
Arrays.asList("79968844dc3ddecb97748c1acf2984c7"));
executeTest("test confidence 2", spec2);
}
@ -186,8 +186,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" );
e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" );
e.put( 0.01, "4e878664f61d2d800146d3762303fde1" );
e.put( 1.0 / 1850, "9204caec095ff5e63ca21a10b6fab453" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
Arrays.asList("1a58ec52df545f946f80cc16c5736a91"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -230,7 +230,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("0919ab7e513c377610e23a67d33608fa"));
Arrays.asList("62d0f6d9de344ce68ce121c13b1e78b1"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq OFF",
1,
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
Arrays.asList("1a58ec52df545f946f80cc16c5736a91"));
executeTest(String.format("test calling with BAQ OFF"), spec);
}
@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("cb37348c41b8181be829912730f747e1"));
Arrays.asList("631ae1f1eb6bc4c1a4136b8495250536"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2"));
Arrays.asList("fd556585c79e2b892a5976668f45aa43"));
executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
}
@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81"));
Arrays.asList("9cd56feedd2787919e571383889fde70"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -301,14 +301,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364"));
Arrays.asList("315e1b78d7a403d7fcbcf0caa8c496b8"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("1b9764b783acf7822edc58e6822eef5b"));
Arrays.asList("cf89e0c54f14482a23c105b73a333d8a"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}

View File

@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:332341-382503",
1,
Arrays.asList("6020a68bbec97fcd87819c10cd4e2470"));
Arrays.asList("9568ba0b6624b97ac55a59bdee2d9150"));
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
}
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:1232503-1332503",
1,
Arrays.asList("712c2145df4756c9a15758865d8007b5"));
Arrays.asList("ce65194c24fe83b0ec90faa6c8e6109a"));
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
}
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
+ " -L chr20:332341-382503",
1,
Arrays.asList("297e0896e4761529d979f40f5ad694db"));
Arrays.asList("02d134fd544613b1e5dd7f7197fc3753"));
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
}
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
+ " -L chr20:332341-382503",
1,
Arrays.asList("52a17f14692d726d3b726cf0ae7f2a09"));
Arrays.asList("2f7ec9904fc054c2ba1a7db05eb29334"));
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
}
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
+ " -L chr20:332341-482503",
1,
Arrays.asList("af768f7958b8f4599c2374f1cc2fc613"));
Arrays.asList("da7a31725f229d1782dd3049848730aa"));
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
}
@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:652810-681757",
1,
Arrays.asList("3dd886672f59a47908b94136d0427bb0"));
Arrays.asList("e9d35cb88089fb0e8ae6678bfaeeac8c"));
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
}

View File

@ -19,9 +19,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
public void testCountCovariates1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f4f8a49bb5764d2a8f61e055f64dcce4");
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9c006f8e9fb5752b1c139f5a8cc7ea88");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "570506533f079d738d70934dfe1c02cd" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e6b98af01c5a08e4954b79ec42db6fc3" );
for ( String parallelism : Arrays.asList("", " -nt 4")) {
for ( Map.Entry<String, String> entry : e.entrySet() ) {
@ -53,9 +53,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
public void testTableRecalibrator1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5");
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "6797d7ffa4ef6c48413719ba32696ccf");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "064c4a7bdd23974c3a9c5f924540df76" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1f9d8944b73169b367cb83b0d22e5432" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -107,7 +107,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorMaxQ70() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5" );
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -133,12 +133,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
}
}
@Test
public void testCountCovariatesSolidIndelsRemoveRefBias() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "0a6cdb9611e5880ea6611205080aa267" );
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c9ea5f995e1e2b7a5688533e678dcedc" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -164,7 +162,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorSolidIndelsRemoveRefBias() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9bc7e1ad223ba759fe5e8ddb4c07369c" );
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "993fae4270e7e1e15986f270acf247af" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -189,13 +187,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
}
}
@Test
public void testCountCovariatesVCF() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3700eaf567e4937f442fc777a226d6ad");
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -219,7 +214,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesBED() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "6803891a3398821fc8a37e19ea8e5a00");
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b460478d9683e827784e42bc352db8bb");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -243,7 +238,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesVCFPlusDBsnp() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f224c42fbc4026db973ccc91265ab5c7");
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d892bd60d8f679affda3c1e3af96c1");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -268,69 +263,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
}
}
@Test
public void testCountCovariatesNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "c024e03f019aeceaf364fa58c8295ad8" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
String md5 = entry.getValue();
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -T CountCovariates" +
" -I " + bam +
" -L 1:10,000,000-10,200,000" +
" -cov ReadGroupCovariate" +
" -cov QualityScoreCovariate" +
" -cov CycleCovariate" +
" -cov DinucCovariate" +
" --default_read_group DefaultReadGroup" +
" --default_platform illumina" +
" --solid_recal_mode SET_Q_ZERO" +
" -recalFile %s",
1, // just one output file
Arrays.asList(md5));
List<File> result = executeTest("testCountCovariatesNoReadGroups", spec).getFirst();
paramsFilesNoReadGroupTest.put(bam, result.get(0).getAbsolutePath());
}
}
@Test
public void testTableRecalibratorNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "1eefbe7ac0376fc1ed1392d85242171e" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
String md5 = entry.getValue();
String paramsFile = paramsFilesNoReadGroupTest.get(bam);
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
if ( paramsFile != null ) {
WalkerTestSpec spec = new WalkerTestSpec(
"-R " + b36KGReference +
" -T TableRecalibration" +
" -I " + bam +
" -L 1:10,100,000-10,300,000" +
" -o %s" +
" --no_pg_tag" +
" --solid_recal_mode SET_Q_ZERO" +
" --default_read_group DefaultReadGroup" +
" --default_platform illumina" +
" -recalFile " + paramsFile,
1, // just one output file
Arrays.asList(md5));
executeTest("testTableRecalibratorNoReadGroups", spec);
}
}
}
@Test
public void testCountCovariatesNoIndex() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "cfc31bb6f51436d1c3b34f62bb801dc8" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "284ccac1f8fe485e52c86333cac7c2d4" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -356,7 +292,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorNoIndex() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "83b848a16034c2fb423d1bb0f5be7784" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "c167799c2d9cab815d7c9b23337f162e" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -380,11 +316,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
}
}
@Test
public void testCountCovariatesFailWithoutDBSNP() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "");
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();

View File

@ -27,7 +27,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"d33212a84368e821cbedecd4f59756d6", // tranches
"4652dca41222bebdf9d9fda343b2a835", // recal file
"5350b1a4c1250cf3b77ca45327c04711"); // cut VCF
"243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {

View File

@ -34,76 +34,76 @@ import java.util.Arrays;
* Tests CombineVariants
*/
public class CombineVariantsIntegrationTest extends WalkerTest {
// public static String baseTestString(String args) {
// return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
// }
//
// public void test1InOut(String file, String md5, boolean vcf3) {
// test1InOut(file, md5, "", vcf3);
// }
//
// public void test1InOut(String file, String md5, String args, boolean vcf3) {
// WalkerTestSpec spec = new WalkerTestSpec(
// baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
// 1,
// Arrays.asList(md5));
// executeTest("testInOut1--" + file, spec);
// }
//
// public void combine2(String file1, String file2, String args, String md5, boolean vcf3) {
// WalkerTestSpec spec = new WalkerTestSpec(
// baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
// 1,
// Arrays.asList(md5));
// executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
// }
//
// public void combineSites(String args, String md5) {
// String file1 = "1000G_omni2.5.b37.sites.vcf";
// String file2 = "hapmap_3.3.b37.sites.vcf";
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
// + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1
// + " -B:hm3,VCF " + validationDataLocation + file2 + args,
// 1,
// Arrays.asList(md5));
// executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
// }
//
//
// @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2117fff6e0d182cd20be508e9661829c", true); }
// @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2cfaf7af3dd119df08b8a9c1f72e2f93", " -setKey foo", true); }
// @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1474ac0fde2ce42a3c24f1c97eab333e", " -setKey null", true); }
// @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "7fc66df048a0ab08cf507906e1d4a308", false); } // official project VCF files in tabix format
//
// @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ec9715f53dbf4531570557c212822f12", false); }
// @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f1072be5f5c6ee810276d9ca6537224d", false); }
//
// @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "b77a1eec725201d9d8e74ee0c45638d3", false); } // official project VCF files in tabix format
// @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "802977fdfd2f4905b501bb06800f60af", false); } // official project VCF files in tabix format
// @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a67157287dd2b24b5cdf7ebf8fcbbe9a", false); }
//
// @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e1f4718a179f1196538a33863da04f53", false); }
//
// @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "b3783384b7c8e877b971033e90beba48", true); }
//
// @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "902e541c87caa72134db6293fc46f0ad"); }
// @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "f339ad4bb5863b58b9c919ce7d040bb9"); }
//
// @Test public void threeWayWithRefs() {
// WalkerTestSpec spec = new WalkerTestSpec(
// baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
// " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
// " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
// " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
// " -setKey centerSet" +
// " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
// " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
// " -genotypeMergeOptions UNIQUIFY -L 1"),
// 1,
// Arrays.asList("a07995587b855f3214fb71940bf23c0f"));
// executeTest("threeWayWithRefs", spec);
// }
public static String baseTestString(String args) {
return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
}
public void test1InOut(String file, String md5, boolean vcf3) {
test1InOut(file, md5, "", vcf3);
}
public void test1InOut(String file, String md5, String args, boolean vcf3) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
1,
Arrays.asList(md5));
executeTest("testInOut1--" + file, spec);
}
public void combine2(String file1, String file2, String args, String md5, boolean vcf3) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
1,
Arrays.asList(md5));
executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
}
public void combineSites(String args, String md5) {
String file1 = "1000G_omni2.5.b37.sites.vcf";
String file2 = "hapmap_3.3.b37.sites.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
+ " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1
+ " -B:hm3,VCF " + validationDataLocation + file2 + args,
1,
Arrays.asList(md5));
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
}
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0", false); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); }
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); }
@Test public void threeWayWithRefs() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
" -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
" -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
" -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
" -setKey centerSet" +
" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5"));
executeTest("threeWayWithRefs", spec);
}
// complex examples with filtering, indels, and multiple alleles
@ -119,8 +119,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
}
@Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "0db9ef50fe54b60426474273d7c7fa99"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "d20acb3d53ba0a02ce92d540ebeda2a9"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "8d1b3d120515f8b56b5a0d10bc5da713"); }
// @Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
}

View File

@ -40,7 +40,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("37e23efd7d6471fc0f807b31ccafe0eb"));
Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd"));
executeTest("test b36 to hg19", spec);
}
@ -49,7 +49,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("b6ef4a2f026fd3843aeb9ed764a66921"));
Arrays.asList("3fd7ec2dc4064ef410786276b0dc9d08"));
executeTest("test b36 to hg19, unsorted samples", spec);
}
@ -58,7 +58,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + hg18Reference + " -B:variant,vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("3275373b3c44ad14a270b50664b3f8a3"));
Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b"));
executeTest("test hg18 to hg19, unsorted", spec);
}
}

View File

@ -18,7 +18,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' -B:variant,VCF3 " + testfile + " -NO_HEADER"),
1,
Arrays.asList("1b9d551298dc048c7d36b60440ff4d50")
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
);
executeTest("testComplexSelection--" + testfile, spec);
@ -31,7 +31,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -sn B -sn C -B:variant,VCF3 " + testfile + " -NO_HEADER"),
1,
Arrays.asList("5ba7536a0819421b330350a160e4261a")
Arrays.asList("b74038779fe6485dbb8734ae48178356")
);
executeTest("testRepeatedLineSelection--" + testfile, spec);
@ -44,7 +44,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -disc myvar -L 20:1012700-1020000 -B:variant,VCF " + b37hapmapGenotypes + " -B:myvar,VCF " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("97621ae8f29955eedfc4e0be3515fcb9")
Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e")
);
executeTest("testDiscordance--" + testFile, spec);
@ -57,7 +57,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -conc hapmap -L 20:1012700-1020000 -B:hapmap,VCF " + b37hapmapGenotypes + " -B:variant,VCF " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("a0ae016fdffcbe7bfb99fd3dbc311407")
Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a")
);
executeTest("testConcordance--" + testFile, spec);

View File

@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
" --NO_HEADER" +
" -o %s",
1,
Arrays.asList("debbbf3e661b6857cc8d99ff7635bb1d")
Arrays.asList("658f580f7a294fd334bd897102616fed")
);
executeTest("testSimpleVCFStreaming", spec);

View File

@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("bd15d98adc76b5798e3bbeff3f936feb");
md5.add("815b82fff92aab41c209eedce2d7e7d9");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
@ -38,7 +38,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("acd15d3f85bff5b545bc353e0e23cc6e");
md5.add("22336ee9c12aa222ce29c3c5babca7d0");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
@ -56,7 +56,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingHapMapInput() {
List<String> md5 = new ArrayList<String>();
md5.add("6f34528569f8cf5941cb365fa77288c1");
md5.add("9bedaa7670b86a07be5191898c3727cf");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
@ -73,7 +73,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>();
md5.add("d8316fc1b9d8e954a58940354119a32e");
md5.add("cc215edec9ca28e5c79ab1b67506f9f7");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +

View File

@ -49,7 +49,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
2, // just one output file
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "e6673737acbb6bfabfcd92c4b2268241"));
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63"));
executeTest("testToVCF", spec);
}

View File

@ -138,30 +138,32 @@ class QGraph extends Logging {
validate()
if (running && numMissingValues == 0) {
logger.info("Generating scatter gather jobs.")
val scatterGathers = jobGraph.edgeSet.filter(edge => scatterGatherable(edge))
if (!scatterGathers.isEmpty) {
logger.info("Generating scatter gather jobs.")
var addedFunctions = List.empty[QFunction]
for (scatterGather <- scatterGathers) {
val functions = scatterGather.asInstanceOf[FunctionEdge]
.function.asInstanceOf[ScatterGatherableFunction]
.generateFunctions()
addedFunctions ++= functions
var addedFunctions = List.empty[QFunction]
for (scatterGather <- scatterGathers) {
val functions = scatterGather.asInstanceOf[FunctionEdge]
.function.asInstanceOf[ScatterGatherableFunction]
.generateFunctions()
addedFunctions ++= functions
}
logger.info("Removing original jobs.")
this.jobGraph.removeAllEdges(scatterGathers)
prune()
logger.info("Adding scatter gather jobs.")
addedFunctions.foreach(function => if (running) this.add(function))
logger.info("Regenerating graph.")
fill
val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile
if (scatterGatherDotFile != null)
renderToDot(scatterGatherDotFile)
validate()
}
logger.info("Removing original jobs.")
this.jobGraph.removeAllEdges(scatterGathers)
prune()
logger.info("Adding scatter gather jobs.")
addedFunctions.foreach(function => if (running) this.add(function))
logger.info("Regenerating graph.")
fill
val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile
if (scatterGatherDotFile != null)
renderToDot(scatterGatherDotFile)
validate()
}
}

View File

@ -286,11 +286,11 @@ object Lsf706JobRunner extends Logging {
// LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct)
// LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist)
logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(runner.jobId))
val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate)
if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) {
val unknownStatusMillis = (System.currentTimeMillis - runner.lastStatusUpdate)
if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) {
// Unknown status has been returned for a while now.
runner.updateStatus(RunnerStatus.FAILED)
logger.error("Unable to read LSF status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description))
logger.error("Unable to read LSF status for %0.2f minutes: job id %d: %s".format(unknownStatusMillis/(60 * 1000D), runner.jobId, runner.function.description))
}
}