diff --git a/build.xml b/build.xml
index 80627fae0..068c69316 100644
--- a/build.xml
+++ b/build.xml
@@ -981,6 +981,7 @@
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java
new file mode 100644
index 000000000..cecbedda8
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2009 The Broad Institute
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.filters;
+
+import net.sf.picard.util.QualityUtil;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.QualityUtils;
+
+/**
+ * Filter out reads whose mapping quality is flagged as unavailable, i.e. equal to
+ * {@link QualityUtils#MAPPING_QUALITY_UNAVAILABLE}.
+ *
+ * @author ebanks
+ * @version 0.1
+ */
+public class MappingQualityUnavailableReadFilter extends ReadFilter {
+    /**
+     * Decides whether the given read should be filtered out.
+     *
+     * @param rec the read to examine
+     * @return true when the read's mapping quality equals the "unavailable" sentinel value
+     */
+    public boolean filterOut(SAMRecord rec) {
+        final int mappingQuality = rec.getMappingQuality();
+        return mappingQuality == QualityUtils.MAPPING_QUALITY_UNAVAILABLE;
+    }
+}
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
similarity index 90%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
index 7e6fc5e82..e49d4117c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
@@ -24,17 +24,16 @@
package org.broadinstitute.sting.gatk.filters;
-import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
/**
- * Filter out zero mapping quality reads.
+ * Filter out mapping quality zero reads.
*
* @author hanna
* @version 0.1
*/
-public class ZeroMappingQualityReadFilter extends ReadFilter {
+public class MappingQualityZeroReadFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == 0);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
index 0be737897..51d290763 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
@@ -62,5 +62,5 @@ public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAn
public List getKeyNames() { return Arrays.asList("AB"); }
- public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), -1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); }
+ public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); }
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java
index 143722d7c..f3ec2b1df 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java
@@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@@ -41,8 +42,8 @@ import java.util.*;
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
- private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
- new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
+ private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
+ new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
index 754d28dfd..ee66b50ee 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
@@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@@ -142,5 +143,5 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot
// public String getIndelBases()
public List getKeyNames() { return Arrays.asList("AD"); }
- public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFCompoundHeaderLine.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
+ public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
index 11f86b972..8260a5a81 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
@@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@@ -21,7 +22,7 @@ public class MappingQualityRankSumTest extends RankSumTest {
protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) {
for ( final PileupElement p : pileup ) {
- if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 and 255 are special mapping qualities used as a code by aligners
+ if ( isUsableBase(p) ) {
if ( p.getBase() == ref ) {
refQuals.add((double)p.getMappingQual());
} else if ( p.getBase() == alt ) {
@@ -34,7 +35,7 @@ public class MappingQualityRankSumTest extends RankSumTest {
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
for (final PileupElement p: pileup) {
- if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() < 254) {
+ if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
// retrieve likelihood information corresponding to this read
LinkedHashMap el = indelLikelihoodMap.get(p);
// by design, first element in LinkedHashMap was ref allele
@@ -54,8 +55,6 @@ public class MappingQualityRankSumTest extends RankSumTest {
refQuals.add((double)p.getMappingQual());
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
altQuals.add((double)p.getMappingQual());
-
-
}
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
index ba3e2cc8b..3b64abfff 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
@@ -47,5 +47,5 @@ public class NBaseCount implements InfoFieldAnnotation {
public List getKeyNames() { return Arrays.asList("PercentNBaseSolid"); }
- public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 4, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); }
+ public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 1, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); }
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java
index 6e80c7555..1ef7ccd0b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java
@@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@@ -38,8 +39,10 @@ public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotatio
pileup = context.getBasePileup();
if (pileup != null) {
- for (PileupElement p : pileup )
- qualities[index++] = p.getRead().getMappingQuality();
+ for (PileupElement p : pileup ) {
+ if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
+ qualities[index++] = p.getMappingQual();
+ }
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
index 1a967293f..f00abd6a1 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
@@ -106,6 +106,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnota
protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals);
protected static boolean isUsableBase( final PileupElement p ) {
- return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed quality score here
+ return !( p.isDeletion() ||
+ p.getMappingQual() == 0 ||
+ p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
+ ((int)p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE ); // need the unBAQed quality score here
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java
index f287549bb..a670532af 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java
@@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@@ -200,8 +201,8 @@ public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation {
1,
VCFHeaderLineType.Integer,
"Total read depth per sample, including MQ0"),
- new VCFFormatHeaderLine(getKeyNames().get(1),
- VCFCompoundHeaderLine.UNBOUNDED,
+ new VCFFormatHeaderLine(getKeyNames().get(1),
+ VCFHeaderLineCount.UNBOUNDED,
VCFHeaderLineType.Float,
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java
index 82f16be42..e2fd2a3d4 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java
@@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@@ -65,5 +66,5 @@ public class SampleList implements InfoFieldAnnotation {
public List getKeyNames() { return Arrays.asList("Samples"); }
- public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFInfoHeaderLine.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
+ public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java
new file mode 100644
index 000000000..a5ebf27bb
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import net.sf.samtools.*;
+import net.sf.samtools.util.BlockCompressedInputStream;
+import org.broad.tribble.readers.AsciiLineReader;
+import org.broad.tribble.readers.LineReader;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.variantcontext.Genotype;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 1:09 PM
+ *
+ * Class implementing diffnode reader for BAM
+ */
+public class BAMDiffableReader implements DiffableReader {
+    /** Magic bytes ('B', 'A', 'M', 0x01) expected at the start of the decompressed BAM stream. */
+    private static final byte[] BAM_MAGIC = { 'B', 'A', 'M', 1 };
+
+    @Override
+    public String getName() { return "BAM"; }
+
+    /**
+     * Loads the reads of a BAM file into a diff tree rooted at the file name.
+     *
+     * Each read becomes a child node named after the read name (dots replaced by underscores,
+     * plus a "_1" / "_2" suffix for paired reads) holding the standard SAM fields and every
+     * attribute tag of the record.  When two reads map to the same node name only the first
+     * one is kept.
+     *
+     * @param file the BAM file to read
+     * @param maxElementsToRead maximum number of records to consume, or -1 for no limit
+     * @return the element bound to the root node of the tree
+     */
+    @Override
+    public DiffElement readFromFile(File file, int maxElementsToRead) {
+        final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
+        try {
+            reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
+
+            DiffNode root = DiffNode.rooted(file.getName());
+            SAMRecordIterator iterator = reader.iterator();
+
+            int count = 0;
+            while ( iterator.hasNext() ) {
+                // stop after exactly maxElementsToRead records (the previous '>' test let one extra through)
+                if ( maxElementsToRead != -1 && count++ >= maxElementsToRead )
+                    break;
+                final SAMRecord record = iterator.next();
+
+                // name is the read name + first of pair
+                String name = record.getReadName().replace('.', '_');
+                if ( record.getReadPairedFlag() ) {
+                    name += record.getFirstOfPairFlag() ? "_1" : "_2";
+                }
+
+                DiffNode readRoot = DiffNode.empty(name, root);
+
+                // add fields
+                readRoot.add("NAME", record.getReadName());
+                readRoot.add("FLAGS", record.getFlags());
+                readRoot.add("RNAME", record.getReferenceName());
+                readRoot.add("POS", record.getAlignmentStart());
+                readRoot.add("MAPQ", record.getMappingQuality());
+                readRoot.add("CIGAR", record.getCigarString());
+                readRoot.add("RNEXT", record.getMateReferenceName());
+                readRoot.add("PNEXT", record.getMateAlignmentStart());
+                readRoot.add("TLEN", record.getInferredInsertSize());
+                readRoot.add("SEQ", record.getReadString());
+                readRoot.add("QUAL", record.getBaseQualityString());
+
+                for ( SAMRecord.SAMTagAndValue xt : record.getAttributes() ) {
+                    readRoot.add(xt.tag, xt.value);
+                }
+
+                // add record to root
+                if ( ! root.hasElement(name) )
+                    // protect ourselves from malformed files
+                    root.add(readRoot);
+            }
+
+            return root.getBinding();
+        } finally {
+            // release the underlying file even when a record fails to decode
+            reader.close();
+        }
+    }
+
+    /**
+     * Checks whether the file starts with the BAM magic bytes once block-decompressed.
+     *
+     * @param file the candidate file
+     * @return true if the magic matches; false if it does not, or if the file cannot be read
+     *         or is truncated
+     */
+    @Override
+    public boolean canRead(File file) {
+        final byte[] buffer = new byte[BAM_MAGIC.length];
+        FileInputStream fstream = null;
+        try {
+            fstream = new FileInputStream(file);
+            // a short read leaves trailing zero bytes in buffer, which can never match the magic
+            new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length);
+            return Arrays.equals(buffer, BAM_MAGIC);
+        } catch ( IOException e ) {
+            return false;
+        } catch ( net.sf.samtools.FileTruncatedException e ) {
+            return false;
+        } finally {
+            if ( fstream != null ) {
+                try {
+                    fstream.close();
+                } catch ( IOException ignored ) {
+                    // nothing useful can be done if closing the probe stream fails
+                }
+            }
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java
new file mode 100644
index 000000000..4c3f7bd95
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import com.google.java.contract.*;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:55 PM
+ *
+ * A named element of a diff tree: binds a name to a DiffValue and records the
+ * parent element it belongs to.  The special ROOT element is the only one without a parent.
+ */
+@Invariant({
+        "name != null",
+        "value != null",
+        "parent != null || name.equals(\"ROOT\")",
+        "value == null || value.getBinding() == this"})
+public class DiffElement {
+    /** Shared sentinel element named "ROOT"; it has no parent. */
+    public final static DiffElement ROOT = new DiffElement();
+
+    final private String name;
+    final private DiffElement parent;
+    final private DiffValue value;
+
+    /**
+     * For ROOT only
+     */
+    private DiffElement() {
+        this.name = "ROOT";
+        this.parent = null;
+        this.value = new DiffValue(this, "ROOT");
+    }
+
+    /**
+     * Creates a non-root element and binds the value to it.
+     *
+     * @param name the element name; must not be the reserved name "ROOT"
+     * @param parent the enclosing element
+     * @param value the value held by this element; its binding is set to the new element
+     * @throws IllegalArgumentException if name is "ROOT"
+     */
+    @Requires({"name != null", "parent != null", "value != null"})
+    public DiffElement(String name, DiffElement parent, DiffValue value) {
+        if ( name.equals("ROOT") ) throw new IllegalArgumentException("Cannot use reserved name ROOT");
+        this.name = name;
+        this.parent = parent;
+        this.value = value;
+        this.value.setBinding(this);
+    }
+
+    /** @return the name of this element */
+    @Ensures({"result != null"})
+    public String getName() {
+        return name;
+    }
+
+    /** @return the parent element, or null for ROOT */
+    public DiffElement getParent() {
+        return parent;
+    }
+
+    /** @return the value bound to this element */
+    @Ensures({"result != null"})
+    public DiffValue getValue() {
+        return value;
+    }
+
+    /** @return true only for the shared ROOT sentinel */
+    public boolean isRoot() { return this == ROOT; }
+
+    /** @return "name=value" using the value's own toString() */
+    @Ensures({"result != null"})
+    @Override
+    public String toString() {
+        return getName() + "=" + getValue().toString();
+    }
+
+    /**
+     * Renders "name=value" preceded by offset spaces.
+     *
+     * @param offset number of leading spaces; values of zero or less add no indentation
+     * @return the indented representation, with the same offset passed on to the value
+     */
+    public String toString(int offset) {
+        // the fallback must be the empty string: an int 0 here would be concatenated as the text "0"
+        return (offset > 0 ? Utils.dupString(' ', offset) : "") + getName() + "=" + getValue().toString(offset);
+    }
+
+    /**
+     * @return the dot-separated path of names from just below ROOT down to this element;
+     *         the empty string for ROOT itself
+     */
+    @Ensures({"result != null"})
+    public final String fullyQualifiedName() {
+        if ( isRoot() )
+            return "";
+        else if ( parent.isRoot() )
+            return name;
+        else
+            return parent.fullyQualifiedName() + "." + name;
+    }
+
+    /** @return "name=value" using the value's one-line representation */
+    @Ensures({"result != null"})
+    public String toOneLineString() {
+        return getName() + "=" + getValue().toOneLineString();
+    }
+
+    /**
+     * @return the value cast to a DiffNode
+     * @throws ReviewedStingException if the value is not compound
+     */
+    @Ensures({"result != null"})
+    public DiffNode getValueAsNode() {
+        if ( getValue().isCompound() )
+            return (DiffNode)getValue();
+        else
+            throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
+    }
+
+    /** @return one (for this element) plus the size reported by the value */
+    public int size() {
+        return 1 + getValue().size();
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
new file mode 100644
index 000000000..6d85df71d
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.report.GATKReport;
+import org.broadinstitute.sting.gatk.report.GATKReportTable;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.classloader.PluginManager;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.*;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:51 PM
+ * A generic engine for comparing tree-structured objects
+ */
+public class DiffEngine {
+    protected static final Logger logger = Logger.getLogger(DiffEngine.class);
+
+    // NOTE(review): presumably filled by loadDiffableReaders(), which is defined outside the visible code
+    private final Map readers = new HashMap();
+
+    /**
+     * Creates an engine and loads the available diffable readers.
+     */
+    public DiffEngine() {
+        loadDiffableReaders();
+    }
+
+ // --------------------------------------------------------------------------------
+ //
+ // difference calculation
+ //
+ // --------------------------------------------------------------------------------
+
+    /**
+     * Computes the differences between two bound elements.
+     *
+     * Two compound values are compared node by node, two atomic values are compared directly,
+     * and a compound/atomic mismatch is reported as a single structural difference.
+     *
+     * @param master the reference element
+     * @param test the element compared against master
+     * @return the differences found between master and test
+     */
+    public List diff(DiffElement master, DiffElement test) {
+        DiffValue masterValue = master.getValue();
+        DiffValue testValue = test.getValue();
+
+        // both sides must be compound before recursing; checking master twice made a compound
+        // master with an atomic test fail inside getValueAsNode() instead of reporting a difference
+        if ( masterValue.isCompound() && testValue.isCompound() ) {
+            return diff(master.getValueAsNode(), test.getValueAsNode());
+        } else if ( masterValue.isAtomic() && testValue.isAtomic() ) {
+            return diff(masterValue, testValue);
+        } else {
+            // structural difference in types. one is node, other is leaf
+            return Arrays.asList(new SpecificDifference(master, test));
+        }
+    }
+
+    /**
+     * Computes the differences between two nodes by visiting every element name that
+     * occurs in either of them.
+     *
+     * @param master the reference node
+     * @param test the node compared against master
+     * @return the differences found; an element present on only one side is one difference
+     */
+    public List diff(DiffNode master, DiffNode test) {
+        // union of the element names of both nodes
+        Set unionOfNames = new HashSet(master.getElementNames());
+        unionOfNames.addAll(test.getElementNames());
+
+        List differences = new ArrayList();
+        for ( String name : unionOfNames ) {
+            DiffElement masterElt = master.getElement(name);
+            DiffElement testElt = test.getElement(name);
+
+            if ( masterElt != null && testElt != null ) {
+                // present on both sides: compare the elements themselves
+                differences.addAll(diff(masterElt, testElt));
+                continue;
+            }
+
+            if ( masterElt == null && testElt == null )
+                throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
+
+            // exactly one side is null, so we are missing a value
+            // todo -- should one of these be a special MISSING item?
+            differences.add(new SpecificDifference(masterElt, testElt));
+        }
+
+        return differences;
+    }
+
+    /**
+     * Compares two values through equals() on their underlying objects.
+     *
+     * @param master the reference value
+     * @param test the value compared against master
+     * @return an empty list when the values are equal, otherwise a single difference
+     *         built from the elements the two values are bound to
+     */
+    public List diff(DiffValue master, DiffValue test) {
+        final boolean sameValue = master.getValue().equals(test.getValue());
+        if ( ! sameValue )
+            return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding()));
+
+        return Collections.emptyList();
+    }
+
+ // --------------------------------------------------------------------------------
+ //
+ // Summarizing differences
+ //
+ // --------------------------------------------------------------------------------
+
+    /**
+     * Emits a summary of the diffs to out.  Suppose you have the following three differences:
+     *
+     *   A.X.Z:1!=2
+     *   A.Y.Z:3!=4
+     *   B.X.Z:5!=6
+     *
+     * The above is the itemized list of the differences.  The summary looks for common differences
+     * in the name hierarchy, counts those shared elements, and emits the differences that occur
+     * in order of decreasing counts.
+     *
+     * So, in the above example, what are the shared elements?
+     *
+     *   A.X.Z and B.X.Z share X.Z, so there's a *.X.Z with count 2
+     *   A.X.Z, A.Y.Z, and B.X.Z all share *.*.Z, with count 3
+     *   Each of A.X.Z, A.Y.Z, and B.X.Z are individually unique, with count 1
+     *
+     * So we would emit the following summary:
+     *
+     *   *.*.Z: 3
+     *   *.X.Z: 2
+     *   A.X.Z: 1 [specific difference: 1!=2]
+     *   A.Y.Z: 1 [specific difference: 3!=4]
+     *   B.X.Z: 1 [specific difference: 5!=6]
+     *
+     * The algorithm to accomplish this calculation is relatively simple.  Start with all of the
+     * concrete differences.  For each pair of differences A1.A2....AN and B1.B2....BN:
+     *
+     *   find the longest common postfix Si.Si+1...SN where Ak = Bk = Sk for every k >= i
+     *   If nothing is shared, the concrete path itself is the summary
+     *   Otherwise generate the summarized value X = *.*...Si.Si+1...SN
+     *   if X is a known summary, increment its count, otherwise set its count to 1
+     *
+     * Note that only pairs of the same length are considered as potentially equivalent
+     *
+     * @param diffs the concrete differences to summarize
+     * @param params determines how we display the items
+     */
+    public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) {
+        final List summaries = summarizeDifferences(diffs);
+        printSummaryReport(summaries, params);
+    }
+
+    /**
+     * Summarizes a list of concrete differences by delegating to summarizedDifferencesOfPaths.
+     *
+     * @param diffs the concrete differences
+     * @return the sorted list of summarized differences
+     */
+    public List summarizeDifferences(List diffs) {
+        final List summaries = summarizedDifferencesOfPaths(diffs);
+        return summaries;
+    }
+
+    /**
+     * Splits a dot-separated difference name into its path components.
+     *
+     * @param diffName the name to split, e.g. "A.X.Z"
+     * @return the components in order, e.g. ["A", "X", "Z"]
+     */
+    protected static final String[] diffNameToPath(String diffName) {
+        final String[] pathParts = diffName.split("\\.");
+        return pathParts;
+    }
+
+    /**
+     * Variant of summarizedDifferencesOfPaths that accepts the differences as path strings;
+     * each string is wrapped in a Difference before summarizing.
+     *
+     * @param singletonDiffs the difference paths as strings
+     * @return the sorted list of summarized differences
+     */
+    protected List summarizedDifferencesOfPathsFromString(List singletonDiffs) {
+        List asDifferences = new ArrayList();
+        for ( String pathString : singletonDiffs )
+            asDifferences.add(new Difference(pathString));
+
+        return summarizedDifferencesOfPaths(asDifferences);
+    }
+
+    /**
+     * Builds the summarized differences for a list of concrete differences.
+     *
+     * First every pair of equal-length paths (each path is also paired with itself) is used to
+     * catalog a summary path: the concrete path when the pair shares no postfix, otherwise the
+     * path with its non-shared leading parts replaced by wildcards.  Then the count of each
+     * cataloged summary is incremented once per concrete difference it matches.
+     *
+     * @param singletonDiffs the concrete differences
+     * @return the summaries, sorted by the natural ordering of Difference
+     */
+    protected List summarizedDifferencesOfPaths(List<? extends Difference> singletonDiffs) {
+        Map summaries = new HashMap();
+
+        // create the initial set of differences
+        for ( int i = 0; i < singletonDiffs.size(); i++ ) {
+            // does not depend on j, so fetch it once per outer iteration
+            Difference diffPath1 = singletonDiffs.get(i);
+            for ( int j = 0; j <= i; j++ ) {
+                Difference diffPath2 = singletonDiffs.get(j);
+                if ( diffPath1.length() == diffPath2.length() ) {
+                    int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts());
+                    String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath();
+                    addSummary(summaries, path, true);
+                }
+            }
+        }
+
+        // count differences
+        for ( Difference diffPath : singletonDiffs ) {
+            for ( Difference sumDiff : summaries.values() ) {
+                if ( sumDiff.matches(diffPath.getParts()) )
+                    addSummary(summaries, sumDiff.getPath(), false);
+            }
+        }
+
+        List sortedSummaries = new ArrayList(summaries.values());
+        Collections.sort(sortedSummaries);
+        return sortedSummaries;
+    }
+
+    /**
+     * Registers a summary path, or bumps the count of one that is already registered.
+     *
+     * @param summaries the summaries collected so far, keyed by path
+     * @param path the summary path to catalog or count
+     * @param onlyCatalog when true an already-known path is left untouched; when false its
+     *                    count is incremented
+     */
+    private static void addSummary(Map summaries, String path, boolean onlyCatalog) {
+        if ( ! summaries.containsKey(path) ) {
+            // first sighting: catalog the path under the key reported by the new Difference
+            Difference newSummary = new Difference(path);
+            summaries.put(newSummary.getPath(), newSummary);
+            return;
+        }
+
+        if ( ! onlyCatalog )
+            summaries.get(path).incCount();
+    }
+
+ protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) { // writes the summaries to params.out as a single-table GATKReport
+ GATKReport report = new GATKReport();
+ final String tableName = "diffences";
+ report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information");
+ GATKReportTable table = report.getTable(tableName);
+ table.addPrimaryKey("Difference", true);
+ table.addColumn("NumberOfOccurrences", 0);
+
+ int count = 0, count1 = 0; // count: rows considered so far; count1: rows whose occurrence count is exactly 1
+ for ( Difference diff : sortedSummaries ) {
+ if ( diff.getCount() < params.minSumDiffToShow )
+ // in order, so break as soon as the count is too low
+ break;
+
+ if ( params.maxItemsToDisplay != 0 && count++ >= params.maxItemsToDisplay ) // 0 disables the cap; >= so that at most maxItemsToDisplay rows are written
+ break;
+
+ if ( diff.getCount() == 1 ) {
+ count1++;
+ if ( params.maxCountOneItems != 0 && count1 > params.maxCountOneItems ) // 0 disables the cap on count-one rows
+ break;
+ }
+
+ table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
+ }
+
+ table.write(params.out);
+ }
+
+ protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) { // number of trailing parts the two paths share
+ int i = 0;
+ for ( ; i < diffPath1.length; i++ ) {
+ int j = diffPath1.length - i - 1; // walk from the last part towards the first; the same index is used for both arrays, so equal lengths are assumed
+ if ( ! diffPath1[j].equals(diffPath2[j]) )
+ break;
+ }
+ return i;
+ }
+
+ /**
+ * parts is [A B C D]
+ * commonPostfixLength: how many parts are shared at the end, suppose its 2
+ * We want to create a string *.*.C.D
+ *
+ * @param parts the parts of the path; the caller's array is not modified
+ * @param commonPostfixLength number of trailing parts to keep as-is
+ * @return the parts joined with '.', with every part before the kept suffix replaced by *
+ */
+ protected static String summarizedPath(String[] parts, int commonPostfixLength) {
+ int stop = parts.length - commonPostfixLength; // number of leading parts to replace with *
+ if ( stop > 0 ) parts = parts.clone(); // copy before overwriting so the caller's array stays intact
+ for ( int i = 0; i < stop; i++ ) {
+ parts[i] = "*";
+ }
+ return Utils.join(".", parts);
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // plugin manager
+ //
+ // --------------------------------------------------------------------------------
+
+ public void loadDiffableReaders() { // instantiates every DiffableReader plugin and registers it in readers under its name
+ List> drClasses = new PluginManager( DiffableReader.class ).getPlugins();
+
+ logger.info("Loading diffable modules:");
+ for (Class extends DiffableReader> drClass : drClasses ) {
+ logger.info("\t" + drClass.getSimpleName());
+
+ try {
+ DiffableReader dr = drClass.newInstance(); // newInstance() invokes the class's no-argument constructor
+ readers.put(dr.getName(), dr);
+ } catch (InstantiationException e) {
+ throw new ReviewedStingException("Unable to instantiate module '" + drClass.getSimpleName() + "'"); // NOTE(review): the cause e is not passed along
+ } catch (IllegalAccessException e) {
+ throw new ReviewedStingException("Illegal access error when trying to instantiate '" + drClass.getSimpleName() + "'"); // NOTE(review): the cause e is not passed along
+ }
+ }
+ }
+
+ protected Map getReaders() {
+ return readers; // returns the registry map itself, not a copy
+ }
+
+ protected DiffableReader getReader(String name) {
+ return readers.get(name); // looks the reader up by the key it was registered under
+ }
+
+ /**
+ * Returns a reader appropriate for this file, or null if no such reader exists
+ * @param file the file to find a reader for
+ * @return the first registered reader whose canRead(file) is true, or null
+ */
+ public DiffableReader findReaderForFile(File file) {
+ for ( DiffableReader reader : readers.values() )
+ if (reader.canRead(file) )
+ return reader; // first match wins
+
+ return null;
+ }
+
+ /**
+ * Returns true if reader appropriate for this file, or false if no such reader exists
+ * @param file the file to test
+ * @return true if findReaderForFile(file) finds a reader
+ */
+ public boolean canRead(File file) {
+ return findReaderForFile(file) != null;
+ }
+
+
+ public DiffElement createDiffableFromFile(File file) {
+ return createDiffableFromFile(file, -1); // convenience overload: passes -1 as maxElementsToRead
+ }
+
+ public DiffElement createDiffableFromFile(File file, int maxElementsToRead) {
+ DiffableReader reader = findReaderForFile(file); // null when no registered reader accepts the file
+ if ( reader == null )
+ throw new UserException("Unsupported file type: " + file);
+ else
+ return reader.readFromFile(file, maxElementsToRead);
+ }
+
+ public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) { // returns false without reporting when either file has no reader
+ DiffEngine diffEngine = new DiffEngine();
+
+ if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
+ DiffElement master = diffEngine.createDiffableFromFile(masterFile);
+ DiffElement test = diffEngine.createDiffableFromFile(testFile);
+ List diffs = diffEngine.diff(master, test);
+ diffEngine.reportSummarizedDifferences(diffs, params); // report is written to params.out
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public static class SummaryReportParams { // display options read by printSummaryReport
+ PrintStream out = System.out; // stream the report table is written to
+ int maxItemsToDisplay = 0; // cap on rows written; 0 disables the cap
+ int maxCountOneItems = 0; // cap on rows whose count is exactly 1; 0 disables the cap
+ int minSumDiffToShow = 0; // reporting stops at the first summary whose count is below this
+
+ public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) {
+ this.out = out;
+ this.maxItemsToDisplay = maxItemsToDisplay;
+ this.maxCountOneItems = maxCountOneItems;
+ this.minSumDiffToShow = minSumDiffToShow;
+ }
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java
new file mode 100644
index 000000000..2f48de2d3
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.*;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:55 PM
+ *
+ * A compound DiffValue whose value is a map from element names to the
+ * DiffElements bound under this node
+ */
+public class DiffNode extends DiffValue {
+ private Map getElementMap() {
+ return (Map)super.getValue(); // the value stored in the DiffValue base is this node's name -> element map
+ }
+ private static Map emptyElements() { return new HashMap(); }
+
+ private DiffNode(Map elements) {
+ super(elements);
+ }
+
+ private DiffNode(DiffElement binding, Map elements) {
+ super(binding, elements);
+ }
+
+ // ---------------------------------------------------------------------------
+ //
+ // constructors
+ //
+ // ---------------------------------------------------------------------------
+
+ public static DiffNode rooted(String name) {
+ return empty(name, DiffElement.ROOT); // an empty node whose parent is the ROOT element
+ }
+
+ public static DiffNode empty(String name, DiffElement parent) {
+ DiffNode df = new DiffNode(emptyElements());
+ DiffElement elt = new DiffElement(name, parent, df); // the binding needs the node, so the node is created first
+ df.setBinding(elt); // then the node is pointed back at its binding
+ return df;
+ }
+
+ public static DiffNode empty(String name, DiffValue parent) {
+ return empty(name, parent.getBinding());
+ }
+
+ // ---------------------------------------------------------------------------
+ //
+ // accessors
+ //
+ // ---------------------------------------------------------------------------
+
+ @Override
+ public boolean isAtomic() { return false; }
+
+ public Collection getElementNames() {
+ return getElementMap().keySet(); // a view of the underlying map's keys, not a copy
+ }
+
+ public Collection getElements() {
+ return getElementMap().values(); // a view of the underlying map's values, not a copy
+ }
+
+ private Collection getElements(boolean atomicOnly) { // atomicOnly=true selects atomic elements, false selects compound ones
+ List elts = new ArrayList();
+ for ( DiffElement elt : getElements() )
+ if ( (atomicOnly && elt.getValue().isAtomic()) || (! atomicOnly && elt.getValue().isCompound()))
+ elts.add(elt);
+ return elts;
+ }
+
+ public Collection getAtomicElements() {
+ return getElements(true);
+ }
+
+ public Collection getCompoundElements() {
+ return getElements(false);
+ }
+
+ /**
+ * Returns the element bound to name, or null if no such binding exists
+ * @param name the element name to look up
+ * @return the element bound to name, or null
+ */
+ public DiffElement getElement(String name) {
+ return getElementMap().get(name);
+ }
+
+ /**
+ * Returns true if name is bound in this node
+ * @param name the element name to look up
+ * @return true if getElement(name) is not null
+ */
+ public boolean hasElement(String name) {
+ return getElement(name) != null;
+ }
+
+ // ---------------------------------------------------------------------------
+ //
+ // add
+ //
+ // ---------------------------------------------------------------------------
+
+ @Requires("elt != null")
+ public void add(DiffElement elt) {
+ if ( getElementMap().containsKey(elt.getName()) ) // names must be unique within a node
+ throw new IllegalArgumentException("Attempting to rebind already existing binding: " + elt + " node=" + this);
+ getElementMap().put(elt.getName(), elt);
+ }
+
+ @Requires("elt != null")
+ public void add(DiffValue elt) {
+ add(elt.getBinding()); // the value is added under the name carried by its binding
+ }
+
+ @Requires("elts != null")
+ public void add(Collection elts) {
+ for ( DiffElement e : elts )
+ add(e);
+ }
+
+ public void add(String name, Object value) {
+ add(new DiffElement(name, this.getBinding(), new DiffValue(value))); // wraps value in a DiffValue bound to name with this node's binding as parent
+ }
+
+ public int size() {
+ int count = 0;
+ for ( DiffElement value : getElements() )
+ count += value.size(); // total is the sum of the sizes of all child elements
+ return count;
+ }
+
+ // ---------------------------------------------------------------------------
+ //
+ // toString
+ //
+ // ---------------------------------------------------------------------------
+
+ @Override
+ public String toString() {
+ return toString(0);
+ }
+
+ @Override
+ public String toString(int offset) {
+ String off = offset > 0 ? Utils.dupString(' ', offset) : ""; // indentation prefix, used only for the closing parenthesis
+ StringBuilder b = new StringBuilder();
+
+ b.append("(").append("\n");
+ Collection atomicElts = getAtomicElements();
+ for ( DiffElement elt : atomicElts ) { // atomic elements are printed first, at offset + 2
+ b.append(elt.toString(offset + 2)).append('\n');
+ }
+
+ for ( DiffElement elt : getCompoundElements() ) { // compound elements follow, at offset + 4
+ b.append(elt.toString(offset + 4)).append('\n');
+ }
+ b.append(off).append(")").append("\n");
+
+ return b.toString();
+ }
+
+ @Override
+ public String toOneLineString() {
+ StringBuilder b = new StringBuilder();
+
+ b.append('(');
+ List parts = new ArrayList();
+ for ( DiffElement elt : getElements() )
+ parts.add(elt.toOneLineString());
+ b.append(Utils.join(" ", parts)); // children are separated by single spaces
+ b.append(')');
+
+ return b.toString();
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // fromString and toOneLineString
+ //
+ // --------------------------------------------------------------------------------
+
+ public static DiffElement fromString(String tree) {
+ return fromString(tree, DiffElement.ROOT);
+ }
+
+ /**
+ * Doesn't support full tree structure parsing
+ * @param tree text of the form name=value or name=(child child ...)
+ * @param parent the element the parsed element is bound under
+ * @return the binding of the parsed node or value
+ */
+ private static DiffElement fromString(String tree, DiffElement parent) {
+ // X=(A=A B=B C=(D=D))
+ String[] parts = tree.split("=", 2); // limit 2: split on the first '=' only, the value may contain further '='
+ if ( parts.length != 2 )
+ throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + Arrays.toString(parts)); // Arrays.toString prints the contents rather than the array's identity hash
+ String name = parts[0];
+ String value = parts[1];
+
+ if ( value.length() == 0 )
+ throw new ReviewedStingException("Illegal tree structure: " + value + " at " + tree);
+
+ if ( value.charAt(0) == '(' ) {
+ if ( ! value.endsWith(")") )
+ throw new ReviewedStingException("Illegal tree structure. Missing ): " + value + " at " + tree);
+ String subtree = value.substring(1, value.length()-1); // strip the enclosing parentheses
+ DiffNode rec = DiffNode.empty(name, parent);
+ String[] subParts = subtree.split(" "); // splits on every space, so a nested node containing a space is split apart
+ for ( String subPart : subParts ) {
+ rec.add(fromString(subPart, rec.getBinding()));
+ }
+ return rec.getBinding();
+ } else {
+ return new DiffValue(name, parent, value).getBinding(); // leaf: the text after '=' becomes the value
+ }
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java
new file mode 100644
index 000000000..ecb836af9
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.Requires;
+import org.broadinstitute.sting.gatk.walkers.RodWalker;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.List;
+
+/**
+ * Compares two record-oriented files, itemizing specific differences between equivalent
+ * records in the two files. Reports both itemized and summarized differences.
+ * @author Mark DePristo
+ * @version 0.1
+ */
+@Requires(value={})
+public class DiffObjectsWalker extends RodWalker {
+ @Output(doc="File to which results should be written",required=true)
+ protected PrintStream out;
+
+ @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false)
+ int MAX_OBJECTS_TO_READ = -1;
+
+ @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false)
+ int MAX_DIFFS = 0; // passed to SummaryReportParams as maxItemsToDisplay
+
+ @Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false)
+ int MAX_COUNT1_DIFFS = 0; // passed to SummaryReportParams as maxCountOneItems
+
+ @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
+ int minCountForDiff = 1; // passed to SummaryReportParams as minSumDiffToShow
+
+ @Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false)
+ boolean showItemizedDifferences = false;
+
+ @Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true)
+ File masterFile;
+
+ @Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true)
+ File testFile;
+
+ final DiffEngine diffEngine = new DiffEngine();
+
+ @Override
+ public void initialize() {
+
+ }
+
+ @Override
+ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+ return 0; // the traversal itself contributes nothing; all work happens in onTraversalDone
+ }
+
+ @Override
+ public Integer reduceInit() {
+ return 0;
+ }
+
+ @Override
+ public Integer reduce(Integer counter, Integer sum) {
+ return counter + sum;
+ }
+
+ @Override
+ public void onTraversalDone(Integer sum) { // reads both files, diffs them, and writes itemized (optional) and summarized results to out
+ out.printf("Reading master file %s%n", masterFile);
+ DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
+ out.printf(" Read %d objects%n", master.size());
+ out.printf("Reading test file %s%n", testFile);
+ DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
+ out.printf(" Read %d objects%n", test.size());
+
+// out.printf("Master diff objects%n");
+// out.println(master.toString());
+// out.printf("Test diff objects%n");
+// out.println(test.toString());
+
+ List diffs = diffEngine.diff(master, test);
+ if ( showItemizedDifferences ) {
+ out.printf("Itemized results%n");
+ for ( SpecificDifference diff : diffs )
+ out.printf("DIFF: %s%n", diff.toString()); // one line per individual difference
+ }
+
+ DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff);
+ diffEngine.reportSummarizedDifferences(diffs, params);
+ }
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java
new file mode 100644
index 000000000..3750496a1
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import org.broadinstitute.sting.utils.Utils;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:55 PM
+ *
+ * A value in a diff tree: wraps an arbitrary object together with the
+ * DiffElement binding that names it
+ */
+public class DiffValue {
+ private DiffElement binding = null; // the element that names this value; stays null until set
+ final private Object value; // the wrapped object; never reassigned
+
+ public DiffValue(Object value) {
+ this.value = value; // binding is left null by this constructor
+ }
+
+ public DiffValue(DiffElement binding, Object value) {
+ this.binding = binding;
+ this.value = value;
+ }
+
+ public DiffValue(DiffValue parent, Object value) {
+ this(parent.getBinding(), value); // NOTE(review): reuses the parent's binding as this value's own binding - confirm this is intended
+ }
+
+ public DiffValue(String name, DiffElement parent, Object value) {
+ this.binding = new DiffElement(name, parent, this); // creates a fresh binding called name under parent
+ this.value = value;
+ }
+
+ public DiffValue(String name, DiffValue parent, Object value) {
+ this(name, parent.getBinding(), value);
+ }
+
+ public DiffElement getBinding() {
+ return binding;
+ }
+
+ protected void setBinding(DiffElement binding) {
+ this.binding = binding;
+ }
+
+ public Object getValue() {
+ return value;
+ }
+
+ public String toString() {
+ return getValue().toString(); // throws NullPointerException if the wrapped value is null
+ }
+
+ public String toString(int offset) {
+ return toString(); // offset is ignored here
+ }
+
+ public String toOneLineString() {
+ return getValue().toString();
+ }
+
+ public boolean isAtomic() { return true; }
+ public boolean isCompound() { return ! isAtomic(); }
+ public int size() { return 1; }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java
new file mode 100644
index 000000000..af5771c55
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+
+import java.io.File;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 1:09 PM
+ *
+ * Interface for readers creating diffable objects from a file
+ */
+public interface DiffableReader {
+ @Ensures("result != null")
+ public String getName(); // the name identifying this reader
+
+ @Ensures("result != null")
+ @Requires("file != null")
+ public DiffElement readFromFile(File file, int maxElementsToRead); // builds a diffable tree from file; maxElementsToRead bounds how much is read
+
+ @Requires("file != null")
+ public boolean canRead(File file); // true if this reader can handle file
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java
new file mode 100644
index 000000000..efc6ef160
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+public class Difference implements Comparable {
+ final String path; // X.Y.Z
+ final String[] parts; // path split on '.' by DiffEngine.diffNameToPath
+ int count = 0; // number of occurrences; incremented through incCount()
+
+ public Difference(String path) {
+ this.path = path;
+ this.parts = DiffEngine.diffNameToPath(path);
+ }
+
+ public String[] getParts() {
+ return parts; // returns the internal array, not a copy
+ }
+
+ public void incCount() { count++; }
+
+ public int getCount() {
+ return count;
+ }
+
+ /**
+ * The fully qualified path object A.B.C etc
+ * @return the path string this difference was constructed with
+ */
+ public String getPath() {
+ return path;
+ }
+
+ /**
+ * @return the length of the parts of this summary
+ */
+ public int length() {
+ return this.parts.length;
+ }
+
+ /**
+ * Returns true if the string parts matches this summary. Matching parts
+ * must be equal() everywhere where this summary isn't *.
+ * @param otherParts the path parts to test against this summary
+ * @return true if the lengths agree and every non-* part is equal
+ */
+ public boolean matches(String[] otherParts) {
+ if ( otherParts.length != length() )
+ return false;
+
+ // TODO optimization: can start at right most non-star element
+ for ( int i = 0; i < length(); i++ ) {
+ String part = parts[i];
+ if ( ! part.equals("*") && ! part.equals(otherParts[i]) ) // a * part matches anything
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("%s:%d", getPath(), getCount());
+ }
+
+ @Override
+ public int compareTo(Difference other) {
+ // sort first highest to lowest count, then by lowest to highest path
+ int countCmp = Integer.valueOf(count).compareTo(other.count);
+ return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); // negated so that larger counts sort first
+ }
+
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java
new file mode 100644
index 000000000..2fe9b47f8
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:53 PM
+ *
+ * Represents a specific difference between two specific DiffElements
+ */
+public class SpecificDifference extends Difference {
+ DiffElement master, test; // the two differing elements; either one may be null, but not both
+
+ public SpecificDifference(DiffElement master, DiffElement test) {
+ super(createName(master, test)); // createName rejects the case where both elements are null
+ this.master = master;
+ this.test = test;
+ }
+
+ public String toString() {
+ return String.format("%s:%s!=%s",
+ getPath(),
+ getOneLineString(master),
+ getOneLineString(test));
+ }
+
+ private static String createName(DiffElement master, DiffElement test) { // path of master if present, otherwise of test
+ if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null");
+ return (master == null ? test : master).fullyQualifiedName();
+ }
+
+ private static String getOneLineString(DiffElement elt) {
+ return elt == null ? "MISSING" : elt.getValue().toOneLineString(); // a null side is reported as MISSING
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java
new file mode 100644
index 000000000..06d14366f
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import org.broad.tribble.readers.AsciiLineReader;
+import org.broad.tribble.readers.LineReader;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.variantcontext.Genotype;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.io.*;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 1:09 PM
+ *
+ * Class implementing diffnode reader for VCF
+ */
+public class VCFDiffableReader implements DiffableReader {
+ @Override
+ public String getName() { return "VCF"; }
+
+ @Override
+ public DiffElement readFromFile(File file, int maxElementsToRead) { // builds root -> one node per record -> one node per genotype
+ DiffNode root = DiffNode.rooted(file.getName());
+ try {
+ LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
+ VCFCodec vcfCodec = new VCFCodec();
+
+ // must be read as state is stored in reader itself
+ vcfCodec.readHeader(lineReader);
+
+ String line = lineReader.readLine();
+ int count = 0; // number of records seen so far
+ while ( line != null ) {
+ if ( count++ >= maxElementsToRead && maxElementsToRead != -1) // -1 means no limit; >= so at most maxElementsToRead records are read
+ break;
+
+ VariantContext vc = (VariantContext)vcfCodec.decode(line);
+ String name = vc.getChr() + ":" + vc.getStart(); // record nodes are named chr:start
+ DiffNode vcRoot = DiffNode.empty(name, root);
+
+ // add fields
+ vcRoot.add("CHROM", vc.getChr());
+ vcRoot.add("POS", vc.getStart());
+ vcRoot.add("ID", vc.hasID() ? vc.getID() : VCFConstants.MISSING_VALUE_v4);
+ vcRoot.add("REF", vc.getReference());
+ vcRoot.add("ALT", vc.getAlternateAlleles());
+ vcRoot.add("QUAL", vc.hasNegLog10PError() ? vc.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4);
+ vcRoot.add("FILTER", vc.getFilters());
+
+ // add info fields
+ for (Map.Entry attribute : vc.getAttributes().entrySet()) {
+ if ( ! attribute.getKey().startsWith("_") && ! attribute.getKey().equals(VariantContext.ID_KEY)) // skip "_"-prefixed keys and the ID key; ID is already added above
+ vcRoot.add(attribute.getKey(), attribute.getValue());
+ }
+
+ for (Genotype g : vc.getGenotypes().values() ) {
+ DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot); // one child node per sample
+ gRoot.add("GT", g.getGenotypeString());
+ gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 );
+
+ for (Map.Entry attribute : g.getAttributes().entrySet()) {
+ if ( ! attribute.getKey().startsWith("_") )
+ gRoot.add(attribute.getKey(), attribute.getValue());
+ }
+
+ vcRoot.add(gRoot);
+ }
+
+ root.add(vcRoot); // throws IllegalArgumentException if two records share the same chr:start name
+ line = lineReader.readLine();
+ }
+
+ lineReader.close(); // NOTE(review): not reached if an exception is thrown above, leaving the reader open
+ } catch ( IOException e ) {
+ return null; // NOTE(review): DiffableReader.readFromFile declares @Ensures("result != null"); returning null here conflicts with it
+ }
+
+ return root.getBinding();
+ }
+
+ @Override
+ public boolean canRead(File file) { // true if the file starts with the VCF v4 fileformat header
+ try {
+ final String VCF4_HEADER = "##fileformat=VCFv4";
+ char[] buff = new char[VCF4_HEADER.length()];
+ new FileReader(file).read(buff, 0, VCF4_HEADER.length()); // NOTE(review): this FileReader is never closed
+ String firstLine = new String(buff);
+ return firstLine.startsWith(VCF4_HEADER);
+ } catch ( IOException e ) {
+ return false; // an unreadable file is reported as not readable by this reader
+ }
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index 7a765c602..fc8a5819a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
+import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
@@ -37,7 +38,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.commandline.*;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import java.util.*;
import java.io.PrintStream;
@@ -48,7 +48,7 @@ import java.io.PrintStream;
* multi-sample data. The user can choose from several different incorporated calculation models.
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
-@ReadFilters( {BadMateFilter.class} )
+@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} )
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.REFERENCE)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
@@ -158,7 +158,7 @@ public class UnifiedGenotyper extends LocusWalker getSupportedHeaderStrings() {
+ Set result = new HashSet();
+ result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
+ result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
+ result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
+ result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
+
+ return result;
+ }
+
/**
* Compute at a given locus.
*
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 4c9080884..6fc972b5d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -634,17 +634,27 @@ public class UnifiedGenotyperEngine {
if (vcInput == null)
return null;
- if (vcInput.isSNP() && ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
- return GenotypeLikelihoodsCalculationModel.Model.SNP;
+ // todo - no support to genotype MNP's yet
+ if (vcInput.isMNP())
+ return null;
+
+ if (vcInput.isSNP()) {
+ if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
+ return GenotypeLikelihoodsCalculationModel.Model.SNP;
+ else
+ // ignore SNP's if user chose INDEL mode
+ return null;
+ }
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL))
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
- } else {
+ }
+ else {
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)
return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
- }
+ }
}
return null;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
index 048dbd8cb..3b94989aa 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
@@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadCigarFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
-import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
@@ -50,7 +50,7 @@ import java.io.PrintStream;
/**
* Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
*/
-@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, BadCigarFilter.class})
+@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, BadCigarFilter.class})
@Reference(window=@Window(start=-1,stop=50))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@By(DataSource.REFERENCE)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
index c2953d1d7..1f05ddaf0 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
@@ -72,7 +72,7 @@ import java.util.*;
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
*/
-@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, PlatformUnitFilter.class})
+@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class})
public class SomaticIndelDetectorWalker extends ReadWalker {
// @Output
// PrintStream out;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
index e59b29502..4833a6cad 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
@@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
-import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
@@ -58,7 +58,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
@Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
@By(DataSource.READS)
-@ReadFilters({ZeroMappingQualityReadFilter.class})
+@ReadFilters({MappingQualityZeroReadFilter.class})
// Filter out all reads with zero mapping quality
public class ReadBackedPhasingWalker extends RodWalker {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
index ee504b6e7..c21f548b3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broad.tribble.bed.BEDCodec;
import org.broad.tribble.dbsnp.DbSNPCodec;
+import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.commandline.Gather;
@@ -34,7 +35,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
-import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.*;
@@ -75,7 +76,7 @@ import java.util.Map;
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
-@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
+@ReadFilters( {MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
@PartitionBy(PartitionType.LOCUS)
public class CountCovariatesWalker extends LocusWalker implements TreeReducible {
diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
index 23054e95f..fad2320fc 100755
--- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@@ -9,9 +9,13 @@ import net.sf.samtools.SAMUtils;
* @author Kiran Garimella
*/
public class QualityUtils {
+
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double MIN_REASONABLE_ERROR = 0.0001;
public final static byte MAX_REASONABLE_Q_SCORE = 40;
+ public final static byte MIN_USABLE_Q_SCORE = 6;
+
+ public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
/**
* Private constructor. No instantiating this class!
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java
index 31251c089..a8bf74707 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java
@@ -123,12 +123,10 @@ public class StandardVCFWriter implements VCFWriter {
try {
// the file format field needs to be written first
- mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
+ mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString() + "\n");
for ( VCFHeaderLine line : mHeader.getMetaData() ) {
- if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
- line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) ||
- line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) )
+ if ( VCFHeaderVersion.isFormatString(line.getKey()) )
continue;
// are the records filtered (so we know what to put in the FILTER column of passing records) ?
@@ -358,16 +356,8 @@ public class StandardVCFWriter implements VCFWriter {
mWriter.write(key);
if ( !entry.getValue().equals("") ) {
- int numVals = 1;
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
- if ( metaData != null )
- numVals = metaData.getCount();
-
- // take care of unbounded encoding
- if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
- numVals = 1;
-
- if ( numVals > 0 ) {
+ if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) {
mWriter.write("=");
mWriter.write(entry.getValue());
}
@@ -423,7 +413,7 @@ public class StandardVCFWriter implements VCFWriter {
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
if ( metaData != null ) {
- int numInFormatField = metaData.getCount();
+ int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
// For example, if Number=2, the string has to be ".,."
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java
new file mode 100644
index 000000000..a9de949d8
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java
@@ -0,0 +1,28 @@
+package org.broadinstitute.sting.utils.codecs.vcf;
+
+/**
+ * @author ebanks
+ * A class representing a key=value entry for ALT fields in the VCF header
+ */
+public class VCFAltHeaderLine extends VCFSimpleHeaderLine {
+
+ /**
+ * create a VCF ALT header line from an explicit name and description
+ *
+ * @param name the name for this header line
+ * @param description the description for this header line
+ */
+ public VCFAltHeaderLine(String name, String description) {
+ super(name, description, SupportedHeaderLineType.ALT);
+ }
+
+ /**
+ * create a VCF ALT header line by handing the raw header line to the superclass for parsing
+ *
+ * @param line the header line
+ * @param version the vcf header version
+ */
+ protected VCFAltHeaderLine(String line, VCFHeaderVersion version) {
+ super(line, version, SupportedHeaderLineType.ALT);
+ }
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
index a799161ad..bb822f2ed 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
@@ -24,6 +24,8 @@
package org.broadinstitute.sting.utils.codecs.vcf;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -43,26 +45,43 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
// the field types
private String name;
- private int count;
+ private int count = -1;
+ private VCFHeaderLineCount countType;
private String description;
private VCFHeaderLineType type;
// access methods
public String getName() { return name; }
- public int getCount() { return count; }
public String getDescription() { return description; }
public VCFHeaderLineType getType() { return type; }
+ public VCFHeaderLineCount getCountType() { return countType; }
+ public int getCount() {
+ if ( countType != VCFHeaderLineCount.INTEGER )
+ throw new ReviewedStingException("Asking for header line count when type is not an integer");
+ return count;
+ }
- //
- public void setNumberToUnbounded() { this.count = UNBOUNDED; }
+ // resolves the value count for a record with numAltAlleles alternate alleles: INTEGER -> stored count, UNBOUNDED -> -1, A -> numAltAlleles, G -> (numAltAlleles+1)*(numAltAlleles+2)/2; throws ReviewedStingException for any other count type
+ public int getCount(int numAltAlleles) {
+ int myCount;
+ switch ( countType ) {
+ case INTEGER: myCount = count; break;
+ case UNBOUNDED: myCount = -1; break;
+ case A: myCount = numAltAlleles; break;
+ case G: myCount = ((numAltAlleles + 1) * (numAltAlleles + 2) / 2); break;
+ default: throw new ReviewedStingException("Unknown count type: " + countType);
+ }
+ return myCount;
+ }
+
+ public void setNumberToUnbounded() {
+ countType = VCFHeaderLineCount.UNBOUNDED;
+ count = -1;
+ }
// our type of line, i.e. format, info, etc
private final SupportedHeaderLineType lineType;
- // line numerical values are allowed to be unbounded (or unknown), which is
- // marked with a dot (.)
- public static final int UNBOUNDED = -1; // the value we store internally for unbounded types
-
/**
* create a VCF format header line
*
@@ -70,10 +89,12 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
* @param count the count for this header line
* @param type the type for this header line
* @param description the description for this header line
+ * @param lineType the header line type
*/
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
this.name = name;
+ this.countType = VCFHeaderLineCount.INTEGER;
this.count = count;
this.type = type;
this.description = description;
@@ -81,20 +102,53 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
validate();
}
+ /**
+ * create a VCF compound header line whose number of values is described by a count type instead of a fixed integer
+ *
+ * @param name the name for this header line
+ * @param count the count type for this header line; the integer count field is not assigned here (NOTE(review): passing INTEGER would leave it at its -1 default - confirm callers never do)
+ * @param type the type for this header line
+ * @param description the description for this header line
+ * @param lineType the header line type
+ */
+ protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
+ super(lineType.toString(), "");
+ this.name = name;
+ this.countType = count;
+ this.type = type;
+ this.description = description;
+ this.lineType = lineType;
+ validate();
+ }
+
/**
* create a VCF format header line
*
* @param line the header line
* @param version the VCF header version
+ * @param lineType the header line type
*
*/
protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
name = mapping.get("ID");
- count = (version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) ?
- mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
- mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
+ count = -1;
+ final String numberStr = mapping.get("Number");
+ if ( numberStr.equals(VCFConstants.PER_ALLELE_COUNT) ) {
+ countType = VCFHeaderLineCount.A;
+ } else if ( numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT) ) {
+ countType = VCFHeaderLineCount.G;
+ } else if ( ((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) &&
+ numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) ||
+ ((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) &&
+ numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3)) ) {
+ countType = VCFHeaderLineCount.UNBOUNDED;
+ } else {
+ countType = VCFHeaderLineCount.INTEGER;
+ count = Integer.valueOf(numberStr);
+
+ }
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
@@ -121,7 +175,15 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
protected String toStringEncoding() {
Map map = new LinkedHashMap();
map.put("ID", name);
- map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
+ Object number;
+ switch ( countType ) {
+ case A: number = VCFConstants.PER_ALLELE_COUNT; break;
+ case G: number = VCFConstants.PER_GENOTYPE_COUNT; break;
+ case UNBOUNDED: number = VCFConstants.UNBOUNDED_ENCODING_v4; break;
+ case INTEGER:
+ default: number = count;
+ }
+ map.put("Number", number);
map.put("Type", type);
map.put("Description", description);
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
@@ -136,15 +198,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
if ( !(o instanceof VCFCompoundHeaderLine) )
return false;
VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o;
- return name.equals(other.name) &&
- count == other.count &&
- description.equals(other.description) &&
- type == other.type &&
- lineType == other.lineType;
+ return equalsExcludingDescription(other) &&
+ description.equals(other.description);
}
public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
return count == other.count &&
+ countType == other.countType &&
type == other.type &&
lineType == other.lineType &&
name.equals(other.name);
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java
index 695c46c27..91cf86c70 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java
@@ -99,6 +99,8 @@ public final class VCFConstants {
public static final String MISSING_DEPTH_v3 = "-1";
public static final String UNBOUNDED_ENCODING_v4 = ".";
public static final String UNBOUNDED_ENCODING_v3 = "-1";
+ public static final String PER_ALLELE_COUNT = "A";
+ public static final String PER_GENOTYPE_COUNT = "G";
public static final String EMPTY_ALLELE = ".";
public static final String EMPTY_GENOTYPE = "./.";
public static final double MAX_GENOTYPE_QUAL = 99.0;
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java
index 9176fc16e..418b80074 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java
@@ -1,19 +1,10 @@
package org.broadinstitute.sting.utils.codecs.vcf;
-import java.util.Arrays;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-
/**
* @author ebanks
* A class representing a key=value entry for FILTER fields in the VCF header
*/
-public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
-
- private String name;
- private String description;
-
+public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
/**
* create a VCF filter header line
@@ -22,12 +13,7 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
* @param description the description for this header line
*/
public VCFFilterHeaderLine(String name, String description) {
- super("FILTER", "");
- this.name = name;
- this.description = description;
-
- if ( name == null || description == null )
- throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
+ super(name, description, SupportedHeaderLineType.FILTER);
}
/**
@@ -37,34 +23,6 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
* @param version the vcf header version
*/
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
- super("FILTER", "");
- Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
- name = mapping.get("ID");
- description = mapping.get("Description");
- if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
- description = UNBOUND_DESCRIPTION;
- }
-
- protected String toStringEncoding() {
- Map map = new LinkedHashMap();
- map.put("ID", name);
- map.put("Description", description);
- return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
- }
-
- public boolean equals(Object o) {
- if ( !(o instanceof VCFFilterHeaderLine) )
- return false;
- VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
- return name.equals(other.name) &&
- description.equals(other.description);
- }
-
- public String getName() {
- return name;
- }
-
- public String getDescription() {
- return description;
+ super(line, version, SupportedHeaderLineType.FILTER);
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java
index 352be3e97..474c8dd14 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java
@@ -16,6 +16,10 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
throw new IllegalArgumentException("Flag is an unsupported type for format fields");
}
+ public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
+ super(name, count, type, description, SupportedHeaderLineType.FORMAT);
+ }
+
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.FORMAT);
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java
new file mode 100644
index 000000000..d615c7c78
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java
@@ -0,0 +1,8 @@
+package org.broadinstitute.sting.utils.codecs.vcf;
+
+/**
+ * the count encodings we use for the Number field of VCF header lines: a fixed integer, one value per alternate allele, one value per genotype, or unbounded
+ */
+public enum VCFHeaderLineCount {
+ INTEGER, A, G, UNBOUNDED; // written in the header as the integer itself, "A", "G" and "." respectively
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java
index 135a5c1a1..9b20f38a1 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java
@@ -13,6 +13,10 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
super(name, count, type, description, SupportedHeaderLineType.INFO);
}
+ public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
+ super(name, count, type, description, SupportedHeaderLineType.INFO);
+ }
+
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.INFO);
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java
new file mode 100644
index 000000000..152043f28
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java
@@ -0,0 +1,81 @@
+package org.broadinstitute.sting.utils.codecs.vcf;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+
+/**
+ * @author ebanks
+ * A class representing a key=value entry for simple VCF header types (those carrying only an ID and a Description)
+ */
+public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
+
+ public enum SupportedHeaderLineType {
+ FILTER, ALT;
+ }
+
+ private String name;
+ private String description;
+
+ // our type of line, i.e. filter, alt, etc
+ private final SupportedHeaderLineType lineType;
+
+
+ /**
+ * create a simple VCF header line of the given type; throws IllegalArgumentException if name or description is null
+ *
+ * @param name the name for this header line
+ * @param description the description for this header line
+ * @param lineType the header line type
+ */
+ public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) {
+ super(lineType.toString(), "");
+ this.lineType = lineType;
+ this.name = name;
+ this.description = description;
+
+ if ( name == null || description == null )
+ throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
+ }
+
+ /**
+ * create a simple VCF header line of the given type by parsing the ID and Description out of a raw header line
+ *
+ * @param line the header line
+ * @param version the vcf header version
+ * @param lineType the header line type
+ */
+ protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
+ super(lineType.toString(), "");
+ this.lineType = lineType;
+ Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
+ name = mapping.get("ID");
+ description = mapping.get("Description");
+ if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
+ description = UNBOUND_DESCRIPTION;
+ }
+
+ protected String toStringEncoding() { // renders as <lineType>=<encoded ID/Description map>, ID first
+ Map map = new LinkedHashMap();
+ map.put("ID", name);
+ map.put("Description", description);
+ return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
+ }
+
+ public boolean equals(Object o) {
+ if ( !(o instanceof VCFSimpleHeaderLine) )
+ return false;
+ VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o;
+ return lineType == other.lineType && // a FILTER line must never equal an ALT line that shares its ID and description
+ name.equals(other.name) && description.equals(other.description);
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public String getDescription() {
+ return description;
+ }
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
index ecede068e..4037f75b9 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
@@ -180,19 +180,4 @@ public class VCFUtils {
return new HashSet(map.values());
}
-
- /**
- * return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
- * @return a set of VCF format lines
- */
- public static Set getSupportedHeaderStrings() {
- Set result = new HashSet();
- result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
- result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
- result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
- result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
-
- return result;
- }
-
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index 5787b591f..da80a3431 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -867,7 +867,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
for ( String name : sampleNames ) {
if ( map.containsKey(name) ) throw new IllegalArgumentException("Duplicate names detected in requested samples " + sampleNames);
- map.put(name, getGenotype(name));
+ final Genotype g = getGenotype(name);
+ if ( g != null ) {
+ map.put(name, g);
+ }
}
return map;
diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java
index b469c8a41..b3e422ba9 100755
--- a/public/java/test/org/broadinstitute/sting/BaseTest.java
+++ b/public/java/test/org/broadinstitute/sting/BaseTest.java
@@ -4,6 +4,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.log4j.*;
import org.apache.log4j.spi.LoggingEvent;
import org.broadinstitute.sting.commandline.CommandLineUtils;
+import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
@@ -334,11 +335,14 @@ public abstract class BaseTest {
if (parameterize || expectedMD5.equals("")) {
// Don't assert
- } else {
- Assert.assertEquals(filemd5sum, expectedMD5, name + " Mismatching MD5s");
+ } else if ( filemd5sum.equals(expectedMD5) ) {
System.out.println(String.format(" => %s PASSED", name));
+ } else {
+ Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum));
}
+
+
return filemd5sum;
}
@@ -381,7 +385,12 @@ public abstract class BaseTest {
System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File);
System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File);
- // todo -- add support for simple inline display of the first N differences for text file
+ // inline differences
+ DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0);
+ boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params);
+ if ( success )
+ System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n",
+ pathToExpectedMD5File, pathToFileMD5File);
}
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
index 6ba6926c6..e6300e6c9 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@@ -15,7 +15,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347"));
+ Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c"));
executeTest("test file has annotations, not asking for annotations, #1", spec);
}
@@ -23,7 +23,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
- Arrays.asList("1de8e943fbf55246ebd19efa32f22a58"));
+ Arrays.asList("964f1016ec9a3c55333f62dd834c14d6"));
executeTest("test file has annotations, not asking for annotations, #2", spec);
}
@@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("93c110e45fd4aedb044a8a5501e23336"));
+ Arrays.asList("8e7de435105499cd71ffc099e268a83e"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
- Arrays.asList("f5cb45910ed719f46159f9f71acaecf4"));
+ Arrays.asList("64b6804cb1e27826e3a47089349be581"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@@ -47,7 +47,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("4b48e7d095ef73e3151542ea976ecd89"));
+ Arrays.asList("42ccee09fa9f8c58f4a0d4f1139c094f"));
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}
@@ -55,7 +55,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
- Arrays.asList("28dfbfd178aca071b948cd3dc2365357"));
+ Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
@@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("a330a5bc3ee72a51dbeb7e6c97a0db99"));
+ Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
- Arrays.asList("3a31d1ef471acfb881a2dec7963fe3f4"));
+ Arrays.asList("09f8e840770a9411ff77508e0ed0837f"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
- Arrays.asList("a63fd8ff7bafbd46b7f009144a7c2ad1"));
+ Arrays.asList("78d2c19f8107d865970dbaf3e12edd92"));
executeTest("test overwriting header", spec);
}
@@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
- Arrays.asList("36378f1245bb99d902fbfe147605bc42"));
+ Arrays.asList("16e3a1403fc376320d7c69492cad9345"));
executeTest("not passing it any reads", spec);
}
@@ -95,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
- Arrays.asList("0257a1cc3c703535b2d3c5046bf88ab7"));
+ Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d"));
executeTest("getting DB tag with dbSNP", spec);
}
@@ -103,7 +103,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
- Arrays.asList("2d7c73489dcf0db433bebdf79a068764"));
+ Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688"));
executeTest("getting DB tag with HM3", spec);
}
@@ -111,13 +111,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1,
- Arrays.asList("2f6efd08d818faa1eb0631844437c64a"));
+ Arrays.asList("e9c0d832dc6b4ed06c955060f830c140"));
executeTest("using expression", spec);
}
@Test
public void testTabixAnnotations() {
- final String MD5 = "6c7a6a1c0027bf82656542a9b2671a35";
+ final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";
for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -A HomopolymerRun -B:variant,VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1,
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java
index c4f6d5ebc..c75a5b2dc 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java
@@ -29,7 +29,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
*/
- String[] md5WithDashSArg = {"3d3b61a83c1189108eabb2df04218099"};
+ String[] md5WithDashSArg = {"efba4ce1641cfa2ef88a64395f2ebce8"};
WalkerTestSpec specWithSArg = new WalkerTestSpec(
"-T GenomicAnnotator -R " + b36KGReference +
" -B:variant,vcf3 /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf" +
@@ -58,7 +58,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
"-o %s"
),
1,
- Arrays.asList("caa562160733aa638e1ba413ede209ae")
+ Arrays.asList("772fc3f43b70770ec6c6acbb8bbbd4c0")
);
executeTest("testGenomicAnnotatorOnIndels", testOnIndels);
}
@@ -76,7 +76,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
"-o %s"
),
1,
- Arrays.asList("a4cf76f08fa90284b6988a464b6e0c17")
+ Arrays.asList("081ade7f3d2d3c5f19cb1e8651a626f3")
);
executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels);
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java
index 70c34e729..fef1b6e64 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java
@@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"-B:beagleR2,BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
"-B:beagleProbs,BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
"-B:beaglePhased,BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
- "-o %s -NO_HEADER", 1, Arrays.asList("6bccee48ad2f06ba5a8c774fed444478"));
+ "-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4"));
executeTest("test BeagleOutputToVCF", spec);
}
@@ -60,7 +60,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"-T ProduceBeagleInput -B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
"-B:validation,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta -NO_HEADER ",2,
- Arrays.asList("660986891b30cdc937e0f2a3a5743faa","223fb977e8db567dcaf632c6ee51f294"));
+ Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166"));
executeTest("test BeagleInputWithBootstrap",spec);
}
@@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"-B:beagleR2,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
"-B:beagleProbs,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
"-B:beaglePhased,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
- "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("24b88ef8cdf6e347daab491f0256be5a"));
+ "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965"));
executeTest("testBeagleChangesSitesToRef",spec);
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java
new file mode 100644
index 000000000..96dfec6e8
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// our package
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+
+// the imports for unit testing.
+
+import org.broadinstitute.sting.BaseTest;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+/**
+ * Basic unit test for DiffEngine: tree differencing and summarization of difference paths
+ */
+public class DiffEngineUnitTest extends BaseTest {
+ DiffEngine engine;
+
+ @BeforeClass(enabled = true)
+ public void createDiffEngine() {
+ engine = new DiffEngine();
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // Difference testing routines
+ //
+ // --------------------------------------------------------------------------------
+
+ private class DifferenceTest extends TestDataProvider {
+ public DiffElement tree1, tree2;
+ public List differences;
+
+ private DifferenceTest(String tree1, String tree2) {
+ this(tree1, tree2, Collections.emptyList());
+ }
+
+ private DifferenceTest(String tree1, String tree2, String difference) {
+ this(tree1, tree2, Arrays.asList(difference));
+ }
+
+ private DifferenceTest(String tree1, String tree2, List differences) {
+ super(DifferenceTest.class);
+ this.tree1 = DiffNode.fromString(tree1);
+ this.tree2 = DiffNode.fromString(tree2);
+ this.differences = differences;
+ }
+
+ public String toString() {
+ return String.format("tree1=%s tree2=%s diff=%s",
+ tree1.toOneLineString(), tree2.toOneLineString(), differences);
+ }
+ }
+
+ @DataProvider(name = "trees")
+ public Object[][] createTrees() {
+ new DifferenceTest("A=X", "A=X");
+ new DifferenceTest("A=X", "A=Y", "A:X!=Y");
+ new DifferenceTest("A=X", "B=X", Arrays.asList("A:X!=MISSING", "B:MISSING!=X"));
+ new DifferenceTest("A=(X=1)", "B=(X=1)", Arrays.asList("A:(X=1)!=MISSING", "B:MISSING!=(X=1)"));
+ new DifferenceTest("A=(X=1)", "A=(X=1)");
+ new DifferenceTest("A=(X=1 Y=2)", "A=(X=1 Y=2)");
+ new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=3))");
+ new DifferenceTest("A=(X=1)", "A=(X=2)", "A.X:1!=2");
+ new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=4))", "A.B.Z:3!=4");
+ new DifferenceTest("A=(X=1)", "A=(X=1 Y=2)", "A.Y:MISSING!=2");
+ new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2)", "A.B:(Z=3)!=MISSING");
+ return DifferenceTest.getTests(DifferenceTest.class);
+ }
+
+ @Test(enabled = true, dataProvider = "trees")
+ public void testDiffs(DifferenceTest test) {
+ logger.warn("Test tree1: " + test.tree1.toOneLineString());
+ logger.warn("Test tree2: " + test.tree2.toOneLineString());
+
+ List diffs = engine.diff(test.tree1, test.tree2);
+ logger.warn("Test expected diff : " + test.differences);
+ logger.warn("Observed diffs : " + diffs);
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // Low-level routines for summarizing differences
+ //
+ // --------------------------------------------------------------------------------
+
+ @Test(enabled = true)
+ public void testLongestCommonPostfix() {
+ testLongestCommonPostfixHelper("A", "A", 1);
+ testLongestCommonPostfixHelper("A", "B", 0);
+ testLongestCommonPostfixHelper("A.B", "A.B", 2);
+ testLongestCommonPostfixHelper("A.B.C", "A.B.C", 3);
+ testLongestCommonPostfixHelper("A.B.C", "X.B.C", 2);
+ testLongestCommonPostfixHelper("A.B.C", "X.Y.C", 1);
+ testLongestCommonPostfixHelper("A.B.C", "X.Y.Z", 0);
+ testLongestCommonPostfixHelper("A.B.C", "A.X.C", 1);
+ testLongestCommonPostfixHelper("A.B.C", "A.X.Z", 0);
+ testLongestCommonPostfixHelper("A.B.C", "A.B.Z", 0);
+ }
+
+ public void testLongestCommonPostfixHelper(String p1, String p2, int expected) {
+ String[] parts1 = p1.split("\\.");
+ String[] parts2 = p2.split("\\.");
+ int obs = DiffEngine.longestCommonPostfix(parts1, parts2);
+ Assert.assertEquals(obs, expected, "p1=" + p1 + " p2=" + p2 + " failed");
+ }
+
+ @Test(enabled = true, dependsOnMethods = "testLongestCommonPostfix")
+ public void testSummarizePath() {
+ testSummarizePathHelper("A", "A", "A");
+ testSummarizePathHelper("A", "B", "*");
+ testSummarizePathHelper("A.B", "A.B", "A.B");
+ testSummarizePathHelper("A.B", "X.B", "*.B");
+ testSummarizePathHelper("A.B", "X.Y", "*.*");
+ testSummarizePathHelper("A.B.C", "A.B.C", "A.B.C");
+ testSummarizePathHelper("A.B.C", "X.B.C", "*.B.C");
+ testSummarizePathHelper("A.B.C", "X.Y.C", "*.*.C");
+ testSummarizePathHelper("A.B.C", "X.Y.Z", "*.*.*");
+ testSummarizePathHelper("A.B.C", "A.X.C", "*.*.C");
+ testSummarizePathHelper("A.B.C", "A.X.Z", "*.*.*");
+ testSummarizePathHelper("A.B.C", "A.B.Z", "*.*.*");
+ }
+
+ public void testSummarizePathHelper(String p1, String p2, String expected) {
+ String[] parts1 = DiffEngine.diffNameToPath(p1);
+ String[] parts2 = DiffEngine.diffNameToPath(p2);
+ int obs = DiffEngine.longestCommonPostfix(parts1, parts2);
+ String path = DiffEngine.summarizedPath(parts2, obs);
+ Assert.assertEquals(path, expected, "p1=" + p1 + " p2=" + p2 + " failed");
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // High-level difference summary
+ //
+ // --------------------------------------------------------------------------------
+
+ private class SummarizeDifferenceTest extends TestDataProvider {
+ List diffs = new ArrayList();
+ List expecteds = new ArrayList();
+
+ public SummarizeDifferenceTest() { super(SummarizeDifferenceTest.class); }
+
+ public SummarizeDifferenceTest addDiff(String... diffsToAdd) {
+ diffs.addAll(Arrays.asList(diffsToAdd));
+ return this;
+ }
+
+ public SummarizeDifferenceTest addSummary(String... expectedSummary) {
+ expecteds.addAll(Arrays.asList(expectedSummary));
+ return this;
+ }
+
+ public String toString() {
+ return String.format("diffs=%s => expected=%s", diffs, expecteds);
+ }
+
+ public void test() {
+ List diffPaths = new ArrayList(diffs.size());
+ for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); }
+
+ List sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs);
+
+ Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs);
+
+ for ( int i = 0; i < sumDiffs.size(); i++ ) {
+ Difference sumDiff = sumDiffs.get(i);
+ String expected = expecteds.get(i);
+ String[] pathCount = expected.split(":");
+ String path = pathCount[0];
+ int count = Integer.valueOf(pathCount[1]);
+ Assert.assertEquals(sumDiff.getPath(), path, "Unexpected path at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs);
+ Assert.assertEquals(sumDiff.getCount(), count, "Unexpected counts at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs);
+ }
+ }
+ }
+
+ @DataProvider(name = "summaries")
+ public Object[][] createSummaries() {
+ new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2");
+ new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1");
+ new SummarizeDifferenceTest().addDiff("A", "A", "A").addSummary("A:3");
+ new SummarizeDifferenceTest().addDiff("A", "A", "A", "B").addSummary("A:3", "B:1");
+ new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B").addSummary("A:3", "B:2");
+ new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B", "C").addSummary("A:3", "B:2", "C:1");
+ new SummarizeDifferenceTest().addDiff("A.X", "A.X").addSummary("A.X:2");
+ new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X").addSummary("*.X:3", "A.X:2", "B.X:1");
+ new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X", "B.X").addSummary("*.X:4", "A.X:2", "B.X:2");
+ new SummarizeDifferenceTest().addDiff("A.B.C", "X.B.C").addSummary("*.B.C:2", "A.B.C:1", "X.B.C:1");
+ new SummarizeDifferenceTest().addDiff("A.B.C", "X.Y.C", "X.Y.C").addSummary("*.*.C:3", "X.Y.C:2", "A.B.C:1");
+ new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "X.Y.C").addSummary("*.*.C:3", "A.B.C:1", "A.X.C:1", "X.Y.C:1");
+ new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C").addSummary("*.*.C:3", "*.X.C:2", "A.B.C:1", "A.X.C:1", "B.X.C:1");
+ new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C", "B.X.C").addSummary("*.*.C:4", "*.X.C:3", "B.X.C:2", "A.B.C:1", "A.X.C:1");
+
+ return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class);
+ }
+
+
+ @Test(enabled = true, dependsOnMethods = "testSummarizePath", dataProvider = "summaries")
+ public void testSummarizeDifferences(SummarizeDifferenceTest test) {
+ test.test();
+ }
+}
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java
new file mode 100644
index 000000000..534416d29
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// our package
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+
+// the imports for unit testing.
+
+
+import org.broadinstitute.sting.BaseTest;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+/**
+ * Basic unit test for DiffNode, DiffValue and DiffElement: accessors and DiffNode.fromString parsing
+ */
+public class DiffNodeUnitTest extends BaseTest {
+ // Data is:
+ // MY_ROOT
+ // fields: A=A, B=B
+ // nodes: C, D
+ // C: fields: E=E, nodes: none
+ // D: fields: F=F, G=G, nodes: none
+ static DiffNode MY_ROOT = DiffNode.rooted("MY_ROOT");
+ static DiffValue Value_A = new DiffValue("A", MY_ROOT, "A");
+ static DiffValue Value_B = new DiffValue("B", MY_ROOT, "B");
+ static DiffNode NODE_C = DiffNode.empty("C", MY_ROOT);
+ static DiffNode NODE_D = DiffNode.empty("D", MY_ROOT);
+ static DiffValue Value_E = new DiffValue("E", NODE_C, "E");
+ static DiffValue Value_F = new DiffValue("F", NODE_D, "F");
+ static DiffValue Value_G = new DiffValue("G", NODE_D, "G");
+
+ static {
+ MY_ROOT.add(Value_A);
+ MY_ROOT.add(Value_B);
+ MY_ROOT.add(NODE_C);
+ MY_ROOT.add(NODE_D);
+ NODE_C.add(Value_E);
+ NODE_D.add(Value_F);
+ NODE_D.add(Value_G);
+ }
+
+
+ // --------------------------------------------------------------------------------
+ //
+ // Element testing routines
+ //
+ // --------------------------------------------------------------------------------
+
+ private class ElementTest extends TestDataProvider {
+ public DiffElement elt;
+ public String name;
+ public String fullName;
+ public DiffElement parent;
+
+ private ElementTest(DiffValue elt, DiffValue parent, String name, String fullName) {
+ this(elt.getBinding(), parent.getBinding(), name, fullName);
+ }
+
+ private ElementTest(DiffElement elt, DiffElement parent, String name, String fullName) {
+ super(ElementTest.class);
+ this.elt = elt;
+ this.name = name;
+ this.fullName = fullName;
+ this.parent = parent;
+ }
+
+ public String toString() {
+ return String.format("ElementTest elt=%s name=%s fullName=%s parent=%s",
+ elt.toOneLineString(), name, fullName, parent.getName());
+ }
+ }
+
+ @DataProvider(name = "elementdata")
+ public Object[][] createElementData() {
+ new ElementTest(MY_ROOT.getBinding(), DiffElement.ROOT, "MY_ROOT", "MY_ROOT");
+ new ElementTest(NODE_C, MY_ROOT, "C", "MY_ROOT.C");
+ new ElementTest(NODE_D, MY_ROOT, "D", "MY_ROOT.D");
+ new ElementTest(Value_A, MY_ROOT, "A", "MY_ROOT.A");
+ new ElementTest(Value_B, MY_ROOT, "B", "MY_ROOT.B");
+ new ElementTest(Value_E, NODE_C, "E", "MY_ROOT.C.E");
+ new ElementTest(Value_F, NODE_D, "F", "MY_ROOT.D.F");
+ new ElementTest(Value_G, NODE_D, "G", "MY_ROOT.D.G");
+ return TestDataProvider.getTests(ElementTest.class);
+ }
+
+ @Test(enabled = true, dataProvider = "elementdata")
+ public void testElementMethods(ElementTest test) {
+ Assert.assertNotNull(test.elt.getName());
+ Assert.assertNotNull(test.elt.getParent());
+ Assert.assertEquals(test.elt.getName(), test.name);
+ Assert.assertEquals(test.elt.getParent(), test.parent);
+ Assert.assertEquals(test.elt.fullyQualifiedName(), test.fullName);
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // DiffValue testing routines
+ //
+ // --------------------------------------------------------------------------------
+
+ private class LeafTest extends TestDataProvider {
+ public DiffValue diffvalue;
+ public Object value;
+
+ private LeafTest(DiffValue diffvalue, Object value) {
+ super(LeafTest.class);
+ this.diffvalue = diffvalue;
+ this.value = value;
+ }
+
+ public String toString() {
+ return String.format("LeafTest diffvalue=%s value=%s", diffvalue.toOneLineString(), value);
+ }
+ }
+
+ @DataProvider(name = "leafdata")
+ public Object[][] createLeafData() {
+ new LeafTest(Value_A, "A");
+ new LeafTest(Value_B, "B");
+ new LeafTest(Value_E, "E");
+ new LeafTest(Value_F, "F");
+ new LeafTest(Value_G, "G");
+ return TestDataProvider.getTests(LeafTest.class);
+ }
+
+ @Test(enabled = true, dataProvider = "leafdata")
+ public void testLeafMethods(LeafTest test) {
+ Assert.assertNotNull(test.diffvalue.getValue());
+ Assert.assertEquals(test.diffvalue.getValue(), test.value);
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // Node testing routines
+ //
+ // --------------------------------------------------------------------------------
+
+ private class NodeTest extends TestDataProvider {
+ public DiffNode node;
+ public Set fields;
+ public Set subnodes;
+ public Set allNames;
+
+ private NodeTest(DiffNode node, List fields, List subnodes) {
+ super(NodeTest.class);
+ this.node = node;
+ this.fields = new HashSet(fields);
+ this.subnodes = new HashSet(subnodes);
+ this.allNames = new HashSet(fields);
+ allNames.addAll(subnodes);
+ }
+
+ public String toString() {
+ return String.format("NodeTest node=%s fields=%s subnodes=%s",
+ node.toOneLineString(), fields, subnodes);
+ }
+ }
+
+ @DataProvider(name = "nodedata")
+ public Object[][] createData1() {
+ new NodeTest(MY_ROOT, Arrays.asList("A", "B"), Arrays.asList("C", "D"));
+ new NodeTest(NODE_C, Arrays.asList("E"), Collections.emptyList());
+ new NodeTest(NODE_D, Arrays.asList("F", "G"), Collections.emptyList());
+ return TestDataProvider.getTests(NodeTest.class);
+ }
+
+ @Test(enabled = true, dataProvider = "nodedata")
+ public void testNodeAccessors(NodeTest test) {
+ Assert.assertNotNull(test.node.getElements());
+
+ for ( String name : test.allNames ) {
+ DiffElement elt = test.node.getElement(name);
+ Assert.assertNotNull(elt, "Failed to find field " + elt + " in " + test.node);
+ Assert.assertEquals(elt.getName(), name);
+ Assert.assertEquals(elt.getValue().isAtomic(), test.fields.contains(name), "Failed atomic/compound expectation: " + test.node);
+ }
+ }
+
+ // NOTE: add routines are being implicitly tested by the creation of the data structures
+
+ @Test(enabled = true, dataProvider = "nodedata")
+ public void testCounts(NodeTest test) {
+ Assert.assertEquals(test.node.getElements().size(), test.allNames.size());
+ Assert.assertEquals(test.node.getElementNames(), test.allNames);
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // fromString testing routines
+ //
+ // --------------------------------------------------------------------------------
+
+ private class FromStringTest extends TestDataProvider {
+ public String string;
+ public DiffElement expected;
+
+ private FromStringTest(String string, DiffElement expected) {
+ super(FromStringTest.class);
+ this.string = string;
+ this.expected = expected;
+ }
+
+ public String toString() {
+ return String.format("FromStringTest string=%s expected=%s", string, expected.toOneLineString());
+ }
+ }
+
+ @DataProvider(name = "fromstringdata")
+ public Object[][] createFromData() {
+ new FromStringTest("A=A", Value_A.getBinding());
+ new FromStringTest("B=B", Value_B.getBinding());
+ new FromStringTest("C=(E=E)", NODE_C.getBinding());
+ new FromStringTest("D=(F=F G=G)", NODE_D.getBinding());
+ return TestDataProvider.getTests(FromStringTest.class);
+ }
+
+ @Test(enabled = true, dataProvider = "fromstringdata")
+ public void parseFromString(FromStringTest test) {
+ logger.warn("Testing from string: " + test.string);
+ DiffElement elt = DiffNode.fromString(test.string);
+ Assert.assertEquals(elt.toOneLineString(), test.expected.toOneLineString());
+ }
+}
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java
new file mode 100644
index 000000000..baa2f0383
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// our package
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+
+// the imports for unit testing.
+
+
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.util.*;
+
+/**
+ * Basic unit tests for the DiffableReader implementations exposed by the DiffEngine
+ */
+public class DiffableReaderUnitTest extends BaseTest {
+ // Engine under test; assigned in createDiffEngine().
+ DiffEngine engine;
+
+ // Input files, located by appending the file name to testDir.
+ // NOTE(review): testDir is not declared in this class -- presumably inherited from BaseTest; confirm.
+ // VCF input: testVCF1 reads it through the "VCF" reader.
+ File vcfFile = new File(testDir + "diffTestMaster.vcf");
+ // BAM input: testVCF1 expects the "VCF" reader to report it cannot read this file.
+ File bamFile = new File(testDir + "exampleBAM.bam");
+
+ /**
+  * Instantiates the DiffEngine that the test methods of this class query.
+  */
+ @BeforeClass(enabled = true)
+ public void createDiffEngine() {
+     this.engine = new DiffEngine();
+ }
+
+ /**
+  * Checks the reader registry exposed by the engine: it must be non-null and
+  * non-empty, contain an entry named "VCF", map every key to a reader that
+  * reports that same name, and agree with engine.getReader for each key.
+  */
+ @Test(enabled = true)
+ public void testPluggableDiffableReaders() {
+     logger.warn("testPluggableDiffableReaders");
+     // Parameterized on purpose: with a raw Map the entry value is an Object,
+     // so getName() below would not resolve and getReader() would get an Object key.
+     Map<String, DiffableReader> readers = engine.getReaders();
+     Assert.assertNotNull(readers);
+     Assert.assertTrue(readers.size() > 0);
+     Assert.assertNotNull(readers.get("VCF"));
+     // iterate the map that was just validated rather than fetching it a second time
+     for ( Map.Entry<String, DiffableReader> e : readers.entrySet() ) {
+         logger.warn("Found diffable reader: " + e.getKey());
+         // the registry key must be the reader's own name
+         Assert.assertEquals(e.getValue().getName(), e.getKey());
+         // lookup by name must return the same reader the map holds
+         Assert.assertEquals(e.getValue(), engine.getReader(e.getKey()));
+     }
+ }
+
+ /**
+  * Asserts that a node contains an element with the given name and that the
+  * element's underlying value equals the expected one.
+  *
+  * @param rec      node in which the element is looked up
+  * @param field    name of the element to fetch from rec
+  * @param expected value the element's value is compared against
+  */
+ private static void testLeaf(DiffNode rec, String field, Object expected) {
+     DiffElement value = rec.getElement(field);
+     Assert.assertNotNull(value, "Expected to see leaf named " + field + " in rec " + rec);
+     // two hops: the element's value object, then the value that object carries
+     // NOTE(review): the intermediate type is not visible from this file -- confirm against DiffElement
+     Assert.assertEquals(value.getValue().getValue(), expected, "Expected leaf named " + field + " to have value " + expected + " in rec " + rec);
+ }
+
+ @Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders")
+ public void testVCF1() {
+ logger.warn("testVCF1");
+ DiffableReader vcfReader = engine.getReader("VCF");
+ Assert.assertTrue(vcfReader.canRead(vcfFile));
+ Assert.assertFalse(vcfReader.canRead(bamFile));
+
+ DiffElement diff = vcfReader.readFromFile(vcfFile, -1);
+ Assert.assertNotNull(diff);
+
+ Assert.assertEquals(diff.getName(), vcfFile.getName());
+ Assert.assertSame(diff.getParent(), DiffElement.ROOT);
+
+ DiffNode node = diff.getValueAsNode();
+ Assert.assertEquals(node.getElements().size(), 9);
+
+ // chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03
+ DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode();
+ testLeaf(rec1, "CHROM", "chr1");
+ testLeaf(rec1, "POS", 2646);
+ testLeaf(rec1, "ID", "rs62635284");
+ testLeaf(rec1, "REF", Allele.create("G", true));
+ testLeaf(rec1, "ALT", new HashSet(Arrays.asList(Allele.create("A"))));
+ testLeaf(rec1, "QUAL", 0.15);
+ testLeaf(rec1, "FILTER", Collections.