diff --git a/build.xml b/build.xml
index b9fdd5dbe..80627fae0 100644
--- a/build.xml
+++ b/build.xml
@@ -28,6 +28,7 @@
+
@@ -44,11 +45,11 @@
-
-
+
+
-
-
+
+
@@ -101,7 +102,7 @@
-
+
@@ -126,14 +127,14 @@
-
+
-
+
-
+
@@ -227,6 +228,10 @@
+
+
+
+
@@ -250,7 +255,7 @@
-
+
@@ -285,7 +290,7 @@
depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source"
description="compile the GATK source" />
-
+
@@ -297,7 +302,16 @@
-
+
+
+
+
+
+
+
+
+
+
@@ -310,7 +324,7 @@
+ description="create GATK contracts" if="include.contracts" />
@@ -448,7 +462,7 @@
-
+
@@ -659,7 +673,7 @@
-
+
-
-
+
@@ -815,7 +831,7 @@
-
+
@@ -823,7 +839,7 @@
-
+
@@ -916,8 +932,8 @@
-
-
+
+
@@ -939,7 +955,7 @@
-
+
@@ -964,7 +980,7 @@
-
+
diff --git a/ivy.xml b/ivy.xml
index 10e4ee570..3f3d1c97f 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -63,6 +63,10 @@
+
+
+
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java
new file mode 100644
index 000000000..cecbedda8
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2009 The Broad Institute
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.filters;
+
+import net.sf.picard.util.QualityUtil;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.QualityUtils;
+
+/**
+ * Read filter that discards reads whose mapping quality equals
+ * {@link QualityUtils#MAPPING_QUALITY_UNAVAILABLE}.
+ *
+ * @author ebanks
+ * @version 0.1
+ */
+
+public class MappingQualityUnavailableReadFilter extends ReadFilter {
+    /**
+     * @param rec the read to test
+     * @return true when the read's mapping quality is the "unavailable" marker value
+     */
+    public boolean filterOut(SAMRecord rec) {
+        final int mappingQuality = rec.getMappingQuality();
+        return QualityUtils.MAPPING_QUALITY_UNAVAILABLE == mappingQuality;
+    }
+}
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
similarity index 90%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
index 7e6fc5e82..e49d4117c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
@@ -24,17 +24,16 @@
package org.broadinstitute.sting.gatk.filters;
-import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
/**
- * Filter out zero mapping quality reads.
+ * Filter out mapping quality zero reads.
*
* @author hanna
* @version 0.1
*/
-public class ZeroMappingQualityReadFilter extends ReadFilter {
+public class MappingQualityZeroReadFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == 0);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java
old mode 100644
new mode 100755
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java
old mode 100644
new mode 100755
index 6ff0384a0..4b4ebe450
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java
@@ -55,10 +55,14 @@ public class TableFeature implements Feature {
}
public List getAllValues() {
- return getValuesTo(values.size()-1);
+ return getValuesTo(values.size());
}
public List getValuesTo(int columnPosition) {
return values.subList(0,columnPosition);
}
+
+ public List getHeader() {
+ return keys;
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
index 0be737897..51d290763 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
@@ -62,5 +62,5 @@ public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAn
public List getKeyNames() { return Arrays.asList("AB"); }
- public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), -1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); }
+ public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); }
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java
index 143722d7c..f3ec2b1df 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java
@@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@@ -41,8 +42,8 @@ import java.util.*;
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
- private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
- new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
+ private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
+ new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
index 754d28dfd..ee66b50ee 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
@@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@@ -142,5 +143,5 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot
// public String getIndelBases()
public List getKeyNames() { return Arrays.asList("AD"); }
- public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFCompoundHeaderLine.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
+ public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
index 11f86b972..8260a5a81 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
@@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@@ -21,7 +22,7 @@ public class MappingQualityRankSumTest extends RankSumTest {
protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) {
for ( final PileupElement p : pileup ) {
- if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 and 255 are special mapping qualities used as a code by aligners
+ if ( isUsableBase(p) ) {
if ( p.getBase() == ref ) {
refQuals.add((double)p.getMappingQual());
} else if ( p.getBase() == alt ) {
@@ -34,7 +35,7 @@ public class MappingQualityRankSumTest extends RankSumTest {
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
for (final PileupElement p: pileup) {
- if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() < 254) {
+ if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
// retrieve likelihood information corresponding to this read
LinkedHashMap el = indelLikelihoodMap.get(p);
// by design, first element in LinkedHashMap was ref allele
@@ -54,8 +55,6 @@ public class MappingQualityRankSumTest extends RankSumTest {
refQuals.add((double)p.getMappingQual());
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
altQuals.add((double)p.getMappingQual());
-
-
}
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
index ba3e2cc8b..3b64abfff 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
@@ -47,5 +47,5 @@ public class NBaseCount implements InfoFieldAnnotation {
public List getKeyNames() { return Arrays.asList("PercentNBaseSolid"); }
- public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 4, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); }
+ public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 1, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); }
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java
index 6e80c7555..1ef7ccd0b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java
@@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@@ -38,8 +39,10 @@ public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotatio
pileup = context.getBasePileup();
if (pileup != null) {
- for (PileupElement p : pileup )
- qualities[index++] = p.getRead().getMappingQuality();
+ for (PileupElement p : pileup ) {
+ if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
+ qualities[index++] = p.getMappingQual();
+ }
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
index 1a967293f..f00abd6a1 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
@@ -106,6 +106,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnota
protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals);
protected static boolean isUsableBase( final PileupElement p ) {
- return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed quality score here
+ return !( p.isDeletion() ||
+ p.getMappingQual() == 0 ||
+ p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
+ ((int)p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE ); // need the unBAQed quality score here
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java
index f287549bb..a670532af 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java
@@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@@ -200,8 +201,8 @@ public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation {
1,
VCFHeaderLineType.Integer,
"Total read depth per sample, including MQ0"),
- new VCFFormatHeaderLine(getKeyNames().get(1),
- VCFCompoundHeaderLine.UNBOUNDED,
+ new VCFFormatHeaderLine(getKeyNames().get(1),
+ VCFHeaderLineCount.UNBOUNDED,
VCFHeaderLineType.Float,
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java
index 82f16be42..e2fd2a3d4 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java
@@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@@ -65,5 +66,5 @@ public class SampleList implements InfoFieldAnnotation {
public List getKeyNames() { return Arrays.asList("Samples"); }
- public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFInfoHeaderLine.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
+ public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java
new file mode 100644
index 000000000..f7a395d9d
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import net.sf.samtools.*;
+import net.sf.samtools.util.BlockCompressedInputStream;
+import org.broad.tribble.readers.AsciiLineReader;
+import org.broad.tribble.readers.LineReader;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.variantcontext.Genotype;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 1:09 PM
+ *
+ * {@link DiffableReader} implementation for BAM files.  Reads at most
+ * {@code MAX_RECORDS_TO_READ} records from the start of a file and stores each
+ * record's fields and attributes in its own {@link DiffNode} under a root node.
+ */
+public class BAMDiffableReader implements DiffableReader {
+    /** Upper bound on the number of records consumed from a single file. */
+    private final static int MAX_RECORDS_TO_READ = 1000;
+
+    @Override
+    public String getName() { return "BAM"; }
+
+    /**
+     * Builds a diff tree from the leading records of the given file.
+     *
+     * @param file the file to read
+     * @return the result of {@code getBinding()} on the root node, which is named after the file
+     */
+    @Override
+    public DiffElement readFromFile(File file) {
+        final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
+        try {
+            reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
+
+            DiffNode root = DiffNode.rooted(file.getName());
+            SAMRecordIterator iterator = reader.iterator();
+
+            int count = 0;
+            while ( iterator.hasNext() ) {
+                // count is the number of records already consumed, so >= enforces the cap exactly
+                if ( count++ >= MAX_RECORDS_TO_READ )
+                    break;
+                final SAMRecord record = iterator.next();
+
+                // name is the read name + first of pair
+                String name = record.getReadName().replace('.', '_');
+                if ( record.getReadPairedFlag() ) {
+                    name += record.getFirstOfPairFlag() ? "_1" : "_2";
+                }
+
+                DiffNode readRoot = DiffNode.empty(name, root);
+
+                // add fields
+                readRoot.add("NAME", record.getReadName());
+                readRoot.add("FLAGS", record.getFlags());
+                readRoot.add("RNAME", record.getReferenceName());
+                readRoot.add("POS", record.getAlignmentStart());
+                readRoot.add("MAPQ", record.getMappingQuality());
+                readRoot.add("CIGAR", record.getCigarString());
+                readRoot.add("RNEXT", record.getMateReferenceName());
+                readRoot.add("PNEXT", record.getMateAlignmentStart());
+                readRoot.add("TLEN", record.getInferredInsertSize());
+                readRoot.add("SEQ", record.getReadString());
+                readRoot.add("QUAL", record.getBaseQualityString());
+
+                for ( SAMRecord.SAMTagAndValue xt : record.getAttributes() ) {
+                    readRoot.add(xt.tag, xt.value);
+                }
+
+                // add record to root
+                if ( ! root.hasElement(name) )
+                    // protect ourselves from malformed files
+                    root.add(readRoot);
+            }
+
+            return root.getBinding();
+        } finally {
+            // release the underlying file even when reading a record throws
+            reader.close();
+        }
+    }
+
+    /**
+     * Tests whether the file, read through a {@link BlockCompressedInputStream},
+     * begins with the bytes of {@code "BAM\1"}.
+     *
+     * @param file the file to probe
+     * @return true if the leading bytes match; false if they do not, or if an
+     *         IOException or FileTruncatedException occurs while reading
+     */
+    @Override
+    public boolean canRead(File file) {
+        final byte[] BAM_MAGIC = "BAM\1".getBytes();
+        final byte[] buffer = new byte[BAM_MAGIC.length];
+        FileInputStream fstream = null;
+        try {
+            fstream = new FileInputStream(file);
+            new BlockCompressedInputStream(fstream).read(buffer,0,BAM_MAGIC.length);
+            return Arrays.equals(buffer, BAM_MAGIC);
+        } catch ( IOException e ) {
+            return false;
+        } catch ( net.sf.samtools.FileTruncatedException e ) {
+            return false;
+        } finally {
+            // always release the file handle opened for the probe
+            if ( fstream != null ) {
+                try {
+                    fstream.close();
+                } catch ( IOException ignored ) {
+                    // nothing useful can be done if closing the probe stream fails
+                }
+            }
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java
new file mode 100644
index 000000000..eff24bb88
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import com.google.java.contract.*;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:55 PM
+ *
+ * A named element of a diff tree.  Each element binds a name and a parent
+ * element to a {@link DiffValue}; only the shared {@link #ROOT} element has a
+ * null parent.
+ */
+@Invariant({
+        "name != null",
+        "value != null",
+        "parent != null || name.equals(\"ROOT\")",
+        "value == null || value.getBinding() == this"})
+public class DiffElement {
+    /** The single parentless element, named "ROOT". */
+    public final static DiffElement ROOT = new DiffElement();
+
+    final private String name;
+    final private DiffElement parent;
+    final private DiffValue value;
+
+    /**
+     * For ROOT only
+     */
+    private DiffElement() {
+        this.name = "ROOT";
+        this.parent = null;
+        this.value = new DiffValue(this, "ROOT");
+    }
+
+    /**
+     * Creates a non-root element and binds the given value to it.
+     *
+     * @param name   the element name; "ROOT" is reserved
+     * @param parent the enclosing element
+     * @param value  the value to hold; its binding is set to this element
+     * @throws IllegalArgumentException if name is "ROOT"
+     */
+    @Requires({"name != null", "parent != null", "value != null"})
+    public DiffElement(String name, DiffElement parent, DiffValue value) {
+        if ( name.equals("ROOT") ) throw new IllegalArgumentException("Cannot use reserved name ROOT");
+        this.name = name;
+        this.parent = parent;
+        this.value = value;
+        this.value.setBinding(this);
+    }
+
+    @Ensures({"result != null"})
+    public String getName() {
+        return name;
+    }
+
+    /** @return the parent element, or null for ROOT */
+    public DiffElement getParent() {
+        return parent;
+    }
+
+    @Ensures({"result != null"})
+    public DiffValue getValue() {
+        return value;
+    }
+
+    public boolean isRoot() { return this == ROOT; }
+
+    @Ensures({"result != null"})
+    @Override
+    public String toString() {
+        return getName() + "=" + getValue().toString();
+    }
+
+    /**
+     * Renders this element as name=value, prefixed by {@code offset} spaces.
+     *
+     * @param offset number of leading spaces; values of zero or less add no prefix
+     * @return the indented string form, with the offset passed on to the value
+     */
+    public String toString(int offset) {
+        // an empty string, not the int 0, so that no stray "0" is prepended when there is no indent
+        final String indent = offset > 0 ? Utils.dupString(' ', offset) : "";
+        return indent + getName() + "=" + getValue().toString(offset);
+    }
+
+    /**
+     * @return the names from the top-most non-root ancestor down to this element,
+     *         joined with '.'; the empty string for ROOT
+     */
+    @Ensures({"result != null"})
+    public final String fullyQualifiedName() {
+        if ( isRoot() )
+            return "";
+        else if ( parent.isRoot() )
+            return name;
+        else
+            return parent.fullyQualifiedName() + "." + name;
+    }
+
+    @Ensures({"result != null"})
+    public String toOneLineString() {
+        return getName() + "=" + getValue().toOneLineString();
+    }
+
+    /**
+     * @return this element's value cast to a {@link DiffNode}
+     * @throws ReviewedStingException if the value does not report itself as compound
+     */
+    @Ensures({"result != null"})
+    public DiffNode getValueAsNode() {
+        if ( getValue().isCompound() )
+            return (DiffNode)getValue();
+        else
+            throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
new file mode 100644
index 000000000..ba2713bff
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import com.google.java.contract.Requires;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.report.GATKReport;
+import org.broadinstitute.sting.gatk.report.GATKReportTable;
+import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.classloader.PluginManager;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.*;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:51 PM
+ * A generic engine for comparing tree-structured objects
+ */
+public class DiffEngine {
+ final protected static Logger logger = Logger.getLogger(DiffEngine.class);
+
+ private final Map readers = new HashMap();
+
+ /**
+ * Creates an engine and populates its reader map by calling loadDiffableReaders().
+ */
+ public DiffEngine() {
+ loadDiffableReaders();
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // difference calculation
+ //
+ // --------------------------------------------------------------------------------
+
+ /**
+ * Computes the differences between two elements.
+ *
+ * If both values are compound, the two nodes are compared child by child. If both
+ * are atomic, their values are compared directly. If one is compound and the other
+ * atomic, the mismatch itself is reported as a single structural Difference.
+ *
+ * @param master the expected element
+ * @param test the element to compare against master
+ * @return the list of differences found; empty if there are none
+ */
+ public List diff(DiffElement master, DiffElement test) {
+ DiffValue masterValue = master.getValue();
+ DiffValue testValue = test.getValue();
+
+ // both sides must be checked: testing only master would send an atomic test
+ // value into getValueAsNode(), which throws instead of reporting a difference
+ if ( masterValue.isCompound() && testValue.isCompound() ) {
+ return diff(master.getValueAsNode(), test.getValueAsNode());
+ } else if ( masterValue.isAtomic() && testValue.isAtomic() ) {
+ return diff(masterValue, testValue);
+ } else {
+ // structural difference in types. one is node, other is leaf
+ return Arrays.asList(new Difference(master, test));
+ }
+ }
+
+ /**
+ * Computes the differences between two compound nodes by walking the union of their
+ * element names. A name present in only one node yields a Difference with a null
+ * element on the missing side; a name present in both is compared recursively.
+ *
+ * @param master the expected node
+ * @param test the node to compare against master
+ * @return the accumulated list of differences; empty if there are none
+ * @throws ReviewedStingException if a name from the union resolves to null in both nodes
+ */
+ public List diff(DiffNode master, DiffNode test) {
+ // union of the names bound in either node
+ Set allNames = new HashSet(master.getElementNames());
+ allNames.addAll(test.getElementNames());
+ List diffs = new ArrayList();
+
+ for ( String name : allNames ) {
+ DiffElement masterElt = master.getElement(name);
+ DiffElement testElt = test.getElement(name);
+ if ( masterElt == null && testElt == null ) {
+ throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
+ } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
+ // todo -- should one of these be a special MISSING item?
+ diffs.add(new Difference(masterElt, testElt));
+ } else {
+ diffs.addAll(diff(masterElt, testElt));
+ }
+ }
+
+ return diffs;
+ }
+
+ /**
+ * Compares two values using equals() on their underlying objects.
+ *
+ * @param master the expected value
+ * @param test the value to compare against master
+ * @return an empty list if the underlying values are equal, otherwise a single
+ * Difference built from the two values' bindings
+ */
+ public List diff(DiffValue master, DiffValue test) {
+ if ( master.getValue().equals(test.getValue()) ) {
+ return Collections.emptyList();
+ } else {
+ return Arrays.asList(new Difference(master.getBinding(), test.getBinding()));
+ }
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // Summarizing differences
+ //
+ // --------------------------------------------------------------------------------
+
+ /**
+ * Emits a summary of the diffs to params.out. Suppose you have the following three differences:
+ *
+ * A.X.Z:1!=2
+ * A.Y.Z:3!=4
+ * B.X.Z:5!=6
+ *
+ * The above is the itemized list of the differences. The summary looks for common differences
+ * in the name hierarchy, counts those shared elements, and emits the differences that occur
+ * in order of decreasing counts.
+ *
+ * So, in the above example, what are the shared elements?
+ *
+ * A.X.Z and B.X.Z share X.Z, so there's a *.X.Z with count 2
+ * A.X.Z, A.Y.Z, and B.X.Z all share *.*.Z, with count 3
+ * Each of A.X.Z, A.Y.Z, and B.X.Z are individually unique, with count 1
+ *
+ * So we would emit the following summary:
+ *
+ * *.*.Z: 3
+ * *.X.Z: 2
+ * A.X.Z: 1 [specific difference: 1!=2]
+ * A.Y.Z: 1 [specific difference: 3!=4]
+ * B.X.Z: 1 [specific difference: 5!=6]
+ *
+ * The algorithm to accomplish this calculation is relatively simple. Start with all of the
+ * concrete differences. For each pair of differences A1.A2....AN and B1.B2....BN
+ * (a difference is also paired with itself, which catalogs its own concrete path):
+ *
+ * find the length k of the longest common postfix, i.e. the longest run of trailing
+ * parts that are equal in both paths
+ * If k == 0, there's no shared substructure and the concrete path is cataloged as is
+ * If k > 0, generate the summarized value X by replacing every part before the shared
+ * postfix with *
+ * if X is not yet a known summary, add it to the catalog
+ *
+ * Once the catalog is built, each concrete difference increments the count of every
+ * summary it matches.
+ *
+ * Note that only pairs of the same length are considered as potentially equivalent
+ *
+ * @param diffs the itemized differences to summarize
+ * @param params determines where the report is written and how many items are displayed
+ */
+ public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) {
+ printSummaryReport(summarizeDifferences(diffs), params );
+ }
+
+ /**
+ * Converts each difference's fully qualified name into its path parts and
+ * summarizes the resulting paths.
+ *
+ * @param diffs the itemized differences
+ * @return the summaries, as ordered by summarizedDifferencesOfPaths
+ */
+ public List summarizeDifferences(List diffs) {
+ List diffPaths = new ArrayList(diffs.size());
+
+ for ( Difference diff1 : diffs ) {
+ diffPaths.add(diffNameToPath(diff1.getFullyQualifiedName()));
+ }
+
+ return summarizedDifferencesOfPaths(diffPaths);
+ }
+
+ /**
+ * Splits a dot-separated name such as A.B.C into its parts [A, B, C].
+ *
+ * @param diffName the dot-separated name
+ * @return the parts of the name, split on the literal '.' character
+ */
+ final protected static String[] diffNameToPath(String diffName) {
+ // the argument to split() is a regex, so the dot has to be escaped
+ return diffName.split("\\.");
+ }
+
+ /**
+ * Builds the catalog of summarized paths from all equal-length pairs of paths,
+ * counts how many of the concrete paths match each summary, and returns the
+ * summaries sorted by their natural ordering.
+ *
+ * @param diffPaths the concrete difference paths, each already split into parts
+ * @return the sorted list of summaries
+ */
+ protected List summarizedDifferencesOfPaths(List diffPaths) {
+ Map summaries = new HashMap();
+
+ // create the initial set of differences
+ for ( int i = 0; i < diffPaths.size(); i++ ) {
+ // j <= i: every path is also paired with itself, which catalogs the concrete path
+ for ( int j = 0; j <= i; j++ ) {
+ String[] diffPath1 = diffPaths.get(i);
+ String[] diffPath2 = diffPaths.get(j);
+ if ( diffPath1.length == diffPath2.length ) {
+ int lcp = longestCommonPostfix(diffPath1, diffPath2);
+ String path = lcp > 0 ? summarizedPath(diffPath2, lcp) : Utils.join(".", diffPath2);
+ // onlyCatalog == true: register the path without touching its count
+ addSummary(summaries, path, true);
+ }
+ }
+ }
+
+ // count differences
+ for ( String[] diffPath : diffPaths ) {
+ for ( SummarizedDifference sumDiff : summaries.values() ) {
+ if ( sumDiff.matches(diffPath) )
+ // the path is already a key here, so this only increments the count
+ // and never adds an entry to the map being iterated
+ addSummary(summaries, sumDiff.getPath(), false);
+ }
+ }
+
+ List sortedSummaries = new ArrayList(summaries.values());
+ Collections.sort(sortedSummaries);
+ return sortedSummaries;
+ }
+
+ /**
+ * Registers path in summaries, or increments its count if it is already present.
+ *
+ * @param summaries the catalog, keyed by summary path
+ * @param path the summary path to register or count
+ * @param onlyCatalog if true, an already-known path is left untouched; if false,
+ * an already-known path has its count incremented. A path that
+ * is not yet known is added in either case, and its count is
+ * not incremented by this call
+ */
+ private static void addSummary(Map summaries, String path, boolean onlyCatalog) {
+ if ( summaries.containsKey(path) ) {
+ if ( ! onlyCatalog )
+ summaries.get(path).incCount();
+ } else {
+ SummarizedDifference sumDiff = new SummarizedDifference(path);
+ summaries.put(sumDiff.getPath(), sumDiff);
+ }
+ }
+
+ /**
+ * Writes the summaries as a GATKReport table to params.out, one row per summary,
+ * keyed by the summary path with its number of occurrences.
+ *
+ * Output stops at the first summary that violates one of the limits in params:
+ * a count below minSumDiffToShow, more than maxItemsToDisplay rows in total, or
+ * more than maxCountOneItems rows whose count is exactly 1. A value of 0 for
+ * maxItemsToDisplay or maxCountOneItems disables that limit.
+ *
+ * @param sortedSummaries the summaries, expected in order of decreasing count
+ * @param params the output stream and display limits
+ */
+ protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) {
+ GATKReport report = new GATKReport();
+ // table name is kept exactly as is, since it is the name the report table is registered and fetched under
+ final String tableName = "diffences";
+ report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information");
+ GATKReportTable table = report.getTable(tableName);
+ table.addPrimaryKey("Difference", true);
+ table.addColumn("NumberOfOccurrences", 0);
+
+ int count = 0, count1 = 0;
+ for ( SummarizedDifference diff : sortedSummaries ) {
+ if ( diff.getCount() < params.minSumDiffToShow )
+ // in order, so break as soon as the count is too low
+ break;
+
+ // pre-increment so that exactly maxItemsToDisplay rows are emitted, matching
+ // how the count-one limit below is enforced (post-increment let one extra row through)
+ if ( params.maxItemsToDisplay != 0 && ++count > params.maxItemsToDisplay )
+ break;
+
+ if ( diff.getCount() == 1 ) {
+ count1++;
+ if ( params.maxCountOneItems != 0 && count1 > params.maxCountOneItems )
+ break;
+ }
+
+ table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
+ }
+
+ table.write(params.out);
+ }
+
+ /**
+ * Counts how many trailing parts the two paths have in common, comparing from the
+ * last index backwards and stopping at the first mismatch.
+ *
+ * Both arrays are indexed with positions derived from diffPath1.length; the caller
+ * in this class only passes paths of equal length.
+ *
+ * @param diffPath1 the first path
+ * @param diffPath2 the second path
+ * @return the number of equal trailing parts, between 0 and diffPath1.length
+ */
+ protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) {
+ int i = 0;
+ for ( ; i < diffPath1.length; i++ ) {
+ // j walks from the last part towards the first
+ int j = diffPath1.length - i - 1;
+ if ( ! diffPath1[j].equals(diffPath2[j]) )
+ break;
+ }
+ return i;
+ }
+
+ /**
+ * Replaces every part before the shared postfix with "*" and joins the result with ".".
+ *
+ * parts is [A B C D]
+ * commonPostfixLength: how many parts are shared at the end, suppose it's 2
+ * We want to create a string *.*.C.D
+ *
+ * @param parts the concrete path; not modified by this method
+ * @param commonPostfixLength the number of trailing parts to keep as is
+ * @return the summarized, dot-joined path
+ */
+ protected static String summarizedPath(String[] parts, int commonPostfixLength) {
+ int stop = parts.length - commonPostfixLength;
+ // work on a copy so the caller's array is left untouched; no copy needed if nothing is replaced
+ if ( stop > 0 ) parts = parts.clone();
+ for ( int i = 0; i < stop; i++ ) {
+ parts[i] = "*";
+ }
+ return Utils.join(".", parts);
+ }
+
+ /**
+ * A summary path (possibly containing * wildcards) together with the number of
+ * concrete differences counted against it. Sorts by decreasing count, then by path.
+ *
+ * TODO -- all of the algorithms above should use SummarizedDifference instead
+ * TODO -- of some SummarizedDifferences and some low-level String[]
+ */
+ public static class SummarizedDifference implements Comparable {
+ final String path; // X.Y.Z
+ final String[] parts; // path split on '.'
+ int count = 0; // number of times incCount() has been called
+
+ /**
+ * @param path the dot-separated summary path, e.g. *.X.Z
+ */
+ public SummarizedDifference(String path) {
+ this.path = path;
+ this.parts = diffNameToPath(path);
+ }
+
+ /** Adds one to the count of this summary. */
+ public void incCount() { count++; }
+
+ /**
+ * @return the current count of this summary
+ */
+ public int getCount() {
+ return count;
+ }
+
+ /**
+ * The fully qualified path object A.B.C etc
+ * @return the dot-separated path this summary was created with
+ */
+ public String getPath() {
+ return path;
+ }
+
+ /**
+ * @return the length of the parts of this summary
+ */
+ public int length() {
+ return this.parts.length;
+ }
+
+ /**
+ * Returns true if the string parts matches this summary. To match, otherParts
+ * must have the same length as this summary and must be equal() everywhere
+ * where this summary isn't *.
+ * @param otherParts the concrete path parts to test
+ * @return true if otherParts matches this summary
+ */
+ public boolean matches(String[] otherParts) {
+ if ( otherParts.length != length() )
+ return false;
+
+ // TODO optimization: can start at right most non-star element
+ for ( int i = 0; i < length(); i++ ) {
+ String part = parts[i];
+ if ( ! part.equals("*") && ! part.equals(otherParts[i]) )
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+ * @return this summary rendered as path:count
+ */
+ @Override
+ public String toString() {
+ return String.format("%s:%d", getPath(), getCount());
+ }
+
+ /**
+ * Orders summaries by decreasing count; ties are broken by increasing path.
+ */
+ @Override
+ public int compareTo(SummarizedDifference other) {
+ // sort first highest to lowest count, then by lowest to highest path
+ int countCmp = Integer.valueOf(count).compareTo(other.count);
+ return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path);
+ }
+
+
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // plugin manager
+ //
+ // --------------------------------------------------------------------------------
+
+ /**
+ * Asks a PluginManager for the DiffableReader plugin classes, instantiates each
+ * one through its no-argument constructor, and registers the instance in the
+ * readers map under the name returned by its getName().
+ *
+ * @throws ReviewedStingException if a reader class cannot be instantiated or accessed
+ */
+ public void loadDiffableReaders() {
+ List> drClasses = new PluginManager( DiffableReader.class ).getPlugins();
+
+ logger.info("Loading diffable modules:");
+ for (Class extends DiffableReader> drClass : drClasses ) {
+ logger.info("\t" + drClass.getSimpleName());
+
+ try {
+ DiffableReader dr = drClass.newInstance();
+ readers.put(dr.getName(), dr);
+ } catch (InstantiationException e) {
+ // NOTE(review): the caught exception is not attached as a cause, so its stack trace is lost
+ throw new ReviewedStingException("Unable to instantiate module '" + drClass.getSimpleName() + "'");
+ } catch (IllegalAccessException e) {
+ // NOTE(review): same here, the original exception is dropped
+ throw new ReviewedStingException("Illegal access error when trying to instantiate '" + drClass.getSimpleName() + "'");
+ }
+ }
+ }
+
+ /**
+ * @return the map of registered readers, keyed by reader name. This is the
+ * engine's own map, not a copy
+ */
+ protected Map getReaders() {
+ return readers;
+ }
+
+ /**
+ * @param name the reader name to look up
+ * @return the reader registered under name, or null if there is none
+ */
+ protected DiffableReader getReader(String name) {
+ return readers.get(name);
+ }
+
+ /**
+ * Returns a reader appropriate for this file, or null if no such reader exists.
+ * The result is the first registered reader whose canRead(file) returns true.
+ * @param file the file to find a reader for
+ * @return a reader that reports it can read file, or null
+ */
+ public DiffableReader findReaderForFile(File file) {
+ for ( DiffableReader reader : readers.values() )
+ if (reader.canRead(file) )
+ return reader;
+
+ return null;
+ }
+
+ /**
+ * Returns true if a reader appropriate for this file exists, or false if no such reader exists
+ * @param file the file to check
+ * @return true if findReaderForFile(file) finds a reader
+ */
+ public boolean canRead(File file) {
+ return findReaderForFile(file) != null;
+ }
+
+ /**
+ * Reads file with the reader found by findReaderForFile.
+ *
+ * @param file the file to read
+ * @return the DiffElement produced by the reader's readFromFile(file)
+ * @throws UserException if no registered reader can read file
+ */
+ public DiffElement createDiffableFromFile(File file) {
+ DiffableReader reader = findReaderForFile(file);
+ if ( reader == null )
+ throw new UserException("Unsupported file type: " + file);
+ else
+ return reader.readFromFile(file);
+ }
+
+ /**
+ * Convenience entry point: creates a fresh DiffEngine, diffs the two files, and
+ * writes the summarized differences according to params.
+ *
+ * @param masterFile the file with the expected results
+ * @param testFile the file to compare against masterFile
+ * @param params the output stream and display limits for the summary report
+ * @return true if both files could be read and the report was produced;
+ * false, without reporting anything, if either file has no reader
+ */
+ public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) {
+ DiffEngine diffEngine = new DiffEngine();
+
+ if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) {
+ DiffElement master = diffEngine.createDiffableFromFile(masterFile);
+ DiffElement test = diffEngine.createDiffableFromFile(testFile);
+ List diffs = diffEngine.diff(master, test);
+ diffEngine.reportSummarizedDifferences(diffs, params);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ /**
+ * Bundles the destination stream and the display limits read by printSummaryReport.
+ */
+ public static class SummaryReportParams {
+ // stream the report table is written to
+ PrintStream out = System.out;
+ // upper bound on the number of rows displayed; 0 disables the limit
+ int maxItemsToDisplay = 0;
+ // upper bound on the number of rows with a count of exactly 1; 0 disables the limit
+ int maxCountOneItems = 0;
+ // summaries with a count below this value end the report
+ int minSumDiffToShow = 0;
+
+ /**
+ * @param out stream the report is written to
+ * @param maxItemsToDisplay limit on displayed rows; 0 for no limit
+ * @param maxCountOneItems limit on displayed rows whose count is 1; 0 for no limit
+ * @param minSumDiffToShow minimum count a summary needs in order to be displayed
+ */
+ public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) {
+ this.out = out;
+ this.maxItemsToDisplay = maxItemsToDisplay;
+ this.maxCountOneItems = maxCountOneItems;
+ this.minSumDiffToShow = minSumDiffToShow;
+ }
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java
new file mode 100644
index 000000000..0720e18c0
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.*;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:55 PM
+ *
+ * A compound DiffValue: its value is a map from element name to DiffElement,
+ * giving the diff engine a set of named children to compare
+ */
+public class DiffNode extends DiffValue {
+ /**
+ * @return the underlying value of this node, cast to the map of its elements
+ */
+ private Map getElementMap() {
+ return (Map)super.getValue();
+ }
+ /** @return a new, empty HashMap to hold the elements of a node */
+ private static Map emptyElements() { return new HashMap(); }
+
+ /**
+ * Creates a node holding elements, without a binding.
+ * @param elements the map that becomes the value of this node
+ */
+ private DiffNode(Map elements) {
+ super(elements);
+ }
+
+ /**
+ * Creates a node holding elements and bound to binding.
+ * @param binding the element this node is the value of
+ * @param elements the map that becomes the value of this node
+ */
+ private DiffNode(DiffElement binding, Map elements) {
+ super(binding, elements);
+ }
+
+ // ---------------------------------------------------------------------------
+ //
+ // constructors
+ //
+ // ---------------------------------------------------------------------------
+
+ /**
+ * Creates an empty node named name whose parent is DiffElement.ROOT.
+ * @param name the name of the new node's element
+ * @return the new, empty node
+ */
+ public static DiffNode rooted(String name) {
+ return empty(name, DiffElement.ROOT);
+ }
+
+ /**
+ * Creates an empty node and the DiffElement that binds it under parent.
+ * @param name the name of the new node's element
+ * @param parent the element the new element is placed under
+ * @return the new, empty node, bound to its newly created element
+ */
+ public static DiffNode empty(String name, DiffElement parent) {
+ DiffNode df = new DiffNode(emptyElements());
+ DiffElement elt = new DiffElement(name, parent, df);
+ df.setBinding(elt);
+ return df;
+ }
+
+ /**
+ * Same as empty(String, DiffElement), using the binding of parent as the parent element.
+ * @param name the name of the new node's element
+ * @param parent the value whose binding becomes the parent element
+ * @return the new, empty node
+ */
+ public static DiffNode empty(String name, DiffValue parent) {
+ return empty(name, parent.getBinding());
+ }
+
+ // ---------------------------------------------------------------------------
+ //
+ // accessors
+ //
+ // ---------------------------------------------------------------------------
+
+ /** @return false, always: a DiffNode is never atomic */
+ @Override
+ public boolean isAtomic() { return false; }
+
+ /**
+ * @return the key set of the element map, i.e. the names bound in this node
+ */
+ public Collection getElementNames() {
+ return getElementMap().keySet();
+ }
+
+ /**
+ * @return the values of the element map, i.e. the elements bound in this node
+ */
+ public Collection getElements() {
+ return getElementMap().values();
+ }
+
+ /**
+ * Collects either the atomic or the compound elements of this node into a new list.
+ * @param atomicOnly if true, keep the elements whose value is atomic; if false,
+ * keep the elements whose value is compound
+ * @return a new list with the selected elements
+ */
+ private Collection getElements(boolean atomicOnly) {
+ List elts = new ArrayList();
+ for ( DiffElement elt : getElements() )
+ if ( (atomicOnly && elt.getValue().isAtomic()) || (! atomicOnly && elt.getValue().isCompound()))
+ elts.add(elt);
+ return elts;
+ }
+
+ /**
+ * @return a new collection with the elements of this node whose value is atomic
+ */
+ public Collection getAtomicElements() {
+ return getElements(true);
+ }
+
+ /**
+ * @return a new collection with the elements of this node whose value is compound
+ */
+ public Collection getCompoundElements() {
+ return getElements(false);
+ }
+
+ /**
+ * Looks up the element bound to name in this node.
+ *
+ * Elements are stored in the map under their own name (see add(DiffElement)), so a
+ * direct map lookup finds the same element a scan over all values would, without
+ * visiting every element on each call.
+ *
+ * @param name the element name to look up
+ * @return the element bound to name, or null if name is not bound in this node
+ */
+ public DiffElement getElement(String name) {
+ return (DiffElement) getElementMap().get(name);
+ }
+
+ /**
+ * Returns true if name is bound in this node
+ * @param name the element name to check
+ * @return true if getElement(name) is not null
+ */
+ public boolean hasElement(String name) {
+ return getElement(name) != null;
+ }
+
+ // ---------------------------------------------------------------------------
+ //
+ // add
+ //
+ // ---------------------------------------------------------------------------
+
+ /**
+ * Binds elt in this node under its own name.
+ * @param elt the element to add; must not be null
+ * @throws IllegalArgumentException if an element with the same name is already bound
+ */
+ @Requires("elt != null")
+ public void add(DiffElement elt) {
+ if ( getElementMap().containsKey(elt.getName()) )
+ throw new IllegalArgumentException("Attempting to rebind already existing binding: " + elt + " node=" + this);
+ getElementMap().put(elt.getName(), elt);
+ }
+
+ /**
+ * Adds the binding of elt to this node.
+ * @param elt the value whose binding is added; must not be null
+ */
+ @Requires("elt != null")
+ public void add(DiffValue elt) {
+ add(elt.getBinding());
+ }
+
+ /**
+ * Adds every element of elts to this node, one by one, in iteration order.
+ * @param elts the elements to add; must not be null
+ */
+ @Requires("elts != null")
+ public void add(Collection elts) {
+ for ( DiffElement e : elts )
+ add(e);
+ }
+
+ /**
+ * Wraps value in a new DiffValue, binds it to a new element called name whose
+ * parent is this node's binding, and adds that element to this node.
+ * @param name the name of the new element
+ * @param value the object to wrap
+ */
+ public void add(String name, Object value) {
+ add(new DiffElement(name, this.getBinding(), new DiffValue(value)));
+ }
+
+ // ---------------------------------------------------------------------------
+ //
+ // toString
+ //
+ // ---------------------------------------------------------------------------
+
+ /**
+ * @return the multi-line rendering of this node with an offset of 0
+ */
+ @Override
+ public String toString() {
+ return toString(0);
+ }
+
+ /**
+ * Renders this node over several lines: an opening "(", then the atomic elements
+ * (each at offset + 2), then the compound elements (each at offset + 4), then a
+ * closing ")" indented by offset spaces.
+ * @param offset the indentation of the closing parenthesis, and the base for the
+ * indentation of the elements
+ * @return the multi-line rendering, ending in a newline
+ */
+ @Override
+ public String toString(int offset) {
+ String off = offset > 0 ? Utils.dupString(' ', offset) : "";
+ StringBuilder b = new StringBuilder();
+
+ b.append("(").append("\n");
+ Collection atomicElts = getAtomicElements();
+ for ( DiffElement elt : atomicElts ) {
+ b.append(elt.toString(offset + 2)).append('\n');
+ }
+
+ for ( DiffElement elt : getCompoundElements() ) {
+ b.append(elt.toString(offset + 4)).append('\n');
+ }
+ b.append(off).append(")").append("\n");
+
+ return b.toString();
+ }
+
+ /**
+ * @return this node on a single line: the one-line renderings of all its
+ * elements, separated by single spaces and wrapped in parentheses
+ */
+ @Override
+ public String toOneLineString() {
+ StringBuilder b = new StringBuilder();
+
+ b.append('(');
+ List parts = new ArrayList();
+ for ( DiffElement elt : getElements() )
+ parts.add(elt.toOneLineString());
+ b.append(Utils.join(" ", parts));
+ b.append(')');
+
+ return b.toString();
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // fromString and toOneLineString
+ //
+ // --------------------------------------------------------------------------------
+
+ /**
+ * Parses tree into an element placed directly under DiffElement.ROOT.
+ * @param tree the string form of the tree, e.g. X=(A=A B=B)
+ * @return the element parsed from tree
+ */
+ public static DiffElement fromString(String tree) {
+ return fromString(tree, DiffElement.ROOT);
+ }
+
+ /**
+ * Parses a string of the form name=value into an element under parent. A value
+ * wrapped in parentheses becomes a DiffNode whose space-separated parts are parsed
+ * recursively; any other value becomes an atomic DiffValue holding the string.
+ *
+ * Doesn't support full tree structure parsing: the contents of a parenthesized
+ * value are split on every space, so a nested node containing more than one
+ * element is broken apart.
+ *
+ * @param tree the string to parse, e.g. X=(A=A B=B C=(D=D))
+ * @param parent the element the parsed element is placed under
+ * @return the element parsed from tree
+ * @throws ReviewedStingException if tree has no '=', has an empty value, or has a
+ * value that opens with '(' but does not end with ')'
+ */
+ private static DiffElement fromString(String tree, DiffElement parent) {
+ // X=(A=A B=B C=(D=D))
+ String[] parts = tree.split("=", 2);
+ if ( parts.length != 2 )
+ // Arrays.toString prints the contents; concatenating the array itself would print its identity
+ throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + Arrays.toString(parts));
+ String name = parts[0];
+ String value = parts[1];
+
+ if ( value.length() == 0 )
+ throw new ReviewedStingException("Illegal tree structure: " + value + " at " + tree);
+
+ if ( value.charAt(0) == '(' ) {
+ if ( ! value.endsWith(")") )
+ throw new ReviewedStingException("Illegal tree structure. Missing ): " + value + " at " + tree);
+ // strip the enclosing parentheses
+ String subtree = value.substring(1, value.length()-1);
+ DiffNode rec = DiffNode.empty(name, parent);
+ String[] subParts = subtree.split(" ");
+ for ( String subPart : subParts ) {
+ rec.add(fromString(subPart, rec.getBinding()));
+ }
+ return rec.getBinding();
+ } else {
+ return new DiffValue(name, parent, value).getBinding();
+ }
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java
new file mode 100644
index 000000000..a08108db2
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import org.apache.xmlbeans.impl.tool.Diff;
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.Requires;
+import org.broadinstitute.sting.gatk.walkers.RodWalker;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.List;
+
+/**
+ * Compares two record-oriented files, itemizing specific difference between equivalent
+ * records in the two files. Reports both itemized and summarized differences.
+ * @author Mark DePristo
+ * @version 0.1
+ */
+@Requires(value={})
+public class DiffObjectsWalker extends RodWalker {
+ @Output(doc="File to which results should be written",required=true)
+ protected PrintStream out;
+
+ @Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false)
+ int MAX_RECORDS = 0;
+
+ @Argument(fullName="maxCount1Records", shortName="M1", doc="Max. number of records occuring exactly once in the file to process", required=false)
+ int MAX_COUNT1_RECORDS = 0;
+
+ @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false)
+ int minCountForDiff = 1;
+
+ @Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false)
+ boolean showItemizedDifferences = false;
+
+ @Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true)
+ File masterFile;
+
+ @Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true)
+ File testFile;
+
+ final DiffEngine diffEngine = new DiffEngine();
+
+ @Override
+ public void initialize() {
+
+ }
+
+ @Override
+ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+ return 0;
+ }
+
+ @Override
+ public Integer reduceInit() {
+ return 0;
+ }
+
+ @Override
+ public Integer reduce(Integer counter, Integer sum) {
+ return counter + sum;
+ }
+
+ @Override
+ public void onTraversalDone(Integer sum) {
+ out.printf("Reading master file %s%n", masterFile);
+ DiffElement master = diffEngine.createDiffableFromFile(masterFile);
+ out.printf("Reading test file %s%n", testFile);
+ DiffElement test = diffEngine.createDiffableFromFile(testFile);
+
+// out.printf("Master diff objects%n");
+// out.println(master.toString());
+// out.printf("Test diff objects%n");
+// out.println(test.toString());
+
+ List diffs = diffEngine.diff(master, test);
+ if ( showItemizedDifferences ) {
+ out.printf("Itemized results%n");
+ for ( Difference diff : diffs )
+ out.printf("DIFF: %s%n", diff.toString());
+ }
+
+ DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_RECORDS, MAX_COUNT1_RECORDS, minCountForDiff);
+ diffEngine.reportSummarizedDifferences(diffs, params);
+ }
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java
new file mode 100644
index 000000000..7245e9e8d
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import org.broadinstitute.sting.utils.Utils;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:55 PM
+ *
+ * An interface that must be implemented to allow us to calculate differences
+ * between structured objects
+ */
+public class DiffValue {
+ private DiffElement binding = null;
+ final private Object value;
+
+ public DiffValue(Object value) {
+ this.value = value;
+ }
+
+ public DiffValue(DiffElement binding, Object value) {
+ this.binding = binding;
+ this.value = value;
+ }
+
+ public DiffValue(DiffValue parent, Object value) {
+ this(parent.getBinding(), value);
+ }
+
+ public DiffValue(String name, DiffElement parent, Object value) {
+ this.binding = new DiffElement(name, parent, this);
+ this.value = value;
+ }
+
+ public DiffValue(String name, DiffValue parent, Object value) {
+ this(name, parent.getBinding(), value);
+ }
+
+ public DiffElement getBinding() {
+ return binding;
+ }
+
+ protected void setBinding(DiffElement binding) {
+ this.binding = binding;
+ }
+
+ public Object getValue() {
+ return value;
+ }
+
+ public String toString() {
+ return getValue().toString();
+ }
+
+ public String toString(int offset) {
+ return toString();
+ }
+
+ public String toOneLineString() {
+ return getValue().toString();
+ }
+
+ public boolean isAtomic() { return true; }
+ public boolean isCompound() { return ! isAtomic(); }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java
new file mode 100644
index 000000000..84c2eed10
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+
+import java.io.File;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 1:09 PM
+ *
+ * Interface for readers creating diffable objects from a file
+ */
+public interface DiffableReader {
+ @Ensures("result != null")
+ public String getName();
+
+ @Ensures("result != null")
+ @Requires("file != null")
+ public DiffElement readFromFile(File file);
+
+ @Requires("file != null")
+ public boolean canRead(File file);
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java
new file mode 100644
index 000000000..6627a4cc5
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+/**
+ * Represents a specific difference between two specific DiffElements: one
+ * from the master side and one from the test side. At most one may be null.
+ *
+ * Created 7/4/11, 12:53 PM by depristo.
+ */
+public class Difference {
+    DiffElement master, test;
+
+    /** @throws IllegalArgumentException if master and test are both null */
+    public Difference(DiffElement master, DiffElement test) {
+        if ( master == null && test == null )
+            throw new IllegalArgumentException("Master and test both cannot be null");
+        this.master = master;
+        this.test = test;
+    }
+
+    /** Formats as name:masterValue!=testValue; a null side prints as MISSING. */
+    @Override
+    public String toString() {
+        return getFullyQualifiedName() + ":" + getOneLineString(master) + "!=" + getOneLineString(test);
+    }
+
+    /** @return the fully qualified name of master, or of test when master is null */
+    public String getFullyQualifiedName() {
+        return (master != null ? master : test).fullyQualifiedName();
+    }
+
+    private static String getOneLineString(DiffElement elt) {
+        return elt != null ? elt.getValue().toOneLineString() : "MISSING";
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java
new file mode 100644
index 000000000..743178538
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import org.broad.tribble.readers.AsciiLineReader;
+import org.broad.tribble.readers.LineReader;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.variantcontext.Genotype;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.io.*;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 1:09 PM
+ *
+ * Class implementing diffnode reader for VCF.
+ *
+ * Every record becomes a child node of the root named "chrom:start"; it holds the
+ * fixed VCF columns, the INFO attributes and one child node per genotype.
+ */
+public class VCFDiffableReader implements DiffableReader {
+    /** Prefix the file must start with for canRead() to accept it */
+    private static final String VCF4_HEADER = "##fileformat=VCFv4";
+
+    @Override
+    public String getName() { return "VCF"; }
+
+    /**
+     * Reads every record of a VCF file into a tree of diff nodes.
+     *
+     * NOTE(review): null is returned on an IOException, which conflicts with the
+     * "result != null" postcondition declared on DiffableReader.readFromFile;
+     * kept so callers see the same behavior, but worth revisiting.
+     *
+     * @param file the VCF file to read
+     * @return the binding of the root node, or null if the file could not be read
+     */
+    @Override
+    public DiffElement readFromFile(File file) {
+        DiffNode root = DiffNode.rooted(file.getName());
+        try {
+            LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
+            try {
+                VCFCodec vcfCodec = new VCFCodec();
+                // result is not needed here; presumably this primes the codec and moves the reader past the header
+                vcfCodec.readHeader(lineReader);
+
+                String line = lineReader.readLine();
+                while ( line != null ) {
+                    VariantContext vc = (VariantContext)vcfCodec.decode(line);
+                    root.add(readVariant(vc, root));
+                    line = lineReader.readLine();
+                }
+            } finally {
+                // release the file handle even when reading or decoding fails part-way through
+                lineReader.close();
+            }
+        } catch ( IOException e ) {
+            return null;
+        }
+
+        return root.getBinding();
+    }
+
+    /** Builds the node for one record: fixed columns, INFO attributes, then one child per genotype. */
+    private static DiffNode readVariant(VariantContext vc, DiffNode root) {
+        DiffNode vcRoot = DiffNode.empty(vc.getChr() + ":" + vc.getStart(), root);
+
+        // add fields
+        vcRoot.add("CHROM", vc.getChr());
+        vcRoot.add("POS", vc.getStart());
+        vcRoot.add("ID", vc.hasID() ? vc.getID() : VCFConstants.MISSING_VALUE_v4);
+        vcRoot.add("REF", vc.getReference());
+        vcRoot.add("ALT", vc.getAlternateAlleles());
+        vcRoot.add("QUAL", vc.hasNegLog10PError() ? vc.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4);
+        vcRoot.add("FILTER", vc.getFilters());
+
+        // add info fields, skipping keys that start with "_" and the ID key (presumably because ID is emitted above)
+        for (Map.Entry<String, ?> attribute : vc.getAttributes().entrySet()) {
+            if ( ! attribute.getKey().startsWith("_") && ! attribute.getKey().equals(VariantContext.ID_KEY))
+                vcRoot.add(attribute.getKey(), attribute.getValue());
+        }
+
+        for (Genotype g : vc.getGenotypes().values() )
+            vcRoot.add(readGenotype(g, vcRoot));
+
+        return vcRoot;
+    }
+
+    /** Builds the node for one sample's genotype: GT, GQ and every attribute whose key does not start with "_". */
+    private static DiffNode readGenotype(Genotype g, DiffNode vcRoot) {
+        DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
+        gRoot.add("GT", g.getGenotypeString());
+        gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 );
+
+        for (Map.Entry<String, ?> attribute : g.getAttributes().entrySet()) {
+            if ( ! attribute.getKey().startsWith("_") )
+                gRoot.add(attribute.getKey(), attribute.getValue());
+        }
+
+        return gRoot;
+    }
+
+    /**
+     * Checks whether the file starts with the VCF 4 file-format declaration.
+     *
+     * @param file the file to probe
+     * @return true if the first characters of the file are "##fileformat=VCFv4";
+     *         false otherwise, including when the file cannot be read
+     */
+    @Override
+    public boolean canRead(File file) {
+        try {
+            FileReader reader = new FileReader(file);
+            try {
+                char[] buff = new char[VCF4_HEADER.length()];
+                // a single read() may deliver fewer characters than requested, so loop until full or EOF
+                int filled = 0;
+                while ( filled < buff.length ) {
+                    int count = reader.read(buff, filled, buff.length - filled);
+                    if ( count < 0 ) break;
+                    filled += count;
+                }
+                return new String(buff, 0, filled).startsWith(VCF4_HEADER);
+            } finally {
+                // always close the reader so probing a file does not leak its handle
+                reader.close();
+            }
+        } catch ( IOException e ) {
+            return false;
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index 7a765c602..fc8a5819a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
+import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
@@ -37,7 +38,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.commandline.*;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import java.util.*;
import java.io.PrintStream;
@@ -48,7 +48,7 @@ import java.io.PrintStream;
* multi-sample data. The user can choose from several different incorporated calculation models.
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
-@ReadFilters( {BadMateFilter.class} )
+@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} )
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.REFERENCE)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
@@ -158,7 +158,7 @@ public class UnifiedGenotyper extends LocusWalker getSupportedHeaderStrings() {
+ Set result = new HashSet();
+ result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
+ result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
+ result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
+ result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
+
+ return result;
+ }
+
/**
* Compute at a given locus.
*
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 4c9080884..6fc972b5d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -634,17 +634,27 @@ public class UnifiedGenotyperEngine {
if (vcInput == null)
return null;
- if (vcInput.isSNP() && ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
- return GenotypeLikelihoodsCalculationModel.Model.SNP;
+ // todo - no support to genotype MNP's yet
+ if (vcInput.isMNP())
+ return null;
+
+ if (vcInput.isSNP()) {
+ if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
+ return GenotypeLikelihoodsCalculationModel.Model.SNP;
+ else
+ // ignore SNP's if user chose INDEL mode
+ return null;
+ }
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL))
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
- } else {
+ }
+ else {
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)
return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
- }
+ }
}
return null;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
index 048dbd8cb..3b94989aa 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
@@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadCigarFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
-import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
@@ -50,7 +50,7 @@ import java.io.PrintStream;
/**
* Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
*/
-@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, BadCigarFilter.class})
+@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, BadCigarFilter.class})
@Reference(window=@Window(start=-1,stop=50))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@By(DataSource.REFERENCE)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
index c2953d1d7..1f05ddaf0 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
@@ -72,7 +72,7 @@ import java.util.*;
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
*/
-@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, PlatformUnitFilter.class})
+@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class})
public class SomaticIndelDetectorWalker extends ReadWalker {
// @Output
// PrintStream out;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
index e59b29502..4833a6cad 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
@@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
-import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
@@ -58,7 +58,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
@Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
@By(DataSource.READS)
-@ReadFilters({ZeroMappingQualityReadFilter.class})
+@ReadFilters({MappingQualityZeroReadFilter.class})
// Filter out all reads with zero mapping quality
public class ReadBackedPhasingWalker extends RodWalker {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
index ee504b6e7..6673bec92 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@@ -34,7 +34,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
-import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.*;
@@ -75,7 +75,7 @@ import java.util.Map;
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
-@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
+@ReadFilters( {MappingQualityZeroReadFilter.class} ) // Filter out all reads with zero mapping quality
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
@PartitionBy(PartitionType.LOCUS)
public class CountCovariatesWalker extends LocusWalker implements TreeReducible {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
index ff59c9e29..2cbc66e31 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
@@ -43,9 +43,9 @@ public class AlleleCount extends VariantStratifier {
if (eval != null) {
int AC = -1;
- if ( eval.hasAttribute("AC") )
+ if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
AC = eval.getAttributeAsInt("AC");
- else if ( eval.isVariant() ) {
+ } else if ( eval.isVariant() ) {
for (Allele allele : eval.getAlternateAlleles())
AC = Math.max(AC, eval.getChromosomeCount(allele));
} else
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java
index 1f622afc8..17461de2f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java
@@ -97,7 +97,7 @@ public class GaussianMixtureModel {
int ttt = 0;
while( ttt++ < numIterations ) {
- // Estep: assign each variant to the nearest cluster
+ // E step: assign each variant to the nearest cluster
for( final VariantDatum datum : data ) {
double minDistance = Double.MAX_VALUE;
MultivariateGaussian minGaussian = null;
@@ -112,7 +112,7 @@ public class GaussianMixtureModel {
datum.assignment = minGaussian;
}
- // Mstep: update gaussian means based on assigned variants
+ // M step: update gaussian means based on assigned variants
for( final MultivariateGaussian gaussian : gaussians ) {
gaussian.zeroOutMu();
int numAssigned = 0;
@@ -229,26 +229,29 @@ public class GaussianMixtureModel {
}
public double evaluateDatumMarginalized( final VariantDatum datum ) {
- int numVals = 0;
+ int numSamples = 0;
double sumPVarInGaussian = 0.0;
- int numIter = 10;
+ final int numIterPerMissingAnnotation = 10; // Trade off here between speed of computation and accuracy of the marginalization
final double[] pVarInGaussianLog10 = new double[gaussians.size()];
+ // for each dimension
for( int iii = 0; iii < datum.annotations.length; iii++ ) {
- // marginalize over the missing dimension by drawing X random values for the missing annotation and averaging the lod
+ // if it is missing, marginalize over the missing dimension by drawing X random values for the missing annotation and averaging the lod
if( datum.isNull[iii] ) {
- for( int ttt = 0; ttt < numIter; ttt++ ) {
- datum.annotations[iii] = Normal.staticNextDouble(0.0, 1.0);
+ for( int ttt = 0; ttt < numIterPerMissingAnnotation; ttt++ ) {
+ datum.annotations[iii] = GenomeAnalysisEngine.getRandomGenerator().nextGaussian(); // draw a random sample from the standard normal distribution
+ // evaluate this random data point
int gaussianIndex = 0;
for( final MultivariateGaussian gaussian : gaussians ) {
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + gaussian.evaluateDatumLog10( datum );
}
- sumPVarInGaussian += Math.pow(10.0, MathUtils.log10sumLog10(pVarInGaussianLog10));
- numVals++;
+ // add this sample's probability to the pile in order to take an average in the end
+ sumPVarInGaussian += Math.pow(10.0, MathUtils.log10sumLog10(pVarInGaussianLog10)); // p = 10 ^ Sum(pi_k * p(v|n,k))
+ numSamples++;
}
}
}
- return Math.log10( sumPVarInGaussian / ((double) numVals) );
+ return Math.log10( sumPVarInGaussian / ((double) numSamples) );
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
index 357bbaa24..ddeda1699 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
-import cern.jet.random.Normal;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@@ -83,19 +82,11 @@ public class VariantDataManager {
}
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);
- if( annotationKeys.get(iii).toLowerCase().contains("ranksum") ) { // BUGBUG: to clean up
- for( final VariantDatum datum : data ) {
- if( datum.annotations[iii] > 0.0 ) { datum.annotations[iii] /= 3.0; }
- }
- }
meanVector[iii] = theMean;
varianceVector[iii] = theSTD;
for( final VariantDatum datum : data ) {
- datum.annotations[iii] = ( datum.isNull[iii] ? Normal.staticNextDouble(0.0, 1.0) : ( datum.annotations[iii] - theMean ) / theSTD );
- // Each data point is now [ (x - mean) / standard deviation ]
- if( annotationKeys.get(iii).toLowerCase().contains("ranksum") && datum.isNull[iii] && datum.annotations[iii] > 0.0 ) {
- datum.annotations[iii] /= 3.0;
- }
+ // Transform each data point via: (x - mean) / standard deviation
+ datum.annotations[iii] = ( datum.isNull[iii] ? GenomeAnalysisEngine.getRandomGenerator().nextGaussian() : ( datum.annotations[iii] - theMean ) / theSTD );
}
}
if( foundZeroVarianceAnnotation ) {
@@ -164,7 +155,7 @@ public class VariantDataManager {
final int numBadSitesAdded = trainingData.size();
logger.info( "Found " + numBadSitesAdded + " variants overlapping bad sites training tracks." );
- // Next, sort the variants by the LOD coming from the positive model and add to the list the bottom X percent of variants
+ // Next, sort the variants by the LOD coming from the positive model and add to the list the bottom X percent of variants
Collections.sort( data );
final int numToAdd = Math.max( minimumNumber - trainingData.size(), Math.round((float)bottomPercentage * data.size()) );
if( numToAdd > data.size() ) {
@@ -242,23 +233,15 @@ public class VariantDataManager {
double value;
try {
- if( annotationKey.equalsIgnoreCase("QUAL") ) {
- value = vc.getPhredScaledQual();
- } else if( annotationKey.equalsIgnoreCase("DP") ) {
- value = Double.parseDouble( (String)vc.getAttribute( "DP" ) ) / Double.parseDouble( (String)vc.getAttribute( "AN" ) );
- } else {
- value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
- if( Double.isInfinite(value) ) { value = Double.NaN; }
- if( annotationKey.equalsIgnoreCase("InbreedingCoeff") && value > 0.05 ) { value = Double.NaN; }
- if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
- value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
- }
- if( annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
- if( annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.01) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
+ value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
+ if( Double.isInfinite(value) ) { value = Double.NaN; }
+ if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
+ value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
}
-
+ if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
+ if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
} catch( Exception e ) {
- value = Double.NaN; // The VQSR works with missing data now by marginalizing over the missing dimension when evaluating Gaussians
+ value = Double.NaN; // The VQSR works with missing data by marginalizing over the missing dimension when evaluating the Gaussian mixture model
}
return value;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
index 2c51f02d6..2d0355d7d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -284,7 +284,7 @@ public class VariantRecalibrator extends RodWalker newAlleles = new ArrayList();
- loc = clipAlleles(pos, ref, alleles, newAlleles);
+ loc = clipAlleles(pos, ref, alleles, newAlleles, lineNo);
alleles = newAlleles;
}
@@ -504,7 +504,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
* @param clippedAlleles output list of clipped alleles
* @return a list of alleles, clipped to the reference
*/
- protected static long clipAlleles(long position, String ref, List unclippedAlleles, List clippedAlleles) {
+ protected static long clipAlleles(long position, String ref, List unclippedAlleles, List clippedAlleles, int lineNo) {
// Note that the computation of forward clipping here is meant only to see whether there is a common
// base to all alleles, and to correctly compute reverse clipping,
@@ -522,6 +522,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
}
if (a.length() - reverseClipped <= forwardClipping || a.length() - forwardClipping == 0)
clipping = false;
+ else if (ref.length() == reverseClipped)
+ generateException("bad alleles encountered", lineNo);
else if (a.getBases()[a.length()-reverseClipped-1] != ref.getBytes()[ref.length()-reverseClipped-1])
clipping = false;
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java
index 31251c089..a8bf74707 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java
@@ -123,12 +123,10 @@ public class StandardVCFWriter implements VCFWriter {
try {
// the file format field needs to be written first
- mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
+ mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString() + "\n");
for ( VCFHeaderLine line : mHeader.getMetaData() ) {
- if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
- line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) ||
- line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) )
+ if ( VCFHeaderVersion.isFormatString(line.getKey()) )
continue;
// are the records filtered (so we know what to put in the FILTER column of passing records) ?
@@ -358,16 +356,8 @@ public class StandardVCFWriter implements VCFWriter {
mWriter.write(key);
if ( !entry.getValue().equals("") ) {
- int numVals = 1;
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
- if ( metaData != null )
- numVals = metaData.getCount();
-
- // take care of unbounded encoding
- if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
- numVals = 1;
-
- if ( numVals > 0 ) {
+ if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) {
mWriter.write("=");
mWriter.write(entry.getValue());
}
@@ -423,7 +413,7 @@ public class StandardVCFWriter implements VCFWriter {
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
if ( metaData != null ) {
- int numInFormatField = metaData.getCount();
+ int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
// For example, if Number=2, the string has to be ".,."
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java
new file mode 100644
index 000000000..a9de949d8
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java
@@ -0,0 +1,28 @@
+package org.broadinstitute.sting.utils.codecs.vcf;
+
+/**
+ * A class representing a key=value entry for ALT fields in the VCF header
+ * @author ebanks
+ */
+public class VCFAltHeaderLine extends VCFSimpleHeaderLine {
+
+ /**
+ * create a VCF ALT header line from a name and a description
+ *
+ * @param name the name for this header line
+ * @param description the description for this header line
+ */
+ public VCFAltHeaderLine(String name, String description) {
+ super(name, description, SupportedHeaderLineType.ALT);
+ }
+
+ /**
+ * create a VCF ALT header line from the text of a header line
+ *
+ * @param line the header line
+ * @param version the vcf header version
+ */
+ protected VCFAltHeaderLine(String line, VCFHeaderVersion version) {
+ super(line, version, SupportedHeaderLineType.ALT);
+ }
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
index a799161ad..bb822f2ed 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
@@ -24,6 +24,8 @@
package org.broadinstitute.sting.utils.codecs.vcf;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -43,26 +45,43 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
// the field types
private String name;
- private int count;
+ private int count = -1;
+ private VCFHeaderLineCount countType;
private String description;
private VCFHeaderLineType type;
// access methods
public String getName() { return name; }
- public int getCount() { return count; }
public String getDescription() { return description; }
public VCFHeaderLineType getType() { return type; }
+ public VCFHeaderLineCount getCountType() { return countType; }
+ public int getCount() {
+ if ( countType != VCFHeaderLineCount.INTEGER )
+ throw new ReviewedStingException("Asking for header line count when type is not an integer");
+ return count;
+ }
- //
- public void setNumberToUnbounded() { this.count = UNBOUNDED; }
+    // Utility method: how many values this line expects for a record with the given
+    // number of alternate alleles; -1 is returned for the UNBOUNDED count type.
+    public int getCount(int numAltAlleles) {
+        switch ( countType ) {
+            case INTEGER:   return count;
+            case UNBOUNDED: return -1;
+            case A:         return numAltAlleles;
+            // G: number of unordered pairs drawn from (numAltAlleles + 1) alleles
+            case G:         return (numAltAlleles + 1) * (numAltAlleles + 2) / 2;
+            default:        throw new ReviewedStingException("Unknown count type: " + countType);
+        }
+    }
+
+ public void setNumberToUnbounded() {
+ countType = VCFHeaderLineCount.UNBOUNDED;
+ count = -1;
+ }
// our type of line, i.e. format, info, etc
private final SupportedHeaderLineType lineType;
- // line numerical values are allowed to be unbounded (or unknown), which is
- // marked with a dot (.)
- public static final int UNBOUNDED = -1; // the value we store internally for unbounded types
-
/**
* create a VCF format header line
*
@@ -70,10 +89,12 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
* @param count the count for this header line
* @param type the type for this header line
* @param description the description for this header line
+ * @param lineType the header line type
*/
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
this.name = name;
+ this.countType = VCFHeaderLineCount.INTEGER;
this.count = count;
this.type = type;
this.description = description;
@@ -81,20 +102,53 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
validate();
}
+ /**
+  * create a VCF compound header line whose number of values is described by a count
+  * type rather than by a literal integer; the numeric count field is not assigned
+  * here and so keeps its declared initial value of -1
+  *
+  * NOTE(review): nothing in this constructor rejects VCFHeaderLineCount.INTEGER, which
+  * would leave the numeric count at -1 -- confirm whether validate() guards against it
+  *
+  * @param name         the name for this header line
+  * @param count        the count type for this header line
+  * @param type         the type for this header line
+  * @param description  the description for this header line
+  * @param lineType     the header line type
+  */
+ protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
+     super(lineType.toString(), "");
+     this.lineType = lineType;
+     this.name = name;
+     this.description = description;
+     this.type = type;
+     this.countType = count;
+     // validation runs last, once every field has been populated
+     validate();
+ }
+
/**
* create a VCF format header line
*
* @param line the header line
* @param version the VCF header version
+ * @param lineType the header line type
*
*/
protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
name = mapping.get("ID");
- count = (version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) ?
- mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
- mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
+ count = -1;
+ final String numberStr = mapping.get("Number");
+ if ( numberStr.equals(VCFConstants.PER_ALLELE_COUNT) ) {
+ countType = VCFHeaderLineCount.A;
+ } else if ( numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT) ) {
+ countType = VCFHeaderLineCount.G;
+ } else if ( ((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) &&
+ numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) ||
+ ((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) &&
+ numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3)) ) {
+ countType = VCFHeaderLineCount.UNBOUNDED;
+ } else {
+ countType = VCFHeaderLineCount.INTEGER;
+ count = Integer.valueOf(numberStr);
+
+ }
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
@@ -121,7 +175,15 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
protected String toStringEncoding() {
Map map = new LinkedHashMap();
map.put("ID", name);
- map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
+ Object number;
+ switch ( countType ) {
+ case A: number = VCFConstants.PER_ALLELE_COUNT; break;
+ case G: number = VCFConstants.PER_GENOTYPE_COUNT; break;
+ case UNBOUNDED: number = VCFConstants.UNBOUNDED_ENCODING_v4; break;
+ case INTEGER:
+ default: number = count;
+ }
+ map.put("Number", number);
map.put("Type", type);
map.put("Description", description);
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
@@ -136,15 +198,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
if ( !(o instanceof VCFCompoundHeaderLine) )
return false;
VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o;
- return name.equals(other.name) &&
- count == other.count &&
- description.equals(other.description) &&
- type == other.type &&
- lineType == other.lineType;
+ return equalsExcludingDescription(other) &&
+ description.equals(other.description);
}
public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
return count == other.count &&
+ countType == other.countType &&
type == other.type &&
lineType == other.lineType &&
name.equals(other.name);
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java
index 695c46c27..91cf86c70 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java
@@ -99,6 +99,8 @@ public final class VCFConstants {
public static final String MISSING_DEPTH_v3 = "-1";
public static final String UNBOUNDED_ENCODING_v4 = ".";
public static final String UNBOUNDED_ENCODING_v3 = "-1";
+ // symbolic values of the header "Number" key: "A" is parsed as VCFHeaderLineCount.A and "G" as VCFHeaderLineCount.G
+ public static final String PER_ALLELE_COUNT = "A";
+ public static final String PER_GENOTYPE_COUNT = "G";
public static final String EMPTY_ALLELE = ".";
public static final String EMPTY_GENOTYPE = "./.";
public static final double MAX_GENOTYPE_QUAL = 99.0;
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java
index 9176fc16e..418b80074 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java
@@ -1,19 +1,10 @@
package org.broadinstitute.sting.utils.codecs.vcf;
-import java.util.Arrays;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-
/**
* @author ebanks
* A class representing a key=value entry for FILTER fields in the VCF header
*/
-public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
-
- private String name;
- private String description;
-
+public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
/**
* create a VCF filter header line
@@ -22,12 +13,7 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
* @param description the description for this header line
*/
public VCFFilterHeaderLine(String name, String description) {
- super("FILTER", "");
- this.name = name;
- this.description = description;
-
- if ( name == null || description == null )
- throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
+ super(name, description, SupportedHeaderLineType.FILTER);
}
/**
@@ -37,34 +23,6 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
* @param version the vcf header version
*/
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
- super("FILTER", "");
- Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
- name = mapping.get("ID");
- description = mapping.get("Description");
- if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
- description = UNBOUND_DESCRIPTION;
- }
-
- protected String toStringEncoding() {
- Map map = new LinkedHashMap();
- map.put("ID", name);
- map.put("Description", description);
- return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
- }
-
- public boolean equals(Object o) {
- if ( !(o instanceof VCFFilterHeaderLine) )
- return false;
- VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
- return name.equals(other.name) &&
- description.equals(other.description);
- }
-
- public String getName() {
- return name;
- }
-
- public String getDescription() {
- return description;
+ super(line, version, SupportedHeaderLineType.FILTER);
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java
index 352be3e97..474c8dd14 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java
@@ -16,6 +16,10 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
throw new IllegalArgumentException("Flag is an unsupported type for format fields");
}
+ /**
+  * create a VCF format header line whose number of values is described by a count type
+  *
+  * @param name         the name for this header line
+  * @param count        the count type for this header line
+  * @param type         the type for this header line; Flag is rejected
+  * @param description  the description for this header line
+  * @throws IllegalArgumentException if type is Flag
+  */
+ public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
+     super(name, count, type, description, SupportedHeaderLineType.FORMAT);
+     // mirror the Flag rejection already present earlier in this class so that both
+     // ways of building a format line enforce the same restriction
+     if ( type == VCFHeaderLineType.Flag )
+         throw new IllegalArgumentException("Flag is an unsupported type for format fields");
+ }
+
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.FORMAT);
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java
new file mode 100644
index 000000000..d615c7c78
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java
@@ -0,0 +1,8 @@
+package org.broadinstitute.sting.utils.codecs.vcf;
+
+/**
+ * The ways in which the number of values of a VCF header line field can be encoded.
+ */
+public enum VCFHeaderLineCount {
+    /** an explicit integer count */
+    INTEGER,
+    /** written as "A"; resolves to the number of alternate alleles */
+    A,
+    /** written as "G"; resolves from the number of alternate alleles n as (n + 1) * (n + 2) / 2 */
+    G,
+    /** no fixed number of values */
+    UNBOUNDED
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java
index 135a5c1a1..9b20f38a1 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java
@@ -13,6 +13,10 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
super(name, count, type, description, SupportedHeaderLineType.INFO);
}
+ /**
+  * create a VCF info header line whose number of values is described by a count type
+  *
+  * @param name         the name for this header line
+  * @param count        the count type for this header line
+  * @param type         the type for this header line
+  * @param description  the description for this header line
+  */
+ public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
+ super(name, count, type, description, SupportedHeaderLineType.INFO);
+ }
+
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.INFO);
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java
new file mode 100644
index 000000000..152043f28
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java
@@ -0,0 +1,81 @@
+package org.broadinstitute.sting.utils.codecs.vcf;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+
+/**
+ * @author ebanks
+ * A class representing a key=value entry for simple VCF header types, i.e. header
+ * lines that carry only an ID and a Description (FILTER and ALT)
+ */
+public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
+
+    public enum SupportedHeaderLineType {
+        FILTER, ALT;
+    }
+
+    private String name;
+    private String description;
+
+    // our type of line, i.e. filter, alt, etc
+    private final SupportedHeaderLineType lineType;
+
+
+    /**
+     * create a simple VCF header line
+     *
+     * @param name         the name for this header line
+     * @param description  the description for this header line
+     * @param lineType     the header line type
+     * @throws IllegalArgumentException if name or description is null
+     */
+    public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) {
+        super(lineType.toString(), "");
+        this.lineType = lineType;
+        this.name = name;
+        this.description = description;
+
+        if ( name == null || description == null )
+            throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
+    }
+
+    /**
+     * create a simple VCF header line by parsing the ID and Description keys out of its text
+     *
+     * @param line      the header line
+     * @param version   the vcf header version
+     * @param lineType  the header line type
+     */
+    protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
+        super(lineType.toString(), "");
+        this.lineType = lineType;
+        Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
+        name = mapping.get("ID");
+        description = mapping.get("Description");
+        if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
+            description = UNBOUND_DESCRIPTION;
+    }
+
+    /**
+     * @return this line encoded as its line type, an equals sign, and the encoded ID/Description pairs
+     */
+    protected String toStringEncoding() {
+        // insertion-ordered so that ID is always written before Description
+        Map<String,Object> map = new LinkedHashMap<String,Object>();
+        map.put("ID", name);
+        map.put("Description", description);
+        return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
+    }
+
+    /**
+     * Two simple header lines are equal when they have the same line type, name and description.
+     *
+     * NOTE(review): no hashCode() override is visible in this class; confirm that the
+     * inherited hashCode is consistent with this equals before relying on hash-based collections
+     */
+    public boolean equals(Object o) {
+        if ( !(o instanceof VCFSimpleHeaderLine) )
+            return false;
+        VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o;
+        // the line type takes part in the comparison so that e.g. a FILTER line never
+        // equals an ALT line that happens to share its ID and description
+        return lineType == other.lineType &&
+               nullSafeEquals(name, other.name) &&
+               nullSafeEquals(description, other.description);
+    }
+
+    // null-tolerant string equality: the parsing constructor leaves description null when the
+    // key is absent and ALLOW_UNBOUND_DESCRIPTIONS is false, so the fields cannot be dereferenced blindly
+    private static boolean nullSafeEquals(String a, String b) {
+        return a == null ? b == null : a.equals(b);
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
index ecede068e..4037f75b9 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
@@ -180,19 +180,4 @@ public class VCFUtils {
return new HashSet(map.values());
}
-
- /**
- * return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
- * @return a set of VCF format lines
- */
- public static Set getSupportedHeaderStrings() {
- Set result = new HashSet();
- result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
- result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
- result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
- result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
-
- return result;
- }
-
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java
index cb6557408..31791e805 100755
--- a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java
@@ -133,8 +133,12 @@ public class Haplotype {
byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases);
+ int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length;
+ // protect against long events that overrun available reference context
+ if (startAfter > refBases.length)
+ startAfter = refBases.length;
byte[] basesAfterVariant = Arrays.copyOfRange(refBases,
- startIdxInReference+numPrefBases+ refAllele.getBases().length, refBases.length);
+ startAfter, refBases.length);
// Create location for all haplotypes
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java
index a9ba46159..901de6fae 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java
@@ -108,7 +108,7 @@ public class Allele implements Comparable {
this.bases = bases;
if ( ! acceptableAlleleBases(bases) )
- throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
+ throw new IllegalArgumentException("Unexpected base in allele bases \'" + new String(bases)+"\'");
}
private Allele(String bases, boolean isRef) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index 5787b591f..da80a3431 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -867,7 +867,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
for ( String name : sampleNames ) {
if ( map.containsKey(name) ) throw new IllegalArgumentException("Duplicate names detected in requested samples " + sampleNames);
- map.put(name, getGenotype(name));
+ final Genotype g = getGenotype(name);
+ if ( g != null ) {
+ map.put(name, g);
+ }
}
return map;
diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java
index 61bb8b34b..b3e422ba9 100755
--- a/public/java/test/org/broadinstitute/sting/BaseTest.java
+++ b/public/java/test/org/broadinstitute/sting/BaseTest.java
@@ -4,6 +4,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.log4j.*;
import org.apache.log4j.spi.LoggingEvent;
import org.broadinstitute.sting.commandline.CommandLineUtils;
+import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
@@ -12,6 +13,10 @@ import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
/**
*
@@ -107,6 +112,57 @@ public abstract class BaseTest {
}
}
+ /**
+ * Simple generic utility class for creating TestNG data providers:
+ *
+ * 1: inherit this class, as in
+ *
+ * private class SummarizeDifferenceTest extends TestDataProvider {
+ * public SummarizeDifferenceTest() {
+ * super(SummarizeDifferenceTest.class);
+ * }
+ * ...
+ * }
+ *
+ * Provide a reference to your class to the TestDataProvider constructor.
+ *
+ * 2: Create instances of your subclass, then return the result of calling getTests
+ * with the class type of your test
+ *
+ * @DataProvider(name = "summaries")
+ * public Object[][] createSummaries() {
+ * new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2");
+ * new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1");
+ * return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class);
+ * }
+ *
+ * This class magically tracks created objects of this type
+ */
+ public static class TestDataProvider {
+ private static final Map> tests = new HashMap>();
+
+ /**
+ * Create a new TestDataProvider instance bound to the class variable C
+ * @param c
+ */
+ public TestDataProvider(Class c) {
+ if ( ! tests.containsKey(c) )
+ tests.put(c, new ArrayList