Stage 1 of the VariantFiltration refactoring is now complete. There now exists a parallel tool called VariantAnnotator which simply takes variant calls and annotates them with the same type of data that we used to use for filtering (e.g. DoC, allele balance). The output is a VCF with the INFO field appropriately annotated.
VariantAnnotator can be called as a standalone walker or by another walker, as it is by the UnifiedGenotyper. UG now no longer computes any of this metadata - it relegates the task completely to the annotator (assuming the output format accepts it). This is a fairly all-encompassing check-in. It involves changes to all of the UG code, bug fixes to much of the VCF code as things popped up, and other changes throughout. All integration tests pass and I've tediously confirmed that the annotation values are correct, but this framework could use some more rigorous testing. Stage 2 of the process will happen later this week. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2053 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ce5034dc5d
commit
4558375575
|
|
@ -0,0 +1,119 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
public class AlleleBalance implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
|
||||
double ratio;
|
||||
|
||||
Genotype g = genotypes.get(0);
|
||||
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) {
|
||||
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
|
||||
if ( weightedBalance.second == 0 )
|
||||
return null;
|
||||
ratio = weightedBalance.first;
|
||||
} else {
|
||||
// this test doesn't make sense for homs
|
||||
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
|
||||
final String genotypeStr = genotype.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
|
||||
final String bases = pileup.getBases().toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
|
||||
ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases);
|
||||
}
|
||||
|
||||
return new Pair<String, String>("AlleleBalance", String.format("%.2f", ratio));
|
||||
}
|
||||
|
||||
private double computeSingleBalance(char ref, final String genotypeStr, final String bases) {
|
||||
|
||||
char a = genotypeStr.charAt(0);
|
||||
char b = genotypeStr.charAt(1);
|
||||
int aCount = Utils.countOccurrences(a, bases);
|
||||
int bCount = Utils.countOccurrences(b, bases);
|
||||
|
||||
int refCount = a == ref ? aCount : bCount;
|
||||
int altCount = a == ref ? bCount : aCount;
|
||||
|
||||
double ratio = (double)refCount / (double)(refCount + altCount);
|
||||
return ratio;
|
||||
}
|
||||
|
||||
// returns the ratio and then number of points which comprise it
|
||||
private Pair<Double, Integer> computeWeightedBalance(char ref, List<Genotype> genotypes, ReadBackedPileup pileup) {
|
||||
|
||||
ArrayList<Double> refBalances = new ArrayList<Double>();
|
||||
ArrayList<Double> weights = new ArrayList<Double>();
|
||||
|
||||
// accumulate ratios and weights
|
||||
for ( Genotype g : genotypes ) {
|
||||
|
||||
if ( !(g instanceof ReadBacked) || !(g instanceof PosteriorsBacked) )
|
||||
continue;
|
||||
|
||||
final String genotypeStr = g.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
continue;
|
||||
|
||||
DiploidGenotype bestGenotype = DiploidGenotype.unorderedValueOf(genotypeStr);
|
||||
|
||||
// we care only about het ref calls
|
||||
if ( !bestGenotype.isHetRef(ref) )
|
||||
continue;
|
||||
|
||||
char a = genotypeStr.charAt(0);
|
||||
char b = genotypeStr.charAt(1);
|
||||
char altBase = a != ref ? a : b;
|
||||
|
||||
// get the base counts at this pileup (minus deletions)
|
||||
ReadBackedPileup myPileup = ((ReadBacked)g).getPileup();
|
||||
|
||||
// if the pileup is null, we'll just have to use the full pileup (it's better than nothing)
|
||||
if ( myPileup == null )
|
||||
myPileup = pileup;
|
||||
|
||||
int[] counts = myPileup.getBasePileupAsCounts();
|
||||
int refCount = counts[BaseUtils.simpleBaseToBaseIndex(ref)];
|
||||
int altCount = counts[BaseUtils.simpleBaseToBaseIndex(altBase)];
|
||||
|
||||
double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
|
||||
posteriors = MathUtils.normalizeFromLog10(posteriors);
|
||||
weights.add(posteriors[bestGenotype.ordinal()]);
|
||||
refBalances.add((double)refCount / (double)(refCount + altCount));
|
||||
}
|
||||
|
||||
double ratio = 0.0;
|
||||
|
||||
if ( weights.size() > 0 ) {
|
||||
// normalize the weights
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
sum += weights.get(i);
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
weights.set(i, weights.get(i) / sum);
|
||||
|
||||
// calculate total weighted ratios
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
ratio += weights.get(i) * refBalances.get(i);
|
||||
}
|
||||
|
||||
return new Pair<Double, Integer>(ratio, weights.size());
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class DepthOfCoverage implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
int depth = pileup.getReads().size();
|
||||
return new Pair<String, String>("DoC", String.format("%d", depth));
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
}
|
||||
|
|
@ -0,0 +1,179 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import cern.jet.math.Arithmetic;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class FisherStrand implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
|
||||
// this test doesn't make sense for homs
|
||||
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
|
||||
final String genotypeStr = genotype.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
|
||||
int allele1 = BaseUtils.simpleBaseToBaseIndex(genotypeStr.charAt(0));
|
||||
int allele2 = BaseUtils.simpleBaseToBaseIndex(genotypeStr.charAt(1));
|
||||
|
||||
Double pvalue = strandTest(pileup, allele1, allele2);
|
||||
if ( pvalue == null )
|
||||
return null;
|
||||
|
||||
return new Pair<String, String>("FisherStrand", String.format("%.4f", pvalue));
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
private Double strandTest(ReadBackedPileup pileup, int allele1, int allele2) {
|
||||
int[][] table = getContingencyTable(pileup, allele1, allele2);
|
||||
if ( !variantIsHet(table) )
|
||||
return null;
|
||||
|
||||
double pCutoff = computePValue(table);
|
||||
//printTable(table, pCutoff);
|
||||
|
||||
double pValue = pCutoff;
|
||||
while (rotateTable(table)) {
|
||||
double pValuePiece = computePValue(table);
|
||||
|
||||
//printTable(table, pValuePiece);
|
||||
|
||||
if (pValuePiece <= pCutoff) {
|
||||
pValue += pValuePiece;
|
||||
}
|
||||
}
|
||||
|
||||
table = getContingencyTable(pileup, allele1, allele2);
|
||||
|
||||
while (unrotateTable(table)) {
|
||||
double pValuePiece = computePValue(table);
|
||||
|
||||
//printTable(table, pValuePiece);
|
||||
|
||||
if (pValuePiece <= pCutoff) {
|
||||
pValue += pValuePiece;
|
||||
}
|
||||
}
|
||||
|
||||
//System.out.printf("P-cutoff: %f\n", pCutoff);
|
||||
//System.out.printf("P-value: %f\n\n", pValue);
|
||||
|
||||
return pValue;
|
||||
}
|
||||
|
||||
private static boolean variantIsHet(int[][] table) {
|
||||
return ((table[0][1] != 0 || table[0][1] != 0) && (table[1][0] != 0 || table[1][1] != 0));
|
||||
}
|
||||
|
||||
private static void printTable(int[][] table, double pValue) {
|
||||
System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue);
|
||||
}
|
||||
|
||||
private static boolean rotateTable(int[][] table) {
|
||||
table[0][0] -= 1;
|
||||
table[1][0] += 1;
|
||||
|
||||
table[0][1] += 1;
|
||||
table[1][1] -= 1;
|
||||
|
||||
return (table[0][0] >= 0 && table[1][1] >= 0) ? true : false;
|
||||
}
|
||||
|
||||
private static boolean unrotateTable(int[][] table) {
|
||||
table[0][0] += 1;
|
||||
table[1][0] -= 1;
|
||||
|
||||
table[0][1] -= 1;
|
||||
table[1][1] += 1;
|
||||
|
||||
return (table[0][1] >= 0 && table[1][0] >= 0) ? true : false;
|
||||
}
|
||||
|
||||
private double computePValue(int[][] table) {
|
||||
|
||||
int[] rowSums = { sumRow(table, 0), sumRow(table, 1) };
|
||||
int[] colSums = { sumColumn(table, 0), sumColumn(table, 1) };
|
||||
int N = rowSums[0] + rowSums[1];
|
||||
|
||||
// calculate in log space so we don't die with high numbers
|
||||
double pCutoff = Arithmetic.logFactorial(rowSums[0])
|
||||
+ Arithmetic.logFactorial(rowSums[1])
|
||||
+ Arithmetic.logFactorial(colSums[0])
|
||||
+ Arithmetic.logFactorial(colSums[1])
|
||||
- Arithmetic.logFactorial(table[0][0])
|
||||
- Arithmetic.logFactorial(table[0][1])
|
||||
- Arithmetic.logFactorial(table[1][0])
|
||||
- Arithmetic.logFactorial(table[1][1])
|
||||
- Arithmetic.logFactorial(N);
|
||||
return Math.exp(pCutoff);
|
||||
}
|
||||
|
||||
private static int sumRow(int[][] table, int column) {
|
||||
int sum = 0;
|
||||
for (int r = 0; r < table.length; r++) {
|
||||
sum += table[r][column];
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
private static int sumColumn(int[][] table, int row) {
|
||||
int sum = 0;
|
||||
for (int c = 0; c < table[row].length; c++) {
|
||||
sum += table[row][c];
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
||||
* fw rc
|
||||
* allele1 # #
|
||||
* allele2 # #
|
||||
* @param pileup the pileup for the locus
|
||||
* @param allele1 information for the called variant
|
||||
* @param allele2 information for the called variant
|
||||
* @return a 2x2 contingency table
|
||||
*/
|
||||
private static int[][] getContingencyTable(ReadBackedPileup pileup, int allele1, int allele2) {
|
||||
|
||||
int[][] table = new int[2][2];
|
||||
|
||||
List<SAMRecord> reads = pileup.getReads();
|
||||
List<Integer> offsets = pileup.getOffsets();
|
||||
|
||||
for (int readIndex = 0; readIndex < reads.size(); readIndex++) {
|
||||
SAMRecord read = reads.get(readIndex);
|
||||
int offset = offsets.get(readIndex);
|
||||
|
||||
// skip over deletion sites
|
||||
if ( offset == -1 )
|
||||
continue;
|
||||
|
||||
int readAllele = BaseUtils.simpleBaseToBaseIndex(read.getReadString().charAt(offset));
|
||||
boolean isFW = !read.getReadNegativeStrandFlag();
|
||||
|
||||
if (readAllele == allele1 || readAllele == allele2) {
|
||||
int row = (readAllele == allele1) ? 0 : 1;
|
||||
int column = isFW ? 0 : 1;
|
||||
|
||||
table[row][column]++;
|
||||
}
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class HomopolymerRun implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
|
||||
Genotype genotype = VariantAnnotator.getFirstVariant(ref.getBase(), genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
|
||||
final String genotypeStr = genotype.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
|
||||
char altAllele = (genotypeStr.charAt(0) != ref.getBase() ? genotypeStr.charAt(0) : genotypeStr.charAt(1));
|
||||
|
||||
int run = computeHomopolymerRun(altAllele, ref);
|
||||
return new Pair<String, String>("HomopolymerRun", String.format("%d", run));
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
private static int computeHomopolymerRun(char altAllele, ReferenceContext ref) {
|
||||
|
||||
// TODO -- this needs to be computed in a more accurate manner
|
||||
// We currently look only at direct runs of the alternate allele adjacent to this position
|
||||
|
||||
char[] bases = ref.getBases();
|
||||
GenomeLoc window = ref.getWindow();
|
||||
GenomeLoc locus = ref.getLocus();
|
||||
|
||||
int refBasePos = (int)(locus.getStart() - window.getStart());
|
||||
|
||||
int leftRun = 0;
|
||||
for ( int i = refBasePos - 1; i >= 0; i--) {
|
||||
if ( bases[i] != altAllele )
|
||||
break;
|
||||
leftRun++;
|
||||
}
|
||||
|
||||
int rightRun = 0;
|
||||
for ( int i = refBasePos + 1; i < bases.length; i++) {
|
||||
if ( bases[i] != altAllele )
|
||||
break;
|
||||
rightRun++;
|
||||
}
|
||||
|
||||
return Math.max(leftRun, rightRun);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class MappingQualityZero implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
List<SAMRecord> reads = pileup.getReads();
|
||||
int MQ0Count = 0;
|
||||
for (int i=0; i < reads.size(); i++) {
|
||||
if ( reads.get(i).getMappingQuality() == 0 )
|
||||
MQ0Count++;
|
||||
}
|
||||
return new Pair<String, String>("MAPQ0", String.format("%d", MQ0Count));
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return true; }
|
||||
}
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.ReadBacked;
|
||||
import org.broadinstitute.sting.utils.genotype.PosteriorsBacked;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class OnOffGenotype implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
|
||||
double ratio;
|
||||
|
||||
Genotype g = genotypes.get(0);
|
||||
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked) {
|
||||
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
|
||||
if ( weightedBalance.second == 0 )
|
||||
return null;
|
||||
ratio = weightedBalance.first;
|
||||
} else {
|
||||
Genotype genotype = VariantAnnotator.getFirstVariant(ref.getBase(), genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
|
||||
final String genotypeStr = genotype.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
|
||||
final String bases = pileup.getBases().toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
|
||||
ratio = computeSingleBalance(genotypeStr, bases);
|
||||
}
|
||||
|
||||
return new Pair<String, String>("OnOffGenotype", String.format("%.2f", ratio));
|
||||
}
|
||||
|
||||
private double computeSingleBalance(final String genotypeStr, final String bases) {
|
||||
|
||||
int on = 0, off = 0;
|
||||
for ( char base : BaseUtils.BASES ) {
|
||||
int count = BasicPileup.countBase(base, bases);
|
||||
if ( Utils.countOccurrences(base, genotypeStr) > 0 )
|
||||
on += count;
|
||||
else
|
||||
off += count;
|
||||
}
|
||||
|
||||
double ratio = (double)on / (double)(on + off);
|
||||
return ratio;
|
||||
}
|
||||
|
||||
private Pair<Double, Integer> computeWeightedBalance(char ref, List<Genotype> genotypes, ReadBackedPileup pileup) {
|
||||
|
||||
ArrayList<Double> onOffBalances = new ArrayList<Double>();
|
||||
ArrayList<Double> weights = new ArrayList<Double>();
|
||||
|
||||
// accumulate ratios and weights
|
||||
for ( Genotype g : genotypes ) {
|
||||
|
||||
if ( !(g instanceof ReadBacked) || !(g instanceof PosteriorsBacked) )
|
||||
continue;
|
||||
|
||||
final String genotypeStr = g.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
continue;
|
||||
|
||||
DiploidGenotype bestGenotype = DiploidGenotype.unorderedValueOf(genotypeStr);
|
||||
|
||||
// we care only about non-ref calls
|
||||
if ( bestGenotype.isHomRef(ref) )
|
||||
continue;
|
||||
|
||||
char a = genotypeStr.charAt(0);
|
||||
char b = genotypeStr.charAt(1);
|
||||
|
||||
// get the base counts at this pileup (minus deletions)
|
||||
ReadBackedPileup myPileup = ((ReadBacked)g).getPileup();
|
||||
|
||||
// if the pileup is null, we'll just have to use the full pileup (it's better than nothing)
|
||||
if ( myPileup == null )
|
||||
myPileup = pileup;
|
||||
|
||||
int[] counts = myPileup.getBasePileupAsCounts();
|
||||
int onCount = counts[BaseUtils.simpleBaseToBaseIndex(a)];
|
||||
if ( a != b )
|
||||
onCount += counts[BaseUtils.simpleBaseToBaseIndex(b)];
|
||||
int totalCount = 0;
|
||||
for (int i = 0; i < counts.length; i++)
|
||||
totalCount += counts[i];
|
||||
|
||||
double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
|
||||
posteriors = MathUtils.normalizeFromLog10(posteriors);
|
||||
weights.add(posteriors[bestGenotype.ordinal()]);
|
||||
onOffBalances.add((double)onCount / (double)totalCount);
|
||||
}
|
||||
|
||||
double ratio = 0.0;
|
||||
|
||||
if ( weights.size() > 0 ) {
|
||||
// normalize the weights
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
sum += weights.get(i);
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
weights.set(i, weights.get(i) / sum);
|
||||
|
||||
// calculate total weighted ratios
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
ratio += weights.get(i) * onOffBalances.get(i);
|
||||
}
|
||||
|
||||
return new Pair<Double, Integer>(ratio, weights.size());
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class RMSMappingQuality implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
List<SAMRecord> reads = pileup.getReads();
|
||||
int[] qualities = new int[reads.size()];
|
||||
for (int i=0; i < reads.size(); i++)
|
||||
qualities[i] = reads.get(i).getMappingQuality();
|
||||
double rms = MathUtils.rms(qualities);
|
||||
return new Pair<String, String>("RMSMAPQ", String.format("%.2f", rms));
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return true; }
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class SpanningDeletions implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
int deletions = 0;
|
||||
for (Integer offset : pileup.getOffsets() ) {
|
||||
if ( offset == -1 )
|
||||
deletions++;
|
||||
}
|
||||
return new Pair<String, String>("SpanningDeletions", String.format("%d", deletions));
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Contract for a single variant annotation (e.g. depth of coverage, allele
 * balance). Implementations are discovered reflectively by VariantAnnotator and
 * must therefore be concrete classes with a public no-arg constructor.
 */
public interface VariantAnnotation {

    /**
     * Computes this annotation at one locus.
     *
     * @param ref       the reference context at the locus
     * @param pileup    the read-backed pileup at the locus; contains MQ0 reads
     *                  only when {@link #useZeroQualityReads()} returns true
     * @param genotypes the genotype calls at the locus
     * @return a (INFO-field key, value) pair, or null when the annotation does
     *         not apply at this locus
     */
    public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes);

    /** @return true if this annotation wants mapping-quality-zero reads included in its pileup */
    public boolean useZeroQualityReads();

}
|
||||
|
|
@ -0,0 +1,265 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.variantstovcf.VariantsToVCF;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
||||
/**
|
||||
* VariantAnnotator annotates variants.
|
||||
*/
|
||||
//@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariationRod.class))
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@Reference(window=@Window(start=-20,stop=20))
|
||||
public class VariantAnnotator extends RodWalker<Integer, Integer> {
    @Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true)
    protected File VCF_OUT;
    @Argument(fullName="sampleName", shortName="sample", doc="The sample (NA-ID) corresponding to the variant input (for non-VCF input only)", required=false)
    protected String sampleName = null;
    @Argument(fullName="annotations", shortName="A", doc="Annotation types to apply to variant calls", required=false)
    protected String[] ANNOTATIONS;
    @Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations", required=false)
    protected Boolean USE_ALL_ANNOTATIONS = false;
    @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
    protected Boolean LIST = false;

    // writer and header for the annotated VCF output file
    private VCFWriter vcfWriter;
    private VCFHeader vcfHeader;

    // maps the upper-cased non-VCF sample name (from --sampleName) to its rod name ("variant")
    private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>();

    // the annotations selected via -A / -all, populated in initialize()
    private ArrayList<VariantAnnotation> requestedAnnotations;

    // mapping from class name to class
    private static HashMap<String, VariantAnnotation> allAnnotations = null;


    // Discovers every VariantAnnotation implementation on the classpath and
    // indexes an instance of each by upper-cased simple class name.
    private static void determineAllAnnotations() {
        allAnnotations = new HashMap<String, VariantAnnotation>();
        List<Class<? extends VariantAnnotation>> annotationClasses = PackageUtils.getClassesImplementingInterface(VariantAnnotation.class);
        for ( Class c : annotationClasses ) {
            try {
                VariantAnnotation annot = (VariantAnnotation) c.newInstance();
                allAnnotations.put(c.getSimpleName().toUpperCase(), annot);
            } catch (InstantiationException e) {
                throw new StingException(String.format("Cannot instantiate annotation class '%s': must be concrete class", c.getSimpleName()));
            } catch (IllegalAccessException e) {
                throw new StingException(String.format("Cannot instantiate annotation class '%s': must have no-arg constructor", c.getSimpleName()));
            }
        }
    }

    // Prints the available annotation class names and terminates the JVM
    // (implements the -ls command-line option).
    private void listFiltersAndExit() {
        List<Class<? extends VariantAnnotation>> annotationClasses = PackageUtils.getClassesImplementingInterface(VariantAnnotation.class);
        out.println("\nAvailable annotations:");
        for (int i = 0; i < annotationClasses.size(); i++)
            out.println("\t" + annotationClasses.get(i).getSimpleName());
        out.println();
        System.exit(0);
    }

    /**
     * Prepare the output file and the list of available features.
     */
    public void initialize() {

        if ( LIST )
            listFiltersAndExit();

        // get the list of all sample names from the various VCF input rods
        HashSet<String> samples = new HashSet<String>();
        VCFUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap<Pair<String, String>, String>());

        // add the non-VCF sample from the command-line, if applicable
        if ( sampleName != null  ) {
            nonVCFsampleName.put(sampleName.toUpperCase(), "variant");
            samples.add(sampleName.toUpperCase());
        }

        // if there are no valid samples, die
        if ( samples.size() == 0 )
            throw new StingException("There are no samples input at all; use the --sampleName argument to specify one");

        determineAllAnnotations();

        // resolve -all / -A into the concrete list of annotators to run
        if ( USE_ALL_ANNOTATIONS ) {
            requestedAnnotations = new ArrayList<VariantAnnotation>(allAnnotations.values());
        } else {
            requestedAnnotations = new ArrayList<VariantAnnotation>();
            if ( ANNOTATIONS != null ) {
                for ( String requested : ANNOTATIONS ) {
                    // lookup is case-insensitive: keys are upper-cased simple class names
                    VariantAnnotation annot = allAnnotations.get(requested.toUpperCase());
                    if ( annot == null )
                        throw new StingException("Unknown annotation '" + requested + "'. Issue the '-ls' argument to list available annotations.");
                    requestedAnnotations.add(annot);
                }
            }
        }

        // setup the header fields
        Map<String, String> hInfo = new HashMap<String, String>();
        hInfo.put("format", VCFWriter.VERSION);
        hInfo.put("source", "VariantAnnotator");
        hInfo.put("reference", this.getToolkit().getArguments().referenceFile.getName());

        vcfHeader = new VCFHeader(hInfo, samples);
        vcfWriter = new VCFWriter(vcfHeader, VCF_OUT);
    }

    /**
     * Initialize the number of loci processed to zero.
     *
     * @return 0
     */
    public Integer reduceInit() { return 0; }


    /**
     * We want reads that span deletions
     *
     * @return true
     */
    public boolean includeReadsWithDeletionAtLoci() { return true; }

    /**
     * For each site of interest, annotate based on the requested annotation types
     *
     * @param tracker  the meta-data tracker
     * @param ref      the reference base
     * @param context  the context for the given locus
     * @return 1 if the locus was successfully processed, 0 if otherwise
     */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( tracker == null )
            return 0;

        RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("variant", null);
        // ignore places where we don't have a variant
        if ( rods == null || rods.getRecords().size() == 0 )
            return 0;

        Map<String, String> annotations = new HashMap<String, String>();
        // only the first record in the "variant" track is annotated
        VariationRod variant = (VariationRod)rods.getRecords().get(0);

        // if the reference base is not ambiguous, the variant is a SNP, and it's the appropriate type, we can annotate
        if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 && variant.isSNP() && variant instanceof VariantBackedByGenotype ) {
            final List<org.broadinstitute.sting.utils.genotype.Genotype> genotypes = ((VariantBackedByGenotype)variant).getGenotypes();
            if ( genotypes != null )
                annotations = getAnnotations(ref, context, genotypes, requestedAnnotations);
        }

        // the variant is always written out, annotated or not
        writeVCF(tracker, ref, context, variant, annotations);

        return 1;
    }

    /**
     * Static entry point for other walkers (e.g. the UnifiedGenotyper): runs every
     * discovered annotation at this locus and returns the INFO-field key/value map.
     */
    public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, List<Genotype> genotypes) {
        if ( allAnnotations == null )
            determineAllAnnotations();
        return getAnnotations(ref, context, genotypes, allAnnotations.values());
    }

    /**
     * Runs the given annotations at this locus. Builds two pileups — the full one
     * and one with mapping-quality-zero reads removed — and hands each annotator
     * whichever its useZeroQualityReads() flag requests. Annotators returning null
     * contribute nothing to the result map.
     */
    public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, List<Genotype> genotypes, Collection<VariantAnnotation> annotations) {

        // set up the pileup for the full collection of reads at this position
        ReadBackedPileup fullPileup = new ReadBackedPileup(ref.getBase(), context);

        // also, set up the pileup for the mapping-quality-zero-free context
        List<SAMRecord> reads = context.getReads();
        List<Integer> offsets = context.getOffsets();
        Iterator<SAMRecord> readIter = reads.iterator();
        Iterator<Integer> offsetIter = offsets.iterator();
        ArrayList<SAMRecord> MQ0freeReads = new ArrayList<SAMRecord>();
        ArrayList<Integer> MQ0freeOffsets = new ArrayList<Integer>();
        while ( readIter.hasNext() ) {
            SAMRecord read = readIter.next();
            Integer offset = offsetIter.next();
            if ( read.getMappingQuality() > 0 ) {
                MQ0freeReads.add(read);
                MQ0freeOffsets.add(offset);
            }
        }
        ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), MQ0freeReads, MQ0freeOffsets);

        HashMap<String, String> results = new HashMap<String, String>();

        for ( VariantAnnotation annotator : annotations) {
            Pair<String, String> annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), genotypes);
            if ( annot != null )
                results.put(annot.first, annot.second);
        }

        return results;
    }


    // Adds the computed annotations to the variant's VCF record (creating one for
    // non-VCF input) and emits it; silently skips loci where no record can be built.
    private void writeVCF(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariationRod variant, Map<String, String> annotations) {
        VCFRecord rec = getVCFRecord(tracker, ref, context, variant);
        if ( rec != null ) {
            rec.addInfoFields(annotations);
            vcfWriter.addRecord(rec);
        }
    }


    // Returns the variant's own VCF record when the input rod is VCF; otherwise
    // synthesizes one via VariantsToVCF using the command-line sample name.
    private VCFRecord getVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariationRod variant) {
        if ( variant instanceof RodVCF ) {
            return ((RodVCF)variant).mCurrentRecord;
        } else {
            List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
            Map<VCFHeader.HEADER_FIELDS, String> map = new HashMap<VCFHeader.HEADER_FIELDS, String>();
            return VariantsToVCF.generateVCFRecord(tracker, ref, context, vcfHeader, gt, map, nonVCFsampleName, out, false, false);
        }
    }

    /**
     * Increment the number of loci processed.
     *
     * @param value result of the map.
     * @param sum   accumulator for the reduce.
     * @return the new number of loci processed.
     */
    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }

    /**
     * Tell the user the number of loci processed and close out the new variants file.
     *
     * @param result  the number of loci seen.
     */
    public void onTraversalDone(Integer result) {
        out.printf("Processed %d loci.\n", result);

        vcfWriter.close();
    }

    /**
     * Returns the first genotype in the list that is a variant with respect to the
     * given reference base, or null if none exists.
     */
    public static Genotype getFirstVariant(char ref, List<Genotype> genotypes) {
        for ( Genotype g : genotypes ) {
            if ( g.isVariant(ref) )
                return g;
        }
        return null;
    }

    /**
     * Returns the first heterozygous genotype in the list, or null if none exists.
     */
    public static Genotype getFirstHetVariant(List<Genotype> genotypes) {
        for ( Genotype g : genotypes ) {
            if ( g.isHet() )
                return g;
        }
        return null;
    }
}
|
||||
|
|
@ -33,7 +33,7 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
|
|||
private VCFWriter vcfWriter;
|
||||
|
||||
// a map of rod name to uniquified sample name
|
||||
HashMap<Pair<String, String>, String> rodNamesToSampleNames = new HashMap<Pair<String, String>, String>();
|
||||
private HashMap<Pair<String, String>, String> rodNamesToSampleNames = new HashMap<Pair<String, String>, String>();
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -53,7 +53,7 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
|
|||
System.exit(0);
|
||||
}
|
||||
|
||||
// get the list of all sample names from the various input rods (the need to be uniquified in case there's overlap)
|
||||
// get the list of all sample names from the various input rods (they need to be uniquified in case there's overlap)
|
||||
HashSet<String> samples = new HashSet<String>();
|
||||
VCFUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, rodNamesToSampleNames);
|
||||
|
||||
|
|
|
|||
|
|
@ -97,7 +97,8 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setReads(context.getReads());
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
((SampleBacked)call).setSampleName(sample);
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(altAllele);
|
||||
|
||||
double[] allelePosteriors = new double[] { refPosterior, posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] };
|
||||
normalizeFromLog10(allelePosteriors);
|
||||
allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors);
|
||||
//logger.debug("Normalized posteriors for " + altAllele + ": " + allelePosteriors[0] + " " + allelePosteriors[1] + " " + allelePosteriors[2]);
|
||||
|
||||
// calculate the posterior weighted frequencies
|
||||
|
|
@ -191,7 +191,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
// multiply by null allele frequency priors to get AF posteriors, then normalize
|
||||
for (int i = 0; i < frequencyEstimationPoints; i++)
|
||||
alleleFrequencyPosteriors[baseIndex][i] = log10AlleleFrequencyPriors[i] + log10PofDgivenAFi[baseIndex][i];
|
||||
normalizeFromLog10(alleleFrequencyPosteriors[baseIndex]);
|
||||
alleleFrequencyPosteriors[baseIndex] = MathUtils.normalizeFromLog10(alleleFrequencyPosteriors[baseIndex]);
|
||||
|
||||
// calculate p(f>0)
|
||||
double sum = 0.0;
|
||||
|
|
@ -231,34 +231,6 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
return HWvalues;
|
||||
}
|
||||
|
||||
private static void normalizeFromLog10(double[] array) {
|
||||
// for precision purposes, we need to add (or really subtract, since they're
|
||||
// all negative) the largest value; also, we need to convert to normal-space.
|
||||
double maxValue = findMaxEntry(array).first;
|
||||
for (int i = 0; i < array.length; i++)
|
||||
array[i] = Math.pow(10, array[i] - maxValue);
|
||||
|
||||
// normalize
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < array.length; i++)
|
||||
sum += array[i];
|
||||
for (int i = 0; i < array.length; i++)
|
||||
array[i] /= sum;
|
||||
}
|
||||
|
||||
// returns the maximum value in the array and its index
|
||||
private static Pair<Double, Integer> findMaxEntry(double[] array) {
|
||||
int index = 0;
|
||||
double max = array[0];
|
||||
for (int i = 1; i < array.length; i++) {
|
||||
if ( array[i] > max ) {
|
||||
max = array[i];
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
return new Pair<Double, Integer>(max, index);
|
||||
}
|
||||
|
||||
private void printAlleleFrequencyData(char ref, GenomeLoc loc) {
|
||||
|
||||
verboseWriter.println("Location=" + loc + ", ref=" + ref);
|
||||
|
|
@ -312,7 +284,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
}
|
||||
|
||||
double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
int bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second;
|
||||
int bestAFguess = Utils.findIndexOfMaxEntry(alleleFrequencyPosteriors[indexOfMax]);
|
||||
|
||||
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
||||
if ( !ALL_BASE_MODE && (bestAFguess == 0 || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
|
||||
|
|
@ -328,7 +300,8 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setReads(context.getReads());
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
((SampleBacked)call).setSampleName(sample);
|
||||
|
|
@ -360,63 +333,6 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
if ( dbsnp != null )
|
||||
((IDBacked)locusdata).setID(dbsnp.getRS_ID());
|
||||
}
|
||||
if ( locusdata instanceof ArbitraryFieldsBacked) {
|
||||
ArrayList<Double> refBalances = new ArrayList<Double>();
|
||||
ArrayList<Double> onOffBalances = new ArrayList<Double>();
|
||||
ArrayList<Double> weights = new ArrayList<Double>();
|
||||
|
||||
// accumulate ratios and weights
|
||||
for ( java.util.Map.Entry<String, GenotypeLikelihoods> entry : GLs.entrySet() ) {
|
||||
// determine the best genotype
|
||||
Integer sorted[] = Utils.SortPermutation(entry.getValue().getPosteriors());
|
||||
DiploidGenotype bestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
|
||||
// we care only about het calls
|
||||
if ( bestGenotype.isHetRef(ref) ) {
|
||||
|
||||
// make sure the alt base is our target alt
|
||||
char altBase = bestGenotype.base1 != ref ? bestGenotype.base1 : bestGenotype.base2;
|
||||
if ( altBase != baseOfMax )
|
||||
continue;
|
||||
|
||||
// get the base counts at this pileup (minus deletions)
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(entry.getKey()).getContext(StratifiedContext.OVERALL));
|
||||
int[] counts = pileup.getBasePileupAsCounts();
|
||||
int refCount = counts[BaseUtils.simpleBaseToBaseIndex(ref)];
|
||||
int altCount = counts[BaseUtils.simpleBaseToBaseIndex(altBase)];
|
||||
int totalCount = 0;
|
||||
for (int i = 0; i < counts.length; i++)
|
||||
totalCount += counts[i];
|
||||
|
||||
// add the entries
|
||||
weights.add(entry.getValue().getNormalizedPosteriors()[bestGenotype.ordinal()]);
|
||||
refBalances.add((double)refCount / (double)(refCount + altCount));
|
||||
onOffBalances.add((double)(refCount + altCount) / (double)totalCount);
|
||||
}
|
||||
}
|
||||
|
||||
if ( weights.size() > 0 ) {
|
||||
// normalize the weights
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
sum += weights.get(i);
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
weights.set(i, weights.get(i) / sum);
|
||||
|
||||
// calculate total weighted ratios
|
||||
double normalizedRefRatio = 0.0;
|
||||
double normalizedOnOffRatio = 0.0;
|
||||
for (int i = 0; i < weights.size(); i++) {
|
||||
normalizedRefRatio += weights.get(i) * refBalances.get(i);
|
||||
normalizedOnOffRatio += weights.get(i) * onOffBalances.get(i);
|
||||
}
|
||||
|
||||
HashMap<String, String> fields = new HashMap<String, String>();
|
||||
fields.put("AB", String.format("%.2f", normalizedRefRatio));
|
||||
fields.put("OO", String.format("%.2f", normalizedOnOffRatio));
|
||||
((ArbitraryFieldsBacked)locusdata).setFields(fields);
|
||||
}
|
||||
}
|
||||
if ( locusdata instanceof SLODBacked ) {
|
||||
// the overall lod
|
||||
double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setReads(discoveryGL.first.getReads());
|
||||
((ReadBacked)call).setPileup(discoveryGL.first);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
((SampleBacked)call).setSampleName(sample);
|
||||
|
|
@ -96,29 +96,6 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
if ( dbsnp != null )
|
||||
((IDBacked)locusdata).setID(dbsnp.getRS_ID());
|
||||
}
|
||||
if ( locusdata instanceof ArbitraryFieldsBacked) {
|
||||
|
||||
DiploidGenotype bestGenotype = DiploidGenotype.values()[bestIndex];
|
||||
// we care only about het calls
|
||||
if ( bestGenotype.isHetRef(ref) ) {
|
||||
|
||||
char altBase = bestGenotype.base1 != ref ? bestGenotype.base1 : bestGenotype.base2;
|
||||
|
||||
// get the base counts at this pileup (minus deletions)
|
||||
int[] counts = discoveryGL.first.getBasePileupAsCounts();
|
||||
int refCount = counts[BaseUtils.simpleBaseToBaseIndex(ref)];
|
||||
int altCount = counts[BaseUtils.simpleBaseToBaseIndex(altBase)];
|
||||
int totalCount = 0;
|
||||
for (int i = 0; i < counts.length; i++)
|
||||
totalCount += counts[i];
|
||||
|
||||
// set the ratios
|
||||
HashMap<String, String> fields = new HashMap<String, String>();
|
||||
fields.put("AB", String.format("%.2f", (double)refCount / (double)(refCount + altCount)));
|
||||
fields.put("OO", String.format("%.2f",(double)(refCount + altCount) / (double)totalCount));
|
||||
((ArbitraryFieldsBacked)locusdata).setFields(fields);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<List<Genotype>, GenotypeLocusData>(Arrays.asList(call), locusdata);
|
||||
|
|
|
|||
|
|
@ -30,27 +30,23 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
|
||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeLocusData;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
@ReadFilters({ZeroMappingQualityReadFilter.class})
|
||||
@Reference(window=@Window(start=-20,stop=20))
|
||||
public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeLocusData>, Integer> {
|
||||
|
||||
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
|
|
@ -72,8 +68,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
|
|||
// keep track of some metrics about our calls
|
||||
private CallMetrics callsMetrics;
|
||||
|
||||
// are we being called by another walker (which gets around the traversal-level filtering)?
|
||||
private boolean calledByAnotherWalker = true;
|
||||
|
||||
/** Enable deletions in the pileup **/
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
|
@ -134,7 +128,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
|
|||
return;
|
||||
|
||||
// if we got here, then we were instantiated by the GATK engine
|
||||
calledByAnotherWalker = false;
|
||||
|
||||
// create the output writer stream
|
||||
if ( VARIANTS_FILE != null )
|
||||
|
|
@ -143,7 +136,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
|
|||
this.getToolkit().getArguments().referenceFile.getName(),
|
||||
samples);
|
||||
else
|
||||
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
|
||||
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out,
|
||||
"UnifiedGenotyper",
|
||||
this.getToolkit().getArguments().referenceFile.getName(),
|
||||
samples);
|
||||
callsMetrics = new CallMetrics();
|
||||
|
|
@ -154,28 +148,33 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
|
|||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
* @param context contextual information around the locus
|
||||
* @param fullContext contextual information around the locus
|
||||
*/
|
||||
public Pair<List<Genotype>, GenotypeLocusData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
||||
public Pair<List<Genotype>, GenotypeLocusData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext fullContext) {
|
||||
char ref = Character.toUpperCase(refContext.getBase());
|
||||
if ( !BaseUtils.isRegularBase(ref) )
|
||||
return null;
|
||||
|
||||
// because other walkers externally call this map method with their own contexts,
|
||||
// we need to make sure that the reads are appropriately filtered
|
||||
if ( calledByAnotherWalker )
|
||||
context = filterAlignmentContext(context);
|
||||
// remove mapping quality zero reads
|
||||
AlignmentContext MQ0freeContext = filterAlignmentContext(fullContext);
|
||||
|
||||
// an optimization to speed things up when there is no coverage or when overly covered
|
||||
if ( context.getReads().size() == 0 ||
|
||||
(UAC.MAX_READS_IN_PILEUP > 0 && context.getReads().size() > UAC.MAX_READS_IN_PILEUP) )
|
||||
if ( MQ0freeContext.getReads().size() == 0 ||
|
||||
(UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getReads().size() > UAC.MAX_READS_IN_PILEUP) )
|
||||
return null;
|
||||
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||
return gcm.calculateGenotype(tracker, ref, context, priors);
|
||||
Pair<List<Genotype>, GenotypeLocusData> call = gcm.calculateGenotype(tracker, ref, MQ0freeContext, priors);
|
||||
|
||||
// annotate the call, if possible
|
||||
if ( call != null && call.second != null && call.second instanceof ArbitraryFieldsBacked) {
|
||||
Map<String, String> annotations = VariantAnnotator.getAnnotations(refContext, fullContext, call.first);
|
||||
((ArbitraryFieldsBacked)call.second).setFields(annotations);
|
||||
}
|
||||
|
||||
return call;
|
||||
}
|
||||
|
||||
// filter the given alignment context
|
||||
private AlignmentContext filterAlignmentContext(AlignmentContext context) {
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
|
|
@ -185,7 +184,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
|
|||
|
||||
for (int i = 0; i < reads.size(); i++) {
|
||||
SAMRecord read = reads.get(i);
|
||||
if ( read.getMappingQuality() != 0 && read.getReadGroup() != null ) {
|
||||
if ( read.getMappingQuality() != 0 ) {
|
||||
newReads.add(read);
|
||||
newOffsets.add(offsets.get(i));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -262,5 +262,26 @@ public class MathUtils {
|
|||
return Math.sqrt(rms);
|
||||
}
|
||||
|
||||
/**
|
||||
* normalizes the log10-based array
|
||||
* @param array the array to be normalized
|
||||
*/
|
||||
public static double[] normalizeFromLog10(double[] array) {
|
||||
double[] normalized = new double[array.length];
|
||||
|
||||
// for precision purposes, we need to add (or really subtract, since they're
|
||||
// all negative) the largest value; also, we need to convert to normal-space.
|
||||
double maxValue = Utils.findMaxEntry(array);
|
||||
for (int i = 0; i < array.length; i++)
|
||||
normalized[i] = Math.pow(10, array[i] - maxValue);
|
||||
|
||||
// normalize
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < array.length; i++)
|
||||
sum += normalized[i];
|
||||
for (int i = 0; i < array.length; i++)
|
||||
normalized[i] /= sum;
|
||||
|
||||
return normalized;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -486,6 +486,31 @@ public class Utils {
|
|||
}
|
||||
|
||||
|
||||
// returns the maximum value in the array
|
||||
public static double findMaxEntry(double[] array) {
|
||||
return findIndexAndMaxEntry(array).first;
|
||||
}
|
||||
|
||||
// returns the index of the maximum value in the array
|
||||
public static int findIndexOfMaxEntry(double[] array) {
|
||||
return findIndexAndMaxEntry(array).second;
|
||||
}
|
||||
|
||||
// returns the the maximum value and its index in the array
|
||||
private static Pair<Double, Integer> findIndexAndMaxEntry(double[] array) {
|
||||
if ( array.length == 0 )
|
||||
return new Pair<Double, Integer>(0.0, -1);
|
||||
int index = 0;
|
||||
double max = array[0];
|
||||
for (int i = 1; i < array.length; i++) {
|
||||
if ( array[i] > max ) {
|
||||
max = array[i];
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
return new Pair<Double, Integer>(max, index);
|
||||
}
|
||||
|
||||
/** Returns indices of all occurrences of the specified symbol in the string */
|
||||
public static int[] indexOfAll(String s, int ch) {
|
||||
int[] pos = new int[64];
|
||||
|
|
|
|||
|
|
@ -1,11 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @author ebanks
|
||||
*
|
||||
* Interface ReadBacked
|
||||
*
|
||||
|
|
@ -13,16 +11,16 @@ import java.util.List;
|
|||
*/
|
||||
public interface ReadBacked {
|
||||
/**
|
||||
* get the SAM records that back this genotype call
|
||||
* @return a list of SAM records
|
||||
* get the pileup that backs this genotype call
|
||||
* @return a pileup
|
||||
*/
|
||||
public List<SAMRecord> getReads();
|
||||
public ReadBackedPileup getPileup();
|
||||
|
||||
/**
|
||||
*
|
||||
* @param reads the reads for this genotype
|
||||
* @param pileup the pileup for this genotype
|
||||
*/
|
||||
public void setReads(List<SAMRecord> reads);
|
||||
public void setPileup(ReadBackedPileup pileup);
|
||||
|
||||
/**
|
||||
* get the count of reads
|
||||
|
|
|
|||
|
|
@ -109,11 +109,13 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
GeliGenotypeCall gCall = (GeliGenotypeCall)call;
|
||||
|
||||
char ref = gCall.getReference();
|
||||
List<SAMRecord> recs = gCall.getReads();
|
||||
int readCount = recs.size();
|
||||
int readCount = gCall.getReadCount();
|
||||
double maxMappingQual = 0;
|
||||
for (SAMRecord rec : recs) {
|
||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||
if ( gCall.getPileup() != null ) {
|
||||
List<SAMRecord> recs = gCall.getPileup().getReads();
|
||||
for (SAMRecord rec : recs) {
|
||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||
}
|
||||
}
|
||||
|
||||
double[] posteriors = gCall.getPosteriors();
|
||||
|
|
|
|||
|
|
@ -1,11 +1,8 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
|
|
@ -20,7 +17,7 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
|
|||
private final char mRefBase;
|
||||
private final GenomeLoc mLocation;
|
||||
|
||||
private List<SAMRecord> mReads;
|
||||
private ReadBackedPileup mPileup = null;
|
||||
private double[] mPosteriors;
|
||||
|
||||
|
||||
|
|
@ -32,6 +29,8 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
|
|||
/**
|
||||
* Generate a single sample genotype object
|
||||
*
|
||||
* @param ref the ref character
|
||||
* @param loc the genome loc
|
||||
*/
|
||||
public GeliGenotypeCall(char ref, GenomeLoc loc) {
|
||||
mRefBase = ref;
|
||||
|
|
@ -40,7 +39,6 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
|
|||
// fill in empty data
|
||||
mPosteriors = new double[10];
|
||||
Arrays.fill(mPosteriors, Double.MIN_VALUE);
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public GeliGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError) {
|
||||
|
|
@ -66,12 +64,10 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
|
|||
else
|
||||
mNextGenotype = mRefGenotype;
|
||||
mPosteriors[mNextGenotype.ordinal()] = -1.0 * negLog10PError;
|
||||
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public void setReads(List<SAMRecord> reads) {
|
||||
mReads = new ArrayList<SAMRecord>(reads);
|
||||
public void setPileup(ReadBackedPileup pileup) {
|
||||
mPileup = pileup;
|
||||
}
|
||||
|
||||
public void setPosteriors(double[] posteriors) {
|
||||
|
|
@ -98,7 +94,7 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
|
|||
public String toString() {
|
||||
lazyEval();
|
||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError=%.2f",
|
||||
getLocation(), mBestGenotype, mRefGenotype, mRefBase, mReads.size(), getNegLog10PError());
|
||||
getLocation(), mBestGenotype, mRefGenotype, mRefBase, getReadCount(), getNegLog10PError());
|
||||
}
|
||||
|
||||
private void lazyEval() {
|
||||
|
|
@ -223,12 +219,12 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
|
|||
}
|
||||
|
||||
/**
|
||||
* get the SAM records that back this genotype call
|
||||
* get the pileup that backs this genotype call
|
||||
*
|
||||
* @return a list of SAM records
|
||||
* @return a pileup
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
return mReads;
|
||||
public ReadBackedPileup getPileup() {
|
||||
return mPileup;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -237,7 +233,7 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
|
|||
* @return the number of reads we're backed by
|
||||
*/
|
||||
public int getReadCount() {
|
||||
return mReads.size();
|
||||
return (mPileup != null ? mPileup.getReads().size() : 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
nextVrsRef = lks[9] - posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()];
|
||||
|
||||
double maxMappingQual = 0;
|
||||
List<SAMRecord> recs = gCall.getReads();
|
||||
List<SAMRecord> recs = gCall.getPileup().getReads();
|
||||
int readDepth = recs.size();
|
||||
for (SAMRecord rec : recs) {
|
||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||
|
|
|
|||
|
|
@ -1,17 +1,13 @@
|
|||
package org.broadinstitute.sting.utils.genotype.glf;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class GLFGenotypeCall
|
||||
* <p/>
|
||||
|
|
@ -21,7 +17,7 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked
|
|||
private final char mRefBase;
|
||||
private final GenomeLoc mLocation;
|
||||
|
||||
private List<SAMRecord> mReads;
|
||||
private ReadBackedPileup mPileup;
|
||||
private double[] mLikelihoods;
|
||||
|
||||
private double mNegLog10PError;
|
||||
|
|
@ -30,6 +26,8 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked
|
|||
/**
|
||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||
*
|
||||
* @param ref the ref character
|
||||
* @param loc the genome loc
|
||||
*/
|
||||
public GLFGenotypeCall(char ref, GenomeLoc loc) {
|
||||
mRefBase = ref;
|
||||
|
|
@ -39,12 +37,12 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked
|
|||
mLocation = loc;
|
||||
mLikelihoods = new double[10];
|
||||
Arrays.fill(mLikelihoods, Double.MIN_VALUE);
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
mPileup = null;
|
||||
mNegLog10PError = Double.MIN_VALUE;
|
||||
}
|
||||
|
||||
public void setReads(List<SAMRecord> reads) {
|
||||
mReads = new ArrayList<SAMRecord>(reads);
|
||||
public void setPileup(ReadBackedPileup pileup) {
|
||||
mPileup = pileup;
|
||||
}
|
||||
|
||||
public void setLikelihoods(double[] likelihoods) {
|
||||
|
|
@ -68,7 +66,7 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked
|
|||
|
||||
public String toString() {
|
||||
return String.format("%s ref=%s depth=%d negLog10PError=%.2f",
|
||||
getLocation(), mRefBase, mReads.size(), getNegLog10PError());
|
||||
getLocation(), mRefBase, getReadCount(), getNegLog10PError());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -155,12 +153,12 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked
|
|||
}
|
||||
|
||||
/**
|
||||
* get the SAM records that back this genotype call
|
||||
* get the pileup that backs this genotype call
|
||||
*
|
||||
* @return a list of SAM records
|
||||
* @return a pileup
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
return mReads;
|
||||
public ReadBackedPileup getPileup() {
|
||||
return mPileup;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -169,7 +167,7 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked
|
|||
* @return the number of reads we're backed by
|
||||
*/
|
||||
public int getReadCount() {
|
||||
return mReads.size();
|
||||
return (mPileup != null ? mPileup.getReads().size() : 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -133,7 +133,9 @@ public class GLFWriter implements GenotypeWriter {
|
|||
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG); // transform! ... to negitive log likelihoods
|
||||
|
||||
// calculate the RMS mapping qualities and the read depth
|
||||
double rms = calculateRMS(gCall.getReads());
|
||||
double rms = 0.0;
|
||||
if ( gCall.getPileup() != null )
|
||||
rms = calculateRMS(gCall.getPileup().getReads());
|
||||
int readCount = gCall.getReadCount();
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(gCall.getLocation().getContig()),(int)gCall.getLocation().getStart(),(float)rms,ref,readCount,obj);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,12 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -20,7 +17,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
|
|||
private final char mRefBase;
|
||||
private final GenomeLoc mLocation;
|
||||
|
||||
private List<SAMRecord> mReads;
|
||||
private ReadBackedPileup mPileup = null;
|
||||
private int mCoverage = 0;
|
||||
private double[] mPosteriors;
|
||||
|
||||
|
|
@ -41,7 +38,6 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
|
|||
mPosteriors = new double[10];
|
||||
Arrays.fill(mPosteriors, Double.MIN_VALUE);
|
||||
mSampleName = "";
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, int coverage, String sample) {
|
||||
|
|
@ -68,12 +64,10 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
|
|||
else
|
||||
mNextGenotype = mRefGenotype;
|
||||
mPosteriors[mNextGenotype.ordinal()] = -1.0 * negLog10PError;
|
||||
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public void setReads(List<SAMRecord> reads) {
|
||||
mReads = new ArrayList<SAMRecord>(reads);
|
||||
public void setPileup(ReadBackedPileup pileup) {
|
||||
mPileup = pileup;
|
||||
}
|
||||
|
||||
public void setPosteriors(double[] posteriors) {
|
||||
|
|
@ -104,7 +98,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
|
|||
public String toString() {
|
||||
lazyEval();
|
||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError=%.2f",
|
||||
getLocation(), mBestGenotype, mRefGenotype, mRefBase, mReads.size(), getNegLog10PError());
|
||||
getLocation(), mBestGenotype, mRefGenotype, mRefBase, getReadCount(), getNegLog10PError());
|
||||
}
|
||||
|
||||
private void lazyEval() {
|
||||
|
|
@ -229,12 +223,12 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
|
|||
}
|
||||
|
||||
/**
|
||||
* get the SAM records that back this genotype call
|
||||
* get the pileup that backs this genotype call
|
||||
*
|
||||
* @return a list of SAM records
|
||||
* @return a pileup
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
return mReads;
|
||||
public ReadBackedPileup getPileup() {
|
||||
return mPileup;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -243,7 +237,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
|
|||
* @return the number of reads we're backed by
|
||||
*/
|
||||
public int getReadCount() {
|
||||
return (mCoverage > 0 ? mCoverage : mReads.size());
|
||||
return (mCoverage > 0 ? mCoverage : (mPileup != null ? mPileup.getReads().size() : 0));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -126,14 +126,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
|
||||
Map<String, VCFGenotypeCall> genotypeMap = genotypeListToSampleNameMap(genotypes);
|
||||
|
||||
// keep track of the total read depth
|
||||
int totalReadDepth = 0;
|
||||
|
||||
for (String name : mHeader.getGenotypeSamples()) {
|
||||
if (genotypeMap.containsKey(name)) {
|
||||
Genotype gtype = genotypeMap.get(name);
|
||||
VCFGenotypeRecord record = VCFUtils.createVCFGenotypeRecord(params, (VCFGenotypeCall)gtype);
|
||||
totalReadDepth += ((VCFGenotypeCall)gtype).getReadCount();
|
||||
params.addGenotypeRecord(record);
|
||||
genotypeMap.remove(name);
|
||||
} else {
|
||||
|
|
@ -150,7 +146,6 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
|
||||
// info fields
|
||||
Map<String, String> infoFields = getInfoFields((VCFGenotypeLocusData)locusdata, params);
|
||||
infoFields.put("DP", String.format("%d", totalReadDepth));
|
||||
|
||||
// q-score
|
||||
double qual = (locusdata == null) ? 0 : ((VCFGenotypeLocusData)locusdata).getConfidence();
|
||||
|
|
|
|||
|
|
@ -31,8 +31,8 @@ public class VCFRecord {
|
|||
private double mQual;
|
||||
// our filter string
|
||||
private String mFilterString;
|
||||
// our info fields
|
||||
private final Map<String, String> mInfoFields = new HashMap<String, String>();
|
||||
// our info fields -- use a TreeMap to ensure they can be pulled out in order (so it passes integration tests)
|
||||
private final Map<String, String> mInfoFields = new TreeMap<String, String>();
|
||||
|
||||
private final String mGenotypeFormatString;
|
||||
|
||||
|
|
@ -336,6 +336,10 @@ public class VCFRecord {
|
|||
public void addInfoField(String key, String value) {
|
||||
//System.out.printf("Adding info field %s=%s%n", key, value);
|
||||
this.mInfoFields.put(key, value);
|
||||
|
||||
// remove the empty token if it's present
|
||||
if ( mInfoFields.containsKey(".") )
|
||||
mInfoFields.remove(".");
|
||||
}
|
||||
|
||||
public void printInfoFields() {
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ public class RodVCFTest extends BaseTest {
|
|||
@Test
|
||||
public void testToString() {
|
||||
// slightly altered line, due to map ordering
|
||||
final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
|
||||
final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tAF=0.786;DP=258;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
|
||||
RodVCF vcf = getVCFObject();
|
||||
VCFReader reader = new VCFReader(vcfFile);
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||
public static String baseTestString() {
|
||||
return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 20:10,000,000-10,010,000 -vcf %s";
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoAnnots1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf ", 1,
|
||||
Arrays.asList("4e231f16c202d88ca3adb17168af0e0f"));
|
||||
executeTest("testNoAnnots1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoAnnots2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf ", 1,
|
||||
Arrays.asList("ef0c59e47a2afcbecf2bcef6aa01e7e7"));
|
||||
executeTest("testNoAnnots2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllAnnots1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf ", 1,
|
||||
Arrays.asList("ced92b5ac9e2692c4d8acce1235317b6"));
|
||||
executeTest("testAllAnnots1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllAnnots2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf ", 1,
|
||||
Arrays.asList("573a6c02f659b2c4cf014f84bd0b9c8a"));
|
||||
executeTest("testAllAnnots2", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -14,7 +14,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
|||
public void testSimpleVenn() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SimpleVenn", 1,
|
||||
Arrays.asList("1b8e26cd30e993da9318abd6475f38d0"));
|
||||
Arrays.asList("2fc12b6f02f4cb589f2fd134e765d6b7"));
|
||||
executeTest("testSimpleVenn", spec);
|
||||
}
|
||||
|
||||
|
|
@ -22,7 +22,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
|||
public void testSNPConcordance() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
|
||||
Arrays.asList("5a89b8edcdf2e3f469ac354cb1524033"));
|
||||
Arrays.asList("142bcfcc6bb404cd4bd1a4624fa9a15e"));
|
||||
executeTest("testSNPConcordance", spec);
|
||||
}
|
||||
|
||||
|
|
@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
|||
public void testNWayVenn() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -B set3,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/CEU.sample.vcf -CT NWayVenn", 1,
|
||||
Arrays.asList("1dec083580b75a9c59fcb61426117134"));
|
||||
Arrays.asList("e067aace9080bafdf3312632de60b4fc"));
|
||||
executeTest("testNWayVenn", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testIntervals() {
|
||||
String[] md5DoC = {"d11547059193b90e2fd1eb47dac18999", "21c8e1f9dc65fdfb39347547f9b04011"};
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5DoC));
|
||||
executeTest("testDoCFilter", spec1);
|
||||
|
||||
String[] md5AlleleBalance = {"409f2b7fb193919d28c20be4dc5c4516", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5AlleleBalance));
|
||||
executeTest("testAlleleBalanceFilter", spec2);
|
||||
|
||||
String[] md5Strand = {"e5806925867088edeee9f6276ab030f3", "0f7db0aad764268ee8fa3b857df8d87d"};
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Strand));
|
||||
executeTest("testStrandFilter", spec3);
|
||||
|
||||
String[] md5Lod = {"2c93f56e07e320ac7a012bd4940b1dbc", "7e0c4f2b0fda85fd2891eee76c396a55"};
|
||||
WalkerTestSpec spec4 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Lod));
|
||||
executeTest("testLodFilter", spec4);
|
||||
|
||||
String[] md5MQ0 = {"fde4be5eec12be3405863649170aaaac", "3203de335621851bccf596242b079e23"};
|
||||
WalkerTestSpec spec5 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5MQ0));
|
||||
executeTest("testMappingQuality0Filter", spec5);
|
||||
|
||||
String[] md5MQ = {"17c35a5ccd814991dbcd3be6c2f244cd", "ecc777feedea61f7b570d114c2ab89b1"};
|
||||
WalkerTestSpec spec6 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5MQ));
|
||||
executeTest("testRMSMappingQualityFilter", spec6);
|
||||
|
||||
String[] md5OnOff = {"fc649a7ebd4a7b7fa739fc4fcb7704e2", "67f2e1bc025833b0fa31f47195198997"};
|
||||
WalkerTestSpec spec7 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5OnOff));
|
||||
executeTest("testOnOffGenotypeFilter", spec7);
|
||||
|
||||
String[] md5Clusters = {"9b3aede689a25431ed2e1aac9dd73dc6", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
|
||||
WalkerTestSpec spec8 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Clusters));
|
||||
executeTest("testClusteredSnpsFilter", spec8);
|
||||
|
||||
String[] md5Indels = {"c36cb0021f90b3509e483b57f4f41f6e", "8e0e915a1cb63d7049e0671ed00101fe"};
|
||||
WalkerTestSpec spec9 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Indels));
|
||||
executeTest("testIndelArtifactFilter", spec9);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -7,6 +7,10 @@ import java.util.Arrays;
|
|||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
// ********************************************************************************** //
|
||||
// Note that this class also serves as an integration test for the VariantAnnotator! //
|
||||
// ********************************************************************************** //
|
||||
|
||||
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||
public static String baseTestString() {
|
||||
return "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s";
|
||||
|
|
@ -42,16 +46,16 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testMultiSamplePilot1PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
||||
Arrays.asList("b7e12c4011d0043024e0dd2dd5764752"));
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("b992e55996942c893948ea85660478ba"));
|
||||
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiSamplePilot2PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
||||
Arrays.asList("89c600d72a815c09412c97f82fa2281e"));
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("6d28b2af631805dc593508a21ef46a83"));
|
||||
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -63,24 +67,24 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 50", 1,
|
||||
Arrays.asList("a96862eb5bd3d8db143712f427e3db91"));
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("90c5129f298075ee0e18233b3763f25d"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 50", 1,
|
||||
Arrays.asList("22735bba0cb6ea3984f1d3913e376ac4"));
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("033390940ecc0e2dcda5559d6a1802fa"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 50", 1,
|
||||
Arrays.asList("580248cfb813824194bda830427ab3d6"));
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("8deb8b1132e7ddf28c7a0d919ce22985"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue