Moving G's MyHapScore to replace the old HapScore
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3943 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7858ffec32
commit
8d8acc9fae
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010 The Broad Institute
|
* Copyright (c) 2010, The Broad Institute
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* obtaining a copy of this software and associated documentation
|
* obtaining a copy of this software and associated documentation
|
||||||
|
|
@ -12,15 +12,14 @@
|
||||||
*
|
*
|
||||||
* The above copyright notice and this permission notice shall be
|
* The above copyright notice and this permission notice shall be
|
||||||
* included in all copies or substantial portions of the Software.
|
* included in all copies or substantial portions of the Software.
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
@ -34,20 +33,16 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
// todo -- rename to haplotype penalty
|
|
||||||
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||||
|
private final static String REGEXP_WILDCARD = ".";
|
||||||
// if true, we compute a second haplotype from the reads, instead of constraining ourselves to the reference
|
|
||||||
// as a haplotype itself
|
|
||||||
private final static boolean USE_NON_REFERENCE_SECOND_HAPLOTYPE = true;
|
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
||||||
|
|
@ -58,13 +53,14 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
int contextWingSize = Math.min(((int)ref.getWindow().size() - 1)/2, MIN_CONTEXT_WING_SIZE);
|
int contextWingSize = Math.min(((int)ref.getWindow().size() - 1)/2, MIN_CONTEXT_WING_SIZE);
|
||||||
int contextSize = contextWingSize * 2 + 1;
|
int contextSize = contextWingSize * 2 + 1;
|
||||||
|
|
||||||
// calculate
|
// Compute all haplotypes consistent with the current read pileup
|
||||||
Haplotype refHaplotype = calcRefHaplotype(vc, ref, context, contextSize);
|
List<Haplotype> haplotypes = computeHaplotypes(context.getBasePileup(), contextSize);
|
||||||
Haplotype altHaplotype = new Haplotype(getPileupOfAllele(vc.getAlternateAllele(0), context.getBasePileup()), contextSize);
|
|
||||||
|
|
||||||
//System.exit(1);
|
|
||||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
||||||
double score = scoreReadsAgainstHaplotypes(Arrays.asList(refHaplotype, altHaplotype), context.getBasePileup(), contextSize);
|
double score = 0.0;
|
||||||
|
|
||||||
|
if (haplotypes != null)
|
||||||
|
score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
|
||||||
|
|
||||||
// return the score
|
// return the score
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
|
|
@ -72,37 +68,184 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo -- note that the refPileup.size() won't deal correctly with the situation where the current site is hom-var
|
private class HaplotypeComparator implements Comparator<Haplotype>{
|
||||||
// todo -- but there's nearby het size. In order to really handle this we need to group reads into two clusters,
|
|
||||||
// todo -- but if we are going to do this we might as well just assemble the whole region
|
public int compare(Haplotype a, Haplotype b) {
|
||||||
public Haplotype calcRefHaplotype(VariantContext vc, ReferenceContext ref, AlignmentContext context, int contextSize) {
|
if (a.getQualitySum() < b.getQualitySum())
|
||||||
ReadBackedPileup refPileup = getPileupOfAllele(vc.getReference(), context.getBasePileup());
|
return 1;
|
||||||
if ( USE_NON_REFERENCE_SECOND_HAPLOTYPE && refPileup.size() > 0 ) {
|
if (a.getQualitySum() > b.getQualitySum()){
|
||||||
// we are calculating the reference haplotype from the reads itself -- effectively allows us to
|
return -1;
|
||||||
// have het haplotypes that are hom-var in the surrounding context, indicating that the individual
|
}
|
||||||
// as two alt haplotypes
|
return 0;
|
||||||
return new Haplotype(refPileup, contextSize);
|
|
||||||
} else {
|
|
||||||
// we are constraining the reference haplotype to really be the reference itself
|
|
||||||
int contextWingSize = (contextSize - 1) / 2;
|
|
||||||
int refMiddle = (int)(ref.getWindow().size() - 1) / 2;
|
|
||||||
int refStart = refMiddle - contextWingSize;
|
|
||||||
int refStop = refMiddle + contextWingSize + 1;
|
|
||||||
String refString = new String(ref.getBases()).substring(refStart, refStop);
|
|
||||||
return new Haplotype(refString.getBytes(), 60);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private List<Haplotype> computeHaplotypes(ReadBackedPileup pileup, int contextSize) {
|
||||||
|
// Compute all possible haplotypes consistent with current pileup
|
||||||
|
ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
|
||||||
|
PriorityQueue<Haplotype> haplotypeQueue = new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
|
||||||
|
|
||||||
|
|
||||||
|
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
||||||
|
if (ReadUtils.is454Read(p.getRead()))
|
||||||
|
continue;
|
||||||
|
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
|
||||||
|
|
||||||
|
|
||||||
|
haplotypeQueue.add(haplotypeFromRead);
|
||||||
|
//haplotypeList.add(haplotypeFromRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
|
||||||
|
Haplotype elem;
|
||||||
|
while ((elem = haplotypeQueue.poll()) != null) {
|
||||||
|
//System.out.print("element: "+elem.toString());
|
||||||
|
//System.out.format(" SumQual = %f\n", elem.getQualitySum());
|
||||||
|
boolean foundHaplotypeMatch = false;
|
||||||
|
//Haplotype[] remainingHaplotypes = haplotypeQueue.toArray(new Haplotype[haplotypeQueue.size()]);
|
||||||
|
for ( Haplotype haplotypeFromList : haplotypeList ) {
|
||||||
|
|
||||||
|
Haplotype consensusHaplotype = getConsensusHaplotype(elem, haplotypeFromList);
|
||||||
|
//System.out.format("-Checking consensus for %s:", haplotypeFromList.toString());
|
||||||
|
if (consensusHaplotype != null) {
|
||||||
|
//System.out.format("--Consensus haplotype = %s, qual = %f\n", consensusHaplotype.toString(), consensusHaplotype.getQualitySum());
|
||||||
|
foundHaplotypeMatch = true;
|
||||||
|
if (consensusHaplotype.getQualitySum() > haplotypeFromList.getQualitySum()) {
|
||||||
|
haplotypeList.remove(haplotypeFromList);
|
||||||
|
haplotypeList.add(consensusHaplotype);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* else {
|
||||||
|
System.out.println("no consensus found");
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!foundHaplotypeMatch) {
|
||||||
|
haplotypeList.add(elem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Now retrieve two most popular haplotypes
|
||||||
|
// TODO - quick and dirty solution, could use better data structures to do this automatically
|
||||||
|
int bestIdx=0, secondBestIdx=0;
|
||||||
|
double bestIdxVal=-1.0, secondBestIdxVal = -1.0;
|
||||||
|
|
||||||
|
for (int k=0; k < haplotypeList.size(); k++) {
|
||||||
|
|
||||||
|
double qualSum = haplotypeList.get(k).getQualitySum();
|
||||||
|
if (qualSum >= bestIdxVal) {
|
||||||
|
secondBestIdx = bestIdx;
|
||||||
|
secondBestIdxVal = bestIdxVal;
|
||||||
|
bestIdx = k;
|
||||||
|
bestIdxVal = qualSum;
|
||||||
|
}
|
||||||
|
else if (qualSum >= secondBestIdxVal) {
|
||||||
|
// check if current is second best
|
||||||
|
secondBestIdx = k;
|
||||||
|
secondBestIdxVal = qualSum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (haplotypeList.size() > 0) {
|
||||||
|
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
||||||
|
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
||||||
|
|
||||||
|
// Temp hack to match old implementation's scaling, TBD better behavior
|
||||||
|
|
||||||
|
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) {
|
||||||
|
SAMRecord read = p.getRead();
|
||||||
|
int readOffsetFromPileup = p.getOffset();
|
||||||
|
int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2;
|
||||||
|
byte[] haplotypeBases = new byte[contextSize];
|
||||||
|
|
||||||
|
for(int i=0; i < contextSize; i++) {
|
||||||
|
haplotypeBases[i] = REGEXP_WILDCARD.getBytes()[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
double[] baseQualities = new double[contextSize];
|
||||||
|
Arrays.fill(baseQualities,0.0);
|
||||||
|
|
||||||
|
for (int i = 0; i < contextSize; i++ ) {
|
||||||
|
int baseOffset = i + baseOffsetStart;
|
||||||
|
if ( baseOffset < 0 )
|
||||||
|
continue;
|
||||||
|
if ( baseOffset >= read.getReadLength() )
|
||||||
|
break;
|
||||||
|
|
||||||
|
haplotypeBases[i] = read.getReadBases()[baseOffset];
|
||||||
|
baseQualities[i] = (double)read.getBaseQualities()[baseOffset];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return new Haplotype(haplotypeBases, baseQualities);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private Haplotype getConsensusHaplotype(Haplotype haplotypeA, Haplotype haplotypeB) {
|
||||||
|
String a = haplotypeA.toString();
|
||||||
|
String b = haplotypeB.toString();
|
||||||
|
|
||||||
|
if (a.length() != b.length())
|
||||||
|
throw new StingException("Haplotypes a and b must be of same length");
|
||||||
|
|
||||||
|
char chA, chB;
|
||||||
|
char wc = REGEXP_WILDCARD.charAt(0);
|
||||||
|
|
||||||
|
|
||||||
|
char[] consensusChars = new char[a.length()];
|
||||||
|
double[] consensusQuals = new double[a.length()];
|
||||||
|
|
||||||
|
for (int i=0; i < a.length(); i++) {
|
||||||
|
chA = a.charAt(i);
|
||||||
|
chB = b.charAt(i);
|
||||||
|
|
||||||
|
if ((chA != chB) && (chA != wc) && (chB != wc))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
if ((chA == wc) && (chB == wc)) {
|
||||||
|
consensusChars[i] = wc;
|
||||||
|
consensusQuals[i] = 0.0;
|
||||||
|
}
|
||||||
|
else if ((chA == wc)) {
|
||||||
|
consensusChars[i] = chB;
|
||||||
|
consensusQuals[i] = haplotypeB.quals[i];
|
||||||
|
}
|
||||||
|
else if ((chB == wc)){
|
||||||
|
consensusChars[i] = chA;
|
||||||
|
consensusQuals[i] = haplotypeA.quals[i];
|
||||||
|
} else {
|
||||||
|
consensusChars[i] = chA;
|
||||||
|
consensusQuals[i] = haplotypeA.quals[i]+haplotypeB.quals[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return new Haplotype(new String(consensusChars), consensusQuals);
|
||||||
|
}
|
||||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
||||||
private double scoreReadsAgainstHaplotypes(List<Haplotype> haplotypes, ReadBackedPileup pileup, int contextSize) {
|
private double scoreReadsAgainstHaplotypes(List<Haplotype> haplotypes, ReadBackedPileup pileup, int contextSize) {
|
||||||
if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
||||||
if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(1));
|
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(1));
|
||||||
|
|
||||||
double[][] haplotypeScores = new double[pileup.size()][haplotypes.size()];
|
double[][] haplotypeScores = new double[pileup.size()][haplotypes.size()];
|
||||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
||||||
SAMRecord read = p.getRead();
|
SAMRecord read = p.getRead();
|
||||||
int readOffsetFromPileup = p.getOffset();
|
int readOffsetFromPileup = p.getOffset();
|
||||||
|
|
||||||
|
if (ReadUtils.is454Read(read))
|
||||||
|
continue;
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
||||||
double m = 10000000;
|
double m = 10000000;
|
||||||
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
||||||
|
|
@ -172,112 +315,38 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
|
|
||||||
private class Haplotype {
|
private class Haplotype {
|
||||||
byte[] bases = null;
|
byte[] bases = null;
|
||||||
byte[] quals = null;
|
double[] quals = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
||||||
*
|
*
|
||||||
* @param bases
|
* @param bases bases
|
||||||
* @param qual
|
* @param qual qual
|
||||||
*/
|
*/
|
||||||
Haplotype(byte[] bases, int qual) {
|
Haplotype(byte[] bases, int qual) {
|
||||||
this.bases = bases;
|
this.bases = bases;
|
||||||
quals = new byte[bases.length];
|
quals = new double[bases.length];
|
||||||
Arrays.fill(quals, (byte)qual);
|
Arrays.fill(quals, (double)qual);
|
||||||
}
|
}
|
||||||
|
|
||||||
Haplotype(ReadBackedPileup pileup, int contextSize ) {
|
Haplotype(byte[] bases, double[] quals) {
|
||||||
this.bases = new byte[contextSize];
|
this.bases = bases;
|
||||||
this.quals = new byte[contextSize];
|
this.quals = quals;
|
||||||
calculateConsensusOverWindow(pileup, contextSize, (contextSize - 1) / 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void calculateConsensusOverWindow(ReadBackedPileup pileup, int contextSize, int pileupOffset) {
|
Haplotype(String bases, double[] quals) {
|
||||||
// for each context position
|
this.bases = bases.getBytes();
|
||||||
for ( int i = 0; i < contextSize; i++ ) {
|
this.quals = quals;
|
||||||
int offsetFromPileup = i - pileupOffset;
|
|
||||||
ReadBackedPileup offsetPileup = pileupAtOffset(pileup, offsetFromPileup);
|
|
||||||
if ( DEBUG ) System.out.printf("pileup is %s%n", offsetPileup);
|
|
||||||
BaseQual bq = calcConsensusAtLocus(offsetPileup, FLAT_BASE_PRIORS);
|
|
||||||
this.bases[i] = bq.getBase();
|
|
||||||
this.quals[i] = bq.getQual();
|
|
||||||
if ( DEBUG ) System.out.printf(" At %d: offset %d bq = %c / %d%n", i, offsetFromPileup, (char)bq.getBase(), bq.getQual());
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private BaseQual calcConsensusAtLocus( ReadBackedPileup pileup, double[] log10priors ) {
|
|
||||||
double[] log10BaseLikelihoods = new double[BaseUtils.Base.values().length];
|
|
||||||
|
|
||||||
// loop over a, c, g, t and determine the most likely hypothesis
|
|
||||||
for ( BaseUtils.Base base : BaseUtils.Base.values() ) {
|
|
||||||
double log10L = log10priors[base.getIndex()];
|
|
||||||
|
|
||||||
for ( PileupElement p : pileup ) {
|
|
||||||
byte qual = p.getQual();
|
|
||||||
if ( qual > 5 ) {
|
|
||||||
double baseP = QualityUtils.qualToProb(qual);
|
|
||||||
double L = base.sameBase(p.getBase()) ? baseP : 1 - baseP;
|
|
||||||
if ( Double.isInfinite(Math.log10(L)) )
|
|
||||||
throw new StingException("BUG -- base likelihood is infinity!");
|
|
||||||
log10L += Math.log10(L);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log10BaseLikelihoods[base.getIndex()] = log10L;
|
|
||||||
}
|
|
||||||
|
|
||||||
double[] posteriors = MathUtils.normalizeFromLog10(log10BaseLikelihoods, false);
|
|
||||||
int mostLikelyIndex = MathUtils.maxElementIndex(posteriors);
|
|
||||||
byte mostLikelyBase = BaseUtils.Base.values()[mostLikelyIndex].getBase(); // get the most likely option
|
|
||||||
double MAX_CONSENSUS_QUALITY = 0.000001;
|
|
||||||
byte qual = QualityUtils.probToQual(posteriors[mostLikelyIndex],MAX_CONSENSUS_QUALITY); // call posterior calculator here over L over bases
|
|
||||||
return new BaseQual(mostLikelyBase, qual);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() { return new String(this.bases); }
|
public String toString() { return new String(this.bases); }
|
||||||
|
|
||||||
|
double getQualitySum() {
|
||||||
|
double s = 0;
|
||||||
|
for (int k=0; k < bases.length; k++) {
|
||||||
|
s += quals[k];
|
||||||
}
|
}
|
||||||
|
return s;
|
||||||
private static class BaseQual extends Pair<Byte, Byte> {
|
|
||||||
public BaseQual(byte base, byte qual) {
|
|
||||||
super(base, qual);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte getBase() { return getFirst(); }
|
|
||||||
public byte getQual() { return getSecond(); }
|
|
||||||
}
|
|
||||||
|
|
||||||
private static ReadBackedPileup pileupAtOffset(ReadBackedPileup pileup, int offsetFromPileup) {
|
|
||||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
ArrayList<Integer> offsets = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
// go through the pileup, read by read, collecting up the context at offset
|
|
||||||
for ( PileupElement p : pileup ) {
|
|
||||||
// not safe -- doesn't work for indel-containing reads!
|
|
||||||
|
|
||||||
// whole algorithm should be restructured to handle other reads in the window or use LocusIteratorByState
|
|
||||||
SAMRecord read = p.getRead();
|
|
||||||
int readOffsetInPileup = p.getOffset();
|
|
||||||
int neededReadOffset = readOffsetInPileup + offsetFromPileup;
|
|
||||||
if ( neededReadOffset >= 0 && neededReadOffset < read.getReadLength() ) {
|
|
||||||
reads.add(p.getRead());
|
|
||||||
offsets.add(neededReadOffset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new ReadBackedPileupImpl(pileup.getLocation(), reads, offsets);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static ReadBackedPileup getPileupOfAllele( Allele allele, ReadBackedPileup pileup ) {
|
|
||||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
|
||||||
byte alleleBase = allele.getBases()[0]; // assumes SNP
|
|
||||||
|
|
||||||
for ( PileupElement p : pileup ) {
|
|
||||||
if ( p.getBase() == alleleBase ) {
|
|
||||||
filteredPileup.add(p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new ReadBackedPileupImpl(pileup.getLocation(), filteredPileup);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); }
|
public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); }
|
||||||
|
|
|
||||||
|
|
@ -1,357 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
|
||||||
|
|
||||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
|
||||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
|
||||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
|
||||||
|
|
||||||
public class MyHaplotypeScore implements InfoFieldAnnotation {
|
|
||||||
private final static boolean DEBUG = false;
|
|
||||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
|
||||||
private final static String REGEXP_WILDCARD = ".";
|
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
|
||||||
if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
|
||||||
return null;
|
|
||||||
|
|
||||||
AlignmentContext context = StratifiedAlignmentContext.joinContexts(stratifiedContexts.values());
|
|
||||||
|
|
||||||
int contextWingSize = Math.min(((int)ref.getWindow().size() - 1)/2, MIN_CONTEXT_WING_SIZE);
|
|
||||||
int contextSize = contextWingSize * 2 + 1;
|
|
||||||
|
|
||||||
// Compute all haplotypes consistent with the current read pileup
|
|
||||||
List<Haplotype> haplotypes = computeHaplotypes(context.getBasePileup(), contextSize);
|
|
||||||
|
|
||||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
|
||||||
double score = 0.0;
|
|
||||||
|
|
||||||
if (haplotypes != null)
|
|
||||||
score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
|
|
||||||
|
|
||||||
// return the score
|
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
|
||||||
map.put(getKeyNames().get(0), String.format("%.2f", score));
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
private class HaplotypeComparator implements Comparator<Haplotype>{
|
|
||||||
|
|
||||||
public int compare(Haplotype a, Haplotype b) {
|
|
||||||
if (a.getQualitySum() < b.getQualitySum())
|
|
||||||
return 1;
|
|
||||||
if (a.getQualitySum() > b.getQualitySum()){
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private List<Haplotype> computeHaplotypes(ReadBackedPileup pileup, int contextSize) {
|
|
||||||
// Compute all possible haplotypes consistent with current pileup
|
|
||||||
ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
|
|
||||||
ArrayList<Integer> readsPerHaplotype = new ArrayList<Integer>();
|
|
||||||
PriorityQueue<Haplotype> haplotypeQueue = new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
|
|
||||||
|
|
||||||
|
|
||||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
|
||||||
if (ReadUtils.is454Read(p.getRead()))
|
|
||||||
continue;
|
|
||||||
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
|
|
||||||
|
|
||||||
|
|
||||||
haplotypeQueue.add(haplotypeFromRead);
|
|
||||||
//haplotypeList.add(haplotypeFromRead);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
|
|
||||||
Haplotype elem;
|
|
||||||
while ((elem = haplotypeQueue.poll()) != null) {
|
|
||||||
//System.out.print("element: "+elem.toString());
|
|
||||||
//System.out.format(" SumQual = %f\n", elem.getQualitySum());
|
|
||||||
boolean foundHaplotypeMatch = false;
|
|
||||||
//Haplotype[] remainingHaplotypes = haplotypeQueue.toArray(new Haplotype[haplotypeQueue.size()]);
|
|
||||||
for ( Haplotype haplotypeFromList : haplotypeList ) {
|
|
||||||
|
|
||||||
Haplotype consensusHaplotype = getConsensusHaplotype(elem, haplotypeFromList);
|
|
||||||
//System.out.format("-Checking consensus for %s:", haplotypeFromList.toString());
|
|
||||||
if (consensusHaplotype != null) {
|
|
||||||
//System.out.format("--Consensus haplotype = %s, qual = %f\n", consensusHaplotype.toString(), consensusHaplotype.getQualitySum());
|
|
||||||
foundHaplotypeMatch = true;
|
|
||||||
if (consensusHaplotype.getQualitySum() > haplotypeFromList.getQualitySum()) {
|
|
||||||
haplotypeList.remove(haplotypeFromList);
|
|
||||||
haplotypeList.add(consensusHaplotype);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* else {
|
|
||||||
System.out.println("no consensus found");
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!foundHaplotypeMatch) {
|
|
||||||
haplotypeList.add(elem);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Now retrieve two most popular haplotypes
|
|
||||||
// TODO - quick and dirty solution, could use better data structures to do this automatically
|
|
||||||
int bestIdx=0, secondBestIdx=0;
|
|
||||||
double bestIdxVal=-1.0, secondBestIdxVal = -1.0;
|
|
||||||
|
|
||||||
for (int k=0; k < haplotypeList.size(); k++) {
|
|
||||||
|
|
||||||
double qualSum = haplotypeList.get(k).getQualitySum();
|
|
||||||
if (qualSum >= bestIdxVal) {
|
|
||||||
secondBestIdx = bestIdx;
|
|
||||||
secondBestIdxVal = bestIdxVal;
|
|
||||||
bestIdx = k;
|
|
||||||
bestIdxVal = qualSum;
|
|
||||||
}
|
|
||||||
else if (qualSum >= secondBestIdxVal) {
|
|
||||||
// check if current is second best
|
|
||||||
secondBestIdx = k;
|
|
||||||
secondBestIdxVal = qualSum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (haplotypeList.size() > 0) {
|
|
||||||
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
|
||||||
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
|
||||||
|
|
||||||
// Temp hack to match old implementation's scaling, TBD better behavior
|
|
||||||
|
|
||||||
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) {
|
|
||||||
SAMRecord read = p.getRead();
|
|
||||||
int readOffsetFromPileup = p.getOffset();
|
|
||||||
int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2;
|
|
||||||
byte[] haplotypeBases = new byte[contextSize];
|
|
||||||
|
|
||||||
for(int i=0; i < contextSize; i++) {
|
|
||||||
haplotypeBases[i] = REGEXP_WILDCARD.getBytes()[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
double[] baseQualities = new double[contextSize];
|
|
||||||
Arrays.fill(baseQualities,0.0);
|
|
||||||
|
|
||||||
for (int i = 0; i < contextSize; i++ ) {
|
|
||||||
int baseOffset = i + baseOffsetStart;
|
|
||||||
if ( baseOffset < 0 )
|
|
||||||
continue;
|
|
||||||
if ( baseOffset >= read.getReadLength() )
|
|
||||||
break;
|
|
||||||
|
|
||||||
haplotypeBases[i] = read.getReadBases()[baseOffset];
|
|
||||||
baseQualities[i] = (double)read.getBaseQualities()[baseOffset];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return new Haplotype(haplotypeBases, baseQualities);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private Haplotype getConsensusHaplotype(Haplotype haplotypeA, Haplotype haplotypeB) {
|
|
||||||
String a = haplotypeA.toString();
|
|
||||||
String b = haplotypeB.toString();
|
|
||||||
|
|
||||||
if (a.length() != b.length())
|
|
||||||
throw new StingException("Haplotypes a and b must be of same length");
|
|
||||||
|
|
||||||
char chA, chB;
|
|
||||||
char wc = REGEXP_WILDCARD.charAt(0);
|
|
||||||
|
|
||||||
|
|
||||||
char[] consensusChars = new char[a.length()];
|
|
||||||
double[] consensusQuals = new double[a.length()];
|
|
||||||
|
|
||||||
for (int i=0; i < a.length(); i++) {
|
|
||||||
chA = a.charAt(i);
|
|
||||||
chB = b.charAt(i);
|
|
||||||
|
|
||||||
if ((chA != chB) && (chA != wc) && (chB != wc))
|
|
||||||
return null;
|
|
||||||
|
|
||||||
if ((chA == wc) && (chB == wc)) {
|
|
||||||
consensusChars[i] = wc;
|
|
||||||
consensusQuals[i] = 0.0;
|
|
||||||
}
|
|
||||||
else if ((chA == wc) && (chB != wc)) {
|
|
||||||
consensusChars[i] = chB;
|
|
||||||
consensusQuals[i] = haplotypeB.quals[i];
|
|
||||||
}
|
|
||||||
else if ((chA != wc) && (chB == wc)){
|
|
||||||
consensusChars[i] = chA;
|
|
||||||
consensusQuals[i] = haplotypeA.quals[i];
|
|
||||||
} else {
|
|
||||||
consensusChars[i] = chA;
|
|
||||||
consensusQuals[i] = haplotypeA.quals[i]+haplotypeB.quals[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return new Haplotype(new String(consensusChars), consensusQuals);
|
|
||||||
}
|
|
||||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
|
||||||
private double scoreReadsAgainstHaplotypes(List<Haplotype> haplotypes, ReadBackedPileup pileup, int contextSize) {
|
|
||||||
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
|
||||||
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(1));
|
|
||||||
|
|
||||||
double[][] haplotypeScores = new double[pileup.size()][haplotypes.size()];
|
|
||||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
|
||||||
SAMRecord read = p.getRead();
|
|
||||||
int readOffsetFromPileup = p.getOffset();
|
|
||||||
|
|
||||||
if (ReadUtils.is454Read(read))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
|
||||||
double m = 10000000;
|
|
||||||
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
|
||||||
Haplotype haplotype = haplotypes.get(i);
|
|
||||||
int start = readOffsetFromPileup - (contextSize - 1)/2;
|
|
||||||
double score = scoreReadAgainstHaplotype(read, start, contextSize, haplotype);
|
|
||||||
haplotypeScores[p.getPileupOffset()][i] = score;
|
|
||||||
if ( DEBUG ) System.out.printf(" vs. haplotype %d = %f%n", i, score);
|
|
||||||
m = Math.min(score, m);
|
|
||||||
}
|
|
||||||
if ( DEBUG ) System.out.printf(" => best score was %f%n", m);
|
|
||||||
}
|
|
||||||
|
|
||||||
double overallScore = 0.0;
|
|
||||||
for ( double[] readHaplotypeScores : haplotypeScores ) {
|
|
||||||
overallScore += MathUtils.arrayMin(readHaplotypeScores);
|
|
||||||
}
|
|
||||||
|
|
||||||
return overallScore;
|
|
||||||
}
|
|
||||||
|
|
||||||
private double scoreReadAgainstHaplotype(SAMRecord read, int baseOffsetStart, int contextSize, Haplotype haplotype ) {
|
|
||||||
double expected = 0.0;
|
|
||||||
double mismatches = 0.0;
|
|
||||||
|
|
||||||
// What's the expected mismatch rate under the model that this read is actually sampled from
|
|
||||||
// this haplotype? Let's assume the consensus base c is a random choice one of A, C, G, or T, and that
|
|
||||||
// the observed base is actually from a c with an error rate e. Since e is the rate at which we'd
|
|
||||||
// see a miscalled c, the expected mismatch rate is really e. So the expected number of mismatches
|
|
||||||
// is just sum_i e_i for i from 1..n for n sites
|
|
||||||
//
|
|
||||||
// Now, what's the probabilistic sum of mismatches? Suppose that the base b is equal to c. Well, it could
|
|
||||||
// actually be a miscall in a matching direction, which would happen at a e / 3 rate. If b != c, then
|
|
||||||
// the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch.
|
|
||||||
// so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n
|
|
||||||
for ( int i = 0; i < contextSize; i++ ) {
|
|
||||||
int baseOffset = i + baseOffsetStart;
|
|
||||||
if ( baseOffset < 0 )
|
|
||||||
continue;
|
|
||||||
if ( baseOffset >= read.getReadLength() )
|
|
||||||
break;
|
|
||||||
|
|
||||||
byte haplotypeBase = haplotype.bases[i];
|
|
||||||
byte readBase = read.getReadBases()[baseOffset];
|
|
||||||
|
|
||||||
boolean matched = BaseUtils.basesAreEqual(readBase, haplotypeBase );
|
|
||||||
double e = QualityUtils.qualToErrorProb(read.getBaseQualities()[baseOffset]);
|
|
||||||
expected += e;
|
|
||||||
mismatches += matched ? e : 1 - e / 3;
|
|
||||||
|
|
||||||
// a more sophisticated calculation would include the reference quality, but it's nice to actually penalize
|
|
||||||
// the mismatching of poorly determined regions of the consensus
|
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("Read %s: scoring %c vs. %c => e = %f Q%d esum %f vs. msum %f%n",
|
|
||||||
read.getReadName(), (char)haplotypeBase, (char)readBase, e, read.getBaseQualities()[baseOffset], expected, mismatches);
|
|
||||||
}
|
|
||||||
|
|
||||||
return mismatches - expected;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Log10 of a uniform prior over the base alphabet: every entry is
// log10(1 / |BaseUtils.Base|), filled once at class-load time.
private static final double[] FLAT_BASE_PRIORS = new double[BaseUtils.Base.values().length];
static {
    final int nBases = BaseUtils.Base.values().length;
    final double logUniform = Math.log10(1.0 / nBases);
    for ( int b = 0; b < nBases; b++ )
        FLAT_BASE_PRIORS[b] = logUniform;
}
|
|
||||||
|
|
||||||
private class Haplotype {
|
|
||||||
byte[] bases = null;
|
|
||||||
double[] quals = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
|
||||||
*
|
|
||||||
* @param bases
|
|
||||||
* @param qual
|
|
||||||
*/
|
|
||||||
Haplotype(byte[] bases, int qual) {
|
|
||||||
this.bases = bases;
|
|
||||||
quals = new double[bases.length];
|
|
||||||
Arrays.fill(quals, (double)qual);
|
|
||||||
}
|
|
||||||
|
|
||||||
Haplotype(byte[] bases, double[] quals) {
|
|
||||||
this.bases = bases;
|
|
||||||
this.quals = quals;
|
|
||||||
}
|
|
||||||
|
|
||||||
Haplotype(String bases, double[] quals) {
|
|
||||||
this.bases = bases.getBytes();
|
|
||||||
this.quals = quals;
|
|
||||||
}
|
|
||||||
public String toString() { return new String(this.bases); }
|
|
||||||
|
|
||||||
double getQualitySum() {
|
|
||||||
double s = 0;
|
|
||||||
for (int k=0; k < bases.length; k++) {
|
|
||||||
s += quals[k];
|
|
||||||
}
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("MyHaplotypeScore"); }
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MyHaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with two (and only two) segregating haplotypes")); }
|
|
||||||
}
|
|
||||||
|
|
@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testHasAnnotsAsking1() {
|
public void testHasAnnotsAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("184fc1f99dfba3e2519d16458d728fcc"));
|
Arrays.asList("674541eb78fa6e4f9bee172b3f34bbab"));
|
||||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testHasAnnotsAsking2() {
|
public void testHasAnnotsAsking2() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList("12a069d5f1c9cd3970ea6301397219aa"));
|
Arrays.asList("985c55e3f0c41082dc56f7a291ef307a"));
|
||||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testNoAnnotsAsking1() {
|
public void testNoAnnotsAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("8f42df642b329ff19bc2c39470117280"));
|
Arrays.asList("c789e8f795cf4b182f717423bf3328f2"));
|
||||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testNoAnnotsAsking2() {
|
public void testNoAnnotsAsking2() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList("908aa4b6ac65bee57f91bc6fed4d46ad"));
|
Arrays.asList("be18a3b9589ea60350fbaf8f7e1dd769"));
|
||||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSamplePilot1Joint() {
|
public void testMultiSamplePilot1Joint() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
|
||||||
Arrays.asList("3a402233264e21a84d421e3a4ea64768"));
|
Arrays.asList("0c1a5f6372f6d17544a95292bd313c86"));
|
||||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSamplePilot2Joint() {
|
public void testMultiSamplePilot2Joint() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList("79736b3e955a16b30f827b2786fc08b1"));
|
Arrays.asList("049c98bc6db7029995f8893f570dd9ad"));
|
||||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testSingleSamplePilot2Joint() {
|
public void testSingleSamplePilot2Joint() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("b8d93c6fcb4b17d454cdcbfc4b43f076"));
|
Arrays.asList("6c278b09727cd14523423a6beda627a5"));
|
||||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,7 +63,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParallelization() {
|
public void testParallelization() {
|
||||||
String md5 = "098802639cfab1b777c96d38376f118a";
|
String md5 = "c66c55363fb9e951569667e3ff6eb728";
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
|
||||||
|
|
@ -90,11 +90,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testParameter() {
|
public void testParameter() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( "-genotype", "b87f28a772eb75c8dad9062c6d039da5" );
|
e.put( "-genotype", "dfe1a220ee0966a20af4a2fca9e635fe" );
|
||||||
e.put( "-all_bases", "ec9de2044cd5f5901d6879b56f12993a" );
|
e.put( "-all_bases", "e7b766485dd37631b3fcf9f655588456" );
|
||||||
e.put( "--min_base_quality_score 26", "875c64a64fd402626e04c9540388c483" );
|
e.put( "--min_base_quality_score 26", "e4e318853e091e63166eaef648ec26ac" );
|
||||||
e.put( "--min_mapping_quality_score 26", "de7d90e425f8f08f219dc91a25d60c68" );
|
e.put( "--min_mapping_quality_score 26", "57fffb32a3a7a736f6c30ae08ef71c91" );
|
||||||
e.put( "--max_mismatches_in_40bp_window 5", "758e312b2a2e7c4d83f60174d43fac8a" );
|
e.put( "--max_mismatches_in_40bp_window 5", "4f7a5c22247234c4ee3ca2cfbbc16729" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
@ -108,12 +108,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testConfidence() {
|
public void testConfidence() {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||||
Arrays.asList("4937bab94b0bae1aa61cdf3a06cb49e8"));
|
Arrays.asList("8b4ebcbf330340a437e572a73a99227b"));
|
||||||
executeTest("testConfidence1", spec1);
|
executeTest("testConfidence1", spec1);
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||||
Arrays.asList("f8b722dad5c4868a4bba246eef83f96d"));
|
Arrays.asList("0d8825a98a8918bed2c02ac44e15dde7"));
|
||||||
executeTest("testConfidence2", spec2);
|
executeTest("testConfidence2", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue