Merge branch 'master' of ssh://gsa3/humgen/gsa-scr1/gsa-engineering/git/unstable

2012-10-21 12:38:49 -04:00 · 2012-10-21 12:38:49 -04:00 · e9b7324dc1
parent 67b9e7319e 0616b98551
commit e9b7324dc1
20 changed files with 948 additions and 528 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
@ -72,7 +72,7 @@ public class ErrorModel  {
            haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
            if (refSampleVC.isIndel()) {
                pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
-                        UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
+                        UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.pairHMM);
                IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
            }
        }
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
@ -62,7 +62,7 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener


        pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
-                UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
+                UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.pairHMM);
        haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
    }

--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@ -52,6 +52,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
 import org.broadinstitute.sting.utils.fragments.FragmentCollection;
 import org.broadinstitute.sting.utils.fragments.FragmentUtils;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.sting.utils.pairhmm.PairHMM;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.sam.AlignmentUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -114,6 +115,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
    @Output(fullName="graphOutput", shortName="graph", doc="File to which debug assembly graph information should be written", required = false)
    protected PrintStream graphWriter = null;

+    /**
+     * The PairHMM implementation to use for genotype likelihood calculations. The various implementations balance a tradeoff of accuracy and runtime.
+     */
+    @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false)
+    public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING;
+
    @Hidden
    @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
    protected String keepRG = null;
@ -234,14 +241,14 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
        samplesList.addAll( samples );
        // initialize the UnifiedGenotyper Engine which is used to call into the exact model
        final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
-        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
-        UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
-        UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
-        UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING );
-        UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING );
+        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);

        // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
-        UnifiedArgumentCollection simpleUAC = UAC.clone();
+        UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
+        simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
+        simpleUAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
+        simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING );
+        simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING );
        simpleUAC.exactCallsLog = null;
        UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);

@ -287,7 +294,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
        }

        assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
-        likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false );
+        likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
        genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
    }

@ -400,6 +407,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
        final List<GATKSAMRecord> filteredReads = filterNonPassingReads( activeRegion ); // filter out reads from genotyping which fail mapping quality based criteria
        if( activeRegion.size() == 0 ) { return 1; } // no reads remain after filtering so nothing else to do!

+        // sort haplotypes to take full advantage of haplotype start offset optimizations in PairHMM
+        Collections.sort( haplotypes, new Haplotype.HaplotypeBaseComparator() );
+
        // evaluate each sample's reads against all haplotypes
        final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList = splitReadsBySample( activeRegion.getReads() );
        final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@ -30,6 +30,9 @@ import com.google.java.contract.Requires;
 import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.pairhmm.*;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -44,8 +47,25 @@ public class LikelihoodCalculationEngine {
    private final boolean DEBUG;
    private final PairHMM pairHMM;

-    public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final boolean noBanded ) {
-        pairHMM = new PairHMM( noBanded );
+    public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType ) {
+
+        switch (hmmType) {
+            case EXACT:
+                pairHMM = new ExactPairHMM();
+                break;
+            case ORIGINAL:
+                pairHMM = new OriginalPairHMM();
+                break;
+            case CACHING:
+                pairHMM = new CachingPairHMM();
+                break;
+            case LOGLESS_CACHING:
+                pairHMM = new LoglessCachingPairHMM();
+                break;
+            default:
+                throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, and LOGLESS_CACHING.");
+        }
+
        this.constantGCP = constantGCP;
        DEBUG = debug;
    }
@ -69,23 +89,18 @@ public class LikelihoodCalculationEngine {
        X_METRIC_LENGTH += 2;
        Y_METRIC_LENGTH += 2;

-        // initial arrays to hold the probabilities of being in the match, insertion and deletion cases
-        final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-        final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-        final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-
-        PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
+        // initialize arrays to hold the probabilities of being in the match, insertion and deletion cases
+        pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);

        // for each sample's reads
        for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
            //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
            // evaluate the likelihood of the reads given those haplotypes
-            computeReadLikelihoods( haplotypes, sampleEntry.getValue(), sampleEntry.getKey(), matchMetricArray, XMetricArray, YMetricArray );
+            computeReadLikelihoods( haplotypes, sampleEntry.getValue(), sampleEntry.getKey() );
        }
    }

-    private void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads, final String sample,
-                                         final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+    private void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads, final String sample ) {

        final int numHaplotypes = haplotypes.size();
        final int numReads = reads.size();
@ -113,9 +128,8 @@ public class LikelihoodCalculationEngine {
                final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) );
                previousHaplotypeSeen = haplotype;

-                readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(),
-                        readQuals, readInsQuals, readDelQuals, overallGCP,
-                        haplotypeStart, matchMetricArray, XMetricArray, YMetricArray);
+                readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), read.getReadBases(),
+                        readQuals, readInsQuals, readDelQuals, overallGCP, haplotypeStart, jjj == 0);
                readCounts[jjj][iii] = readCount;
            }
        }
@ -130,7 +144,7 @@ public class LikelihoodCalculationEngine {
                return iii;
            }
        }
-        return b1.length;
+        return Math.min(b1.length, b2.length);
    }

    @Requires({"haplotypes.size() > 0"})
@ -280,7 +294,7 @@ public class LikelihoodCalculationEngine {
        final int numHaplotypes = haplotypes.size();
        final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
        final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
-        bestHaplotypesIndexList.add(0); // always start with the reference haplotype
+        bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype
        // set up the default 1-to-1 haplotype mapping object
        final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
        for( final Haplotype h : haplotypes ) {
@ -322,6 +336,13 @@ public class LikelihoodCalculationEngine {
        return bestHaplotypes;
    }

+    public static int findReferenceIndex( final List<Haplotype> haplotypes ) {
+        for( final Haplotype h : haplotypes ) {
+            if( h.isReference() ) { return haplotypes.indexOf(h); }
+        }
+        throw new ReviewedStingException( "No reference haplotype found in the list of haplotypes!" );
+    }
+
    public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser,
                                                                                            final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList,
                                                                                            final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
--- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CachingPairHMM.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CachingPairHMM.java
@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.pairhmm;
+
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.QualityUtils;
+
+import java.util.Arrays;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin, carneiro
+ * Date: 10/16/12
+ */
+
+public class CachingPairHMM extends OriginalPairHMM {
+
+    double[][] constantMatrix = null; // The cache in the CachingPairHMM
+    double[][] distanceMatrix = null; // The cache in the CachingPairHMM
+
+    protected static final double [] firstRowConstantMatrix = {
+            QualityUtils.qualToProbLog10((byte) (DEFAULT_GOP + DEFAULT_GOP)),
+            QualityUtils.qualToProbLog10(DEFAULT_GCP),
+            QualityUtils.qualToErrorProbLog10(DEFAULT_GOP),
+            QualityUtils.qualToErrorProbLog10(DEFAULT_GCP),
+            0.0,
+            0.0
+    };
+
+    @Override
+    public void initialize( final int READ_MAX_LENGTH, final int HAPLOTYPE_MAX_LENGTH ) {
+
+        super.initialize(READ_MAX_LENGTH, HAPLOTYPE_MAX_LENGTH);
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        final int X_METRIC_LENGTH = READ_MAX_LENGTH + 2;
+        final int Y_METRIC_LENGTH = HAPLOTYPE_MAX_LENGTH + 2;
+
+        constantMatrix = new double[X_METRIC_LENGTH][6];
+        distanceMatrix = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+
+        // fill in the first row
+        for( int jjj = 2; jjj < Y_METRIC_LENGTH; jjj++ ) {
+            updateCell(1, jjj, 0.0, firstRowConstantMatrix, matchMetricArray, XMetricArray, YMetricArray);
+        }
+    }
+
+    @Override
+    public double computeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
+                                                            final byte[] readBases,
+                                                            final byte[] readQuals,
+                                                            final byte[] insertionGOP,
+                                                            final byte[] deletionGOP,
+                                                            final byte[] overallGCP,
+                                                            final int hapStartIndex,
+                                                            final boolean recacheReadValues ) {
+
+        if( recacheReadValues ) {
+            initializeConstants( insertionGOP, deletionGOP, overallGCP );
+        }
+        initializeDistanceMatrix( haplotypeBases, readBases, readQuals, hapStartIndex );
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        final int X_METRIC_LENGTH = readBases.length + 2;
+        final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
+
+        for (int i = 2; i < X_METRIC_LENGTH; i++) {
+            for (int j = hapStartIndex+1; j < Y_METRIC_LENGTH; j++) {
+                updateCell(i, j, distanceMatrix[i][j], constantMatrix[i], matchMetricArray, XMetricArray, YMetricArray);
+            }
+        }
+
+        // final probability is the log10 sum of the last element in all three state arrays
+        final int endI = X_METRIC_LENGTH - 1;
+        final int endJ = Y_METRIC_LENGTH - 1;
+        return MathUtils.approximateLog10SumLog10(matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]);
+    }
+
+    /**
+     * Initializes the matrix that holds all the constants related to the editing
+     * distance between the read and the haplotype.
+     *
+     * @param haplotypeBases the bases of the haplotype
+     * @param readBases      the bases of the read
+     * @param readQuals      the base quality scores of the read
+     * @param startIndex     where to start updating the distanceMatrix (in case this read is similar to the previous read)
+     */
+    public void initializeDistanceMatrix( final byte[] haplotypeBases,
+                                          final byte[] readBases,
+                                          final byte[] readQuals,
+                                          final int startIndex ) {
+
+        // initialize the pBaseReadLog10 matrix for all combinations of read x haplotype bases
+        // Abusing the fact that java initializes arrays with 0.0, so no need to fill in rows and columns below 2.
+
+        for (int i = 0; i < readBases.length; i++) {
+            final byte x = readBases[i];
+            final byte qual = readQuals[i];
+            for (int j = startIndex; j < haplotypeBases.length; j++) {
+                final byte y = haplotypeBases[j];
+                distanceMatrix[i+2][j+2] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ?
+                        QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) );
+            }
+        }
+    }
+
+    /**
+     * Initializes the matrix that holds all the constants related to quality scores.
+     *
+     * @param insertionGOP   insertion quality scores of the read
+     * @param deletionGOP    deletion quality scores of the read
+     * @param overallGCP     overall gap continuation penalty
+     */
+    public void initializeConstants( final byte[] insertionGOP,
+                                     final byte[] deletionGOP,
+                                     final byte[] overallGCP ) {
+
+        final int l = insertionGOP.length;
+        constantMatrix[1] = firstRowConstantMatrix;
+        for (int i = 0; i < l; i++) {
+            final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE);
+            constantMatrix[i+2][0] = QualityUtils.qualToProbLog10((byte) qualIndexGOP);
+            constantMatrix[i+2][1] = QualityUtils.qualToProbLog10(overallGCP[i]);
+            constantMatrix[i+2][2] = QualityUtils.qualToErrorProbLog10(insertionGOP[i]);
+            constantMatrix[i+2][3] = QualityUtils.qualToErrorProbLog10(overallGCP[i]);
+            constantMatrix[i+2][4] = QualityUtils.qualToErrorProbLog10(deletionGOP[i]);
+            constantMatrix[i+2][5] = QualityUtils.qualToErrorProbLog10(overallGCP[i]);
+        }
+        constantMatrix[l+1][4] = 0.0;
+        constantMatrix[l+1][5] = 0.0;
+    }
+
+    /**
+     * Updates a cell in the HMM matrix
+     *
+     * The read and haplotype indices are offset by one because the state arrays have an extra column to hold the
+     * initial conditions
+
+     * @param indI             row index in the matrices to update
+     * @param indJ             column index in the matrices to update
+     * @param prior            the likelihood editing distance matrix for the read x haplotype
+     * @param constants        an array with the six constants relevant to this location
+     * @param matchMetricArray the matches likelihood matrix
+     * @param XMetricArray     the insertions likelihood matrix
+     * @param YMetricArray     the deletions likelihood matrix
+     */
+    private void updateCell( final int indI, final int indJ, final double prior, final double[] constants,
+                             final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+
+        matchMetricArray[indI][indJ] = prior +
+                MathUtils.approximateLog10SumLog10( matchMetricArray[indI - 1][indJ - 1] + constants[0],
+                        XMetricArray[indI - 1][indJ - 1] + constants[1],
+                        YMetricArray[indI - 1][indJ - 1] + constants[1] );
+        XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10( matchMetricArray[indI - 1][indJ] + constants[2],
+                XMetricArray[indI - 1][indJ] + constants[3]);
+        YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10( matchMetricArray[indI][indJ - 1] + constants[4],
+                YMetricArray[indI][indJ - 1] + constants[5]);
+    }
+}
--- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessCachingPairHMM.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessCachingPairHMM.java
@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.pairhmm;
+
+import org.broadinstitute.sting.utils.QualityUtils;
+
+import java.util.Arrays;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin, carneiro
+ * Date: 10/16/12
+ */
+
+public class LoglessCachingPairHMM extends CachingPairHMM {
+
+    protected static final double SCALE_FACTOR_LOG10 = 300.0;
+
+    protected static final double [] firstRowConstantMatrix = {
+            QualityUtils.qualToProb((byte) (DEFAULT_GOP + DEFAULT_GOP)),
+            QualityUtils.qualToProb(DEFAULT_GCP),
+            QualityUtils.qualToErrorProb(DEFAULT_GOP),
+            QualityUtils.qualToErrorProb(DEFAULT_GCP),
+            1.0,
+            1.0
+    };
+
+    @Override
+    public void initialize( final int READ_MAX_LENGTH, final int HAPLOTYPE_MAX_LENGTH ) {
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        final int X_METRIC_LENGTH = READ_MAX_LENGTH + 2;
+        final int Y_METRIC_LENGTH = HAPLOTYPE_MAX_LENGTH + 2;
+
+        matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+
+        for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) {
+            Arrays.fill(matchMetricArray[iii], 0.0);
+            Arrays.fill(XMetricArray[iii], 0.0);
+            Arrays.fill(YMetricArray[iii], 0.0);
+        }
+
+        // the initial condition
+        matchMetricArray[1][1] = Math.pow(10.0, SCALE_FACTOR_LOG10); // Math.log10(1.0);
+
+        constantMatrix = new double[X_METRIC_LENGTH][6];
+        distanceMatrix = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+
+        // fill in the first row
+        for( int jjj = 2; jjj < Y_METRIC_LENGTH; jjj++ ) {
+            updateCell(1, jjj, 1.0, firstRowConstantMatrix, matchMetricArray, XMetricArray, YMetricArray);
+        }
+    }
+
+    @Override
+    public double computeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
+                                                            final byte[] readBases,
+                                                            final byte[] readQuals,
+                                                            final byte[] insertionGOP,
+                                                            final byte[] deletionGOP,
+                                                            final byte[] overallGCP,
+                                                            final int hapStartIndex,
+                                                            final boolean recacheReadValues ) {
+
+        if( recacheReadValues ) {
+            initializeConstants( insertionGOP, deletionGOP, overallGCP );
+        }
+        initializeDistanceMatrix( haplotypeBases, readBases, readQuals, hapStartIndex );
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        final int X_METRIC_LENGTH = readBases.length + 2;
+        final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
+
+        for (int i = 2; i < X_METRIC_LENGTH; i++) {
+            for (int j = hapStartIndex+1; j < Y_METRIC_LENGTH; j++) {
+                updateCell(i, j, distanceMatrix[i][j], constantMatrix[i], matchMetricArray, XMetricArray, YMetricArray);
+            }
+        }
+
+        // final probability is the log10 sum of the last element in all three state arrays
+        final int endI = X_METRIC_LENGTH - 1;
+        final int endJ = Y_METRIC_LENGTH - 1;
+        return Math.log10( matchMetricArray[endI][endJ] + XMetricArray[endI][endJ] + YMetricArray[endI][endJ] ) - SCALE_FACTOR_LOG10;
+    }
+
+    /**
+     * Initializes the matrix that holds all the constants related to the editing
+     * distance between the read and the haplotype.
+     *
+     * @param haplotypeBases the bases of the haplotype
+     * @param readBases      the bases of the read
+     * @param readQuals      the base quality scores of the read
+     * @param startIndex     where to start updating the distanceMatrix (in case this read is similar to the previous read)
+     */
+    public void initializeDistanceMatrix( final byte[] haplotypeBases,
+                                          final byte[] readBases,
+                                          final byte[] readQuals,
+                                          final int startIndex ) {
+
+        // initialize the pBaseReadLog10 matrix for all combinations of read x haplotype bases
+        // Abusing the fact that java initializes arrays with 0.0, so no need to fill in rows and columns below 2.
+
+        for (int i = 0; i < readBases.length; i++) {
+            final byte x = readBases[i];
+            final byte qual = readQuals[i];
+            for (int j = startIndex; j < haplotypeBases.length; j++) {
+                final byte y = haplotypeBases[j];
+                distanceMatrix[i+2][j+2] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ?
+                        QualityUtils.qualToProb(qual) : QualityUtils.qualToErrorProb(qual) );
+            }
+        }
+    }
+
+    /**
+     * Initializes the matrix that holds all the constants related to quality scores.
+     *
+     * @param insertionGOP   insertion quality scores of the read
+     * @param deletionGOP    deletion quality scores of the read
+     * @param overallGCP     overall gap continuation penalty
+     */
+    public void initializeConstants( final byte[] insertionGOP,
+                                     final byte[] deletionGOP,
+                                     final byte[] overallGCP ) {
+
+        final int l = insertionGOP.length;
+        constantMatrix[1] = firstRowConstantMatrix;
+        for (int i = 0; i < l; i++) {
+            final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE);
+            constantMatrix[i+2][0] = QualityUtils.qualToProb((byte) qualIndexGOP);
+            constantMatrix[i+2][1] = QualityUtils.qualToProb(overallGCP[i]);
+            constantMatrix[i+2][2] = QualityUtils.qualToErrorProb(insertionGOP[i]);
+            constantMatrix[i+2][3] = QualityUtils.qualToErrorProb(overallGCP[i]);
+            constantMatrix[i+2][4] = QualityUtils.qualToErrorProb(deletionGOP[i]);
+            constantMatrix[i+2][5] = QualityUtils.qualToErrorProb(overallGCP[i]);
+        }
+        constantMatrix[l+1][4] = 1.0;
+        constantMatrix[l+1][5] = 1.0;
+    }
+
+    /**
+     * Updates a cell in the HMM matrix
+     *
+     * The read and haplotype indices are offset by one because the state arrays have an extra column to hold the
+     * initial conditions
+
+     * @param indI             row index in the matrices to update
+     * @param indJ             column index in the matrices to update
+     * @param prior            the likelihood editing distance matrix for the read x haplotype
+     * @param constants        an array with the six constants relevant to this location
+     * @param matchMetricArray the matches likelihood matrix
+     * @param XMetricArray     the insertions likelihood matrix
+     * @param YMetricArray     the deletions likelihood matrix
+     */
+    private void updateCell( final int indI, final int indJ, final double prior, final double[] constants,
+                             final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+
+        matchMetricArray[indI][indJ] = prior * ( matchMetricArray[indI - 1][indJ - 1] * constants[0] +
+                                                 XMetricArray[indI - 1][indJ - 1] * constants[1] +
+                                                 YMetricArray[indI - 1][indJ - 1] * constants[1] );
+        XMetricArray[indI][indJ] = matchMetricArray[indI - 1][indJ] * constants[2] + XMetricArray[indI - 1][indJ] * constants[3];
+        YMetricArray[indI][indJ] = matchMetricArray[indI][indJ - 1] * constants[4] + YMetricArray[indI][indJ - 1] * constants[5];
+    }
+}
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@ -70,7 +70,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    @Test
    public void HCTestProblematicReadsModifiedInActiveRegions() {
        final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("fa5c5eb996e95aed12c50d70e6dd74d7"));
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c54c0c9411054bf629bfd98b616e53fc"));
        executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
    }

--- a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java
@ -23,24 +23,26 @@
 */

 // our package
-package org.broadinstitute.sting.utils;
+package org.broadinstitute.sting.utils.pairhmm;


 // the imports for unit testing.

-
 import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.Utils;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;

 import java.util.*;

-
 public class PairHMMUnitTest extends BaseTest {
    final static boolean EXTENSIVE_TESTING = true;
-    PairHMM hmm = new PairHMM( false ); // reference implementation
-    PairHMM bandedHMM = new PairHMM( true ); // algorithm with banding
+    PairHMM exactHMM = new ExactPairHMM(); // the log truth implementation
+    PairHMM originalHMM = new OriginalPairHMM(); // the reference implementation
+    PairHMM cachingHMM = new CachingPairHMM();
+    PairHMM loglessHMM = new LoglessCachingPairHMM();

    // --------------------------------------------------------------------------------
    //
@ -57,7 +59,7 @@ public class PairHMMUnitTest extends BaseTest {
        final static String LEFT_FLANK = "GATTTATCATCGAGTCTGC";
        final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTTA";

-        public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp) {
+        public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp ) {
            this(ref, read, baseQual, insQual, delQual, expectedQual, gcp, false, false);
        }

@ -76,115 +78,51 @@ public class PairHMMUnitTest extends BaseTest {
        }

        public double expectedLogL() {
-            return expectedQual / -10.0;
+            return (expectedQual / -10.0) + 0.03 ;
        }

-        public double tolerance() {
-            return 0.1; // TODO FIXME arbitrary
+        public double toleranceFromTheoretical() {
+            return 0.2;
        }

-        public double calcLogL() {
+        public double toleranceFromReference() {
+            return 1E-4;
+        }

-            double logL = hmm.computeReadLikelihoodGivenHaplotype(
+        public double toleranceFromExact() {
+            return 1E-9;
+        }
+
+        public double calcLogL( final PairHMM pairHMM, boolean anchorIndel ) {
+            pairHMM.initialize(readBasesWithContext.length, refBasesWithContext.length);
+            return pairHMM.computeReadLikelihoodGivenHaplotypeLog10(
                    refBasesWithContext, readBasesWithContext,
-                    qualAsBytes(baseQual, false), qualAsBytes(insQual, true), qualAsBytes(delQual, true),
-                    qualAsBytes(gcp, false));
-
-            return logL;
+                    qualAsBytes(baseQual, false, anchorIndel), qualAsBytes(insQual, true, anchorIndel), qualAsBytes(delQual, true, anchorIndel),
+                    qualAsBytes(gcp, false, anchorIndel), 0, true);
        }

        private final byte[] asBytes(final String bases, final boolean left, final boolean right) {
            return ( (left ? LEFT_FLANK : "") + CONTEXT + bases + CONTEXT + (right ? RIGHT_FLANK : "")).getBytes();
        }

-        private byte[] qualAsBytes(final int phredQual, final boolean doGOP) {
+        private byte[] qualAsBytes(final int phredQual, final boolean doGOP, final boolean anchorIndel) {
            final byte phredQuals[] = new byte[readBasesWithContext.length];
-            // initialize everything to MASSIVE_QUAL so it cannot be moved by HMM
-            Arrays.fill(phredQuals, (byte)100);

-            // update just the bases corresponding to the provided micro read with the quality scores
-            if( doGOP ) {
-                phredQuals[0 + CONTEXT.length()] = (byte)phredQual;
-            } else {
-                for ( int i = 0; i < read.length(); i++)
-                    phredQuals[i + CONTEXT.length()] = (byte)phredQual;
-            }
+            if( anchorIndel ) {
+                // initialize everything to MASSIVE_QUAL so it cannot be moved by HMM
+                Arrays.fill(phredQuals, (byte)100);

-            return phredQuals;
-        }
-    }
-
-    final Random random = new Random(87865573);
-    private class BandedLikelihoodTestProvider extends TestDataProvider {
-        final String ref, read;
-        final byte[] refBasesWithContext, readBasesWithContext;
-        final int baseQual, insQual, delQual, gcp;
-        final int expectedQual;
-        final static String LEFT_CONTEXT = "ACGTAATGACGCTACATGTCGCCAACCGTC";
-        final static String RIGHT_CONTEXT = "TACGGCTTCATATAGGGCAATGTGTGTGGCAAAA";
-        final static String LEFT_FLANK = "GATTTATCATCGAGTCTGTT";
-        final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTCCGTA";
-        final byte[] baseQuals, insQuals, delQuals, gcps;
-
-        public BandedLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp) {
-            this(ref, read, baseQual, insQual, delQual, expectedQual, gcp, false, false);
-        }
-
-        public BandedLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp, final boolean left, final boolean right) {
-            super(BandedLikelihoodTestProvider.class, String.format("BANDED: ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual));
-            this.baseQual = baseQual;
-            this.delQual = delQual;
-            this.insQual = insQual;
-            this.gcp = gcp;
-            this.read = read;
-            this.ref = ref;
-            this.expectedQual = expectedQual;
-
-            refBasesWithContext = asBytes(ref, left, right);
-            readBasesWithContext = asBytes(read, false, false);
-            baseQuals = qualAsBytes(baseQual);
-            insQuals = qualAsBytes(insQual);
-            delQuals = qualAsBytes(delQual);
-            gcps = qualAsBytes(gcp, false);
-        }
-
-        public double expectedLogL() {
-            double logL = hmm.computeReadLikelihoodGivenHaplotype(
-                    refBasesWithContext, readBasesWithContext,
-                    baseQuals, insQuals, delQuals, gcps);
-
-            return logL;
-        }
-
-        public double tolerance() {
-            return 0.2; // TODO FIXME arbitrary
-        }
-
-        public double calcLogL() {
-
-            double logL = bandedHMM.computeReadLikelihoodGivenHaplotype(
-                    refBasesWithContext, readBasesWithContext,
-                    baseQuals, insQuals, delQuals, gcps);
-
-            return logL;
-        }
-
-        private final byte[] asBytes(final String bases, final boolean left, final boolean right) {
-            return ( (left ? LEFT_FLANK : "") + LEFT_CONTEXT + bases + RIGHT_CONTEXT + (right ? RIGHT_FLANK : "")).getBytes();
-        }
-
-        private byte[] qualAsBytes(final int phredQual) {
-            return qualAsBytes(phredQual, true);
-        }
-
-        private byte[] qualAsBytes(final int phredQual, final boolean addRandom) {
-            final byte phredQuals[] = new byte[readBasesWithContext.length];
-            Arrays.fill(phredQuals, (byte)phredQual);
-            if(addRandom) {
-                for( int iii = 0; iii < phredQuals.length; iii++) {
-                    phredQuals[iii] = (byte) ((int) phredQuals[iii] + (random.nextInt(7) - 3));
+                // update just the bases corresponding to the provided micro read with the quality scores
+                if( doGOP ) {
+                    phredQuals[0 + CONTEXT.length()] = (byte)phredQual;
+                } else {
+                    for ( int i = 0; i < read.length(); i++)
+                        phredQuals[i + CONTEXT.length()] = (byte)phredQual;
                }
+            } else {
+                Arrays.fill(phredQuals, (byte)phredQual);
            }
+
            return phredQuals;
        }
    }
@ -195,8 +133,8 @@ public class PairHMMUnitTest extends BaseTest {
        // test all combinations
        final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30, 40, 50) : Arrays.asList(30);
        final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(20, 30, 40, 50) : Arrays.asList(40);
-        final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30) : Arrays.asList(10);
-        final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20) : Arrays.asList(2);
+        final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(8, 10, 20) : Arrays.asList(10);
+        final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20,30,35) : Arrays.asList(2);

        for ( final int baseQual : baseQuals ) {
            for ( final int indelQual : indelQuals ) {
@ -219,7 +157,7 @@ public class PairHMMUnitTest extends BaseTest {

                            for ( boolean insertionP : Arrays.asList(true, false)) {
                                final String small = Utils.dupString((char)base, 1);
-                                final String big = Utils.dupString((char)base, size);
+                                final String big = Utils.dupString((char) base, size);

                                final String ref = insertionP ? small : big;
                                final String read = insertionP ? big : small;
@ -238,69 +176,65 @@ public class PairHMMUnitTest extends BaseTest {
        return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class);
    }

-    @Test(dataProvider = "BasicLikelihoodTestProvider", enabled = true)
-    public void testBasicLikelihoods(BasicLikelihoodTestProvider cfg) {
-        double calculatedLogL = cfg.calcLogL();
-        double expectedLogL = cfg.expectedLogL();
-        logger.warn(String.format("Test: logL calc=%.2f expected=%.2f for %s", calculatedLogL, expectedLogL, cfg.toString()));
-        Assert.assertEquals(calculatedLogL, expectedLogL, cfg.tolerance());
-    }
-
-    @DataProvider(name = "BandedLikelihoodTestProvider")
-    public Object[][] makeBandedLikelihoodTests() {
+    final Random random = new Random(87860573);
+    @DataProvider(name = "OptimizedLikelihoodTestProvider")
+    public Object[][] makeOptimizedLikelihoodTests() {
        // context on either side is ACGTTGCA REF ACGTTGCA
        // test all combinations
-        final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(25, 30, 40, 50) : Arrays.asList(30);
-        final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(30, 40, 50) : Arrays.asList(40);
-        final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 12) : Arrays.asList(10);
-        final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20) : Arrays.asList(2);
+        final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(10, 30, 40, 60) : Arrays.asList(30);
+        final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(20, 40, 60) : Arrays.asList(40);
+        final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30) : Arrays.asList(10);
+        final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(3, 20, 50, 90, 160) : Arrays.asList(2);

        for ( final int baseQual : baseQuals ) {
            for ( final int indelQual : indelQuals ) {
                for ( final int gcp : gcps ) {
-
-                    // test substitutions
-                    for ( final byte refBase : BaseUtils.BASES ) {
-                        for ( final byte readBase : BaseUtils.BASES ) {
-                            final String ref  = new String(new byte[]{refBase});
-                            final String read = new String(new byte[]{readBase});
-                            final int expected = refBase == readBase ? 0 : baseQual;
-                            new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
-                        }
-                    }
-
-                    // test insertions and deletions
-                    for ( final int size : sizes ) {
-                        for ( final byte base : BaseUtils.BASES ) {
-                            final int expected = indelQual + (size - 2) * gcp;
-
-                            for ( boolean insertionP : Arrays.asList(true, false)) {
-                                final String small = Utils.dupString((char)base, 1);
-                                final String big = Utils.dupString((char)base, size);
-
-                                final String ref = insertionP ? small : big;
-                                final String read = insertionP ? big : small;
-
-                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
-                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false);
-                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true);
-                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true);
+                    for ( final int refSize : sizes ) {
+                        for ( final int readSize : sizes ) {
+                            String ref = "";
+                            String read = "";
+                            for( int iii = 0; iii < refSize; iii++) {
+                                ref += (char) BaseUtils.BASES[random.nextInt(4)];
                            }
+                            for( int iii = 0; iii < readSize; iii++) {
+                                read += (char) BaseUtils.BASES[random.nextInt(4)];
+                            }
+                            new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp);
+                            new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp, true, false);
+                            new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp, false, true);
+                            new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp, true, true);
                        }
                    }
                }
            }
        }

-        return BandedLikelihoodTestProvider.getTests(BandedLikelihoodTestProvider.class);
+        return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class);
    }

-    @Test(dataProvider = "BandedLikelihoodTestProvider", enabled = true)
-    public void testBandedLikelihoods(BandedLikelihoodTestProvider cfg) {
-        double calculatedLogL = cfg.calcLogL();
+    @Test(dataProvider = "BasicLikelihoodTestProvider", enabled = true)
+    public void testBasicLikelihoods(BasicLikelihoodTestProvider cfg) {
+        double exactLogL = cfg.calcLogL( exactHMM, true );
+        double calculatedLogL = cfg.calcLogL( originalHMM, true );
+        double optimizedLogL = cfg.calcLogL( cachingHMM, true );
+        double loglessLogL = cfg.calcLogL( loglessHMM, true );
        double expectedLogL = cfg.expectedLogL();
-        logger.warn(String.format("Test: logL calc=%.2f expected=%.2f for %s", calculatedLogL, expectedLogL, cfg.toString()));
-        Assert.assertEquals(calculatedLogL, expectedLogL, cfg.tolerance());
+        //logger.warn(String.format("Test: logL calc=%.2f optimized=%.2f logless=%.2f expected=%.2f for %s", calculatedLogL, optimizedLogL, loglessLogL, expectedLogL, cfg.toString()));
+        Assert.assertEquals(exactLogL, expectedLogL, cfg.toleranceFromTheoretical());
+        Assert.assertEquals(calculatedLogL, expectedLogL, cfg.toleranceFromTheoretical());
+        Assert.assertEquals(optimizedLogL, calculatedLogL, cfg.toleranceFromReference());
+        Assert.assertEquals(loglessLogL, exactLogL, cfg.toleranceFromExact());
+    }
+
+    @Test(dataProvider = "OptimizedLikelihoodTestProvider", enabled = true)
+    public void testOptimizedLikelihoods(BasicLikelihoodTestProvider cfg) {
+        double exactLogL = cfg.calcLogL( exactHMM, false );
+        double calculatedLogL = cfg.calcLogL( originalHMM, false );
+        double optimizedLogL = cfg.calcLogL( cachingHMM, false );
+        double loglessLogL = cfg.calcLogL( loglessHMM, false );
+        //logger.warn(String.format("Test: logL calc=%.2f optimized=%.2f logless=%.2f expected=%.2f for %s", calculatedLogL, optimizedLogL, loglessLogL, expectedLogL, cfg.toString()));
+        Assert.assertEquals(optimizedLogL, calculatedLogL, cfg.toleranceFromReference());
+        Assert.assertEquals(loglessLogL, exactLogL, cfg.toleranceFromExact());
    }

    @Test
@ -322,11 +256,11 @@ public class PairHMMUnitTest extends BaseTest {
            byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length-offset);
            // change single base at position k to C. If it's a C, change to T
            mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
-            double res1 = hmm.computeReadLikelihoodGivenHaplotype(
+            originalHMM.initialize(mread.length, haplotype1.length);
+            double res1 = originalHMM.computeReadLikelihoodGivenHaplotypeLog10(
                    haplotype1, mread,
                    quals, gop, gop,
-                    gcp);
-
+                    gcp, 0, false);

            System.out.format("H:%s\nR:  %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);

@ -353,11 +287,11 @@ public class PairHMMUnitTest extends BaseTest {
            byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length);
            // change single base at position k to C. If it's a C, change to T
            mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
-            double res1 = hmm.computeReadLikelihoodGivenHaplotype(
+            originalHMM.initialize(mread.length, haplotype1.length);
+            double res1 = originalHMM.computeReadLikelihoodGivenHaplotypeLog10(
                    haplotype1, mread,
                    quals, gop, gop,
-                    gcp);
-
+                    gcp, 0, false);

            System.out.format("H:%s\nR:  %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);

--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
@ -69,7 +69,33 @@ public class StandardCallerArgumentCollection {
    @Argument(fullName = "max_alternate_alleles_for_indels", shortName = "maxAltAllelesForIndels", doc = "Maximum number of alternate alleles to genotype for indels only", required = false)
    public int MAX_ALTERNATE_ALLELES_FOR_INDELS = 2;

+    /**
+     * If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads.
+     * Basically, it will ignore the contamination fraction of reads for each alternate allele.  So if the pileup contains N total bases, then we
+     * will try to remove (N * contamination fraction) bases for each alternate allele.
+     */
+    @Hidden
+    @Argument(fullName = "contamination_percentage_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
+    public double CONTAMINATION_PERCENTAGE = 0.0;
+
    @Hidden
    @Argument(shortName = "logExactCalls", doc="x", required=false)
    public File exactCallsLog = null;
+
+
+    public StandardCallerArgumentCollection() { }
+
+    // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
+    public StandardCallerArgumentCollection(final StandardCallerArgumentCollection SCAC) {
+        this.alleles = SCAC.alleles;
+        this.GenotypingMode = SCAC.GenotypingMode;
+        this.heterozygosity = SCAC.heterozygosity;
+        this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
+        this.MAX_ALTERNATE_ALLELES_FOR_INDELS = SCAC.MAX_ALTERNATE_ALLELES_FOR_INDELS;
+        this.OutputMode = SCAC.OutputMode;
+        this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
+        this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
+        this.CONTAMINATION_PERCENTAGE = SCAC.CONTAMINATION_PERCENTAGE;
+        this.exactCallsLog = SCAC.exactCallsLog;
+    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@ -57,7 +57,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
    protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
        super(UAC, logger);
        pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
-                UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
+                UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.pairHMM);
        DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
        haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
        ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@ -41,19 +41,20 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
 import org.broadinstitute.sting.utils.variantcontext.*;

-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
+import java.util.*;

 public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {

    private final boolean useAlleleFromVCF;

    private final double[] likelihoodSums = new double[4];
-    
+    private final ArrayList<PileupElement>[] alleleStratifiedElements = new ArrayList[4];
+
    protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
        super(UAC, logger);
        useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
+        for ( int i = 0; i < 4; i++ )
+            alleleStratifiedElements[i] = new ArrayList<PileupElement>();
    }

    public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
@ -78,8 +79,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
        ArrayList<SampleGenotypeData> GLs = new ArrayList<SampleGenotypeData>(contexts.size());
        for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
            ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
+            if ( UAC.CONTAMINATION_PERCENTAGE > 0.0 )
+                pileup = createDecontaminatedPileup(pileup, UAC.CONTAMINATION_PERCENTAGE);
            if ( useBAQedPileup )
-                pileup = createBAQedPileup( pileup );
+                pileup = createBAQedPileup(pileup);

            // create the GenotypeLikelihoods object
            final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods(UAC.PCR_error);
@ -150,8 +153,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC

        // create the genotypes; no-call everyone for now
        final GenotypesContext genotypes = GenotypesContext.create();
-        final List<Allele> noCall = new ArrayList<Allele>();
-        noCall.add(Allele.NO_CALL);

        for ( SampleGenotypeData sampleData : GLs ) {
            final double[] allLikelihoods = sampleData.GL.getLikelihoods();
@ -202,6 +203,42 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
        return allelesToUse;
    }

+    public ReadBackedPileup createDecontaminatedPileup(final ReadBackedPileup pileup, final double contaminationPercentage) {
+        // special case removal of all reads
+        if ( contaminationPercentage >= 1.0 )
+            return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
+
+        // start by stratifying the reads by the alleles they represent at this position
+        for( final PileupElement pe : pileup ) {
+            final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
+            if ( baseIndex != -1 )
+                alleleStratifiedElements[baseIndex].add(pe);
+        }
+
+        // Down-sample *each* allele by the contamination fraction applied to the entire pileup.
+        // Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.
+        int numReadsToRemove = (int)Math.ceil((double)pileup.getNumberOfElements() * contaminationPercentage);
+        final TreeSet<PileupElement> elementsToKeep = new TreeSet<PileupElement>(new Comparator<PileupElement>() {
+            @Override
+            public int compare(PileupElement element1, PileupElement element2) {
+                final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart();
+                return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
+            }
+        });
+
+        for ( int i = 0; i < 4; i++ ) {
+            final ArrayList<PileupElement> alleleList = alleleStratifiedElements[i];
+            if ( alleleList.size() > numReadsToRemove )
+                elementsToKeep.addAll(downsampleElements(alleleList, numReadsToRemove));
+        }
+
+        // clean up pointers so memory can be garbage collected if needed
+        for ( int i = 0; i < 4; i++ )
+            alleleStratifiedElements[i].clear();
+
+        return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(elementsToKeep));
+    }
+
    public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
        final List<PileupElement> BAQedElements = new ArrayList<PileupElement>();
        for( final PileupElement PE : pileup ) {
@ -220,6 +257,22 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
        public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
    }

+    private List<PileupElement> downsampleElements(final ArrayList<PileupElement> elements, final int numElementsToRemove) {
+        final int pileupSize = elements.size();
+        final BitSet itemsToRemove = new BitSet(pileupSize);
+        for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
+            itemsToRemove.set(selectedIndex);
+        }
+
+        ArrayList<PileupElement> elementsToKeep = new ArrayList<PileupElement>(pileupSize - numElementsToRemove);
+        for ( int i = 0; i < pileupSize; i++ ) {
+            if ( !itemsToRemove.get(i) )
+                elementsToKeep.add(elements.get(i));
+        }
+
+        return elementsToKeep;
+    }
+
    private static class SampleGenotypeData {

        public final String name;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
 import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory;
+import org.broadinstitute.sting.utils.pairhmm.PairHMM;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;

@ -65,6 +66,12 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
    @Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
    public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;

+    /**
+     * The PairHMM implementation to use for -glm INDEL genotype likelihood calculations. The various implementations balance a tradeoff of accuracy and runtime.
+     */
+    @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for -glm INDEL genotype likelihood calculations", required = false)
+    public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.ORIGINAL;
+
    /**
     * The minimum confidence needed in a given base for it to be used in variant calling.  Note that the base quality of a base
     * is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
@ -112,10 +119,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
    @Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
    public int INDEL_HAPLOTYPE_SIZE = 80;

-    @Hidden
-    @Argument(fullName = "noBandedIndel", shortName = "noBandedIndel", doc = "Don't do Banded Indel likelihood computation", required = false)
-    public boolean DONT_DO_BANDED_INDEL_COMPUTATION = false;
-
    @Hidden
    @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
    public boolean OUTPUT_DEBUG_INDEL_INFO = false;
@ -183,63 +186,41 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
    @Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false)
    boolean EXCLUDE_FILTERED_REFERENCE_SITES = false;

-    // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
-    public UnifiedArgumentCollection clone() {
-        UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
-
-        uac.GLmodel = GLmodel;
-        uac.AFmodel = AFmodel;
-        uac.heterozygosity = heterozygosity;
-        uac.PCR_error = PCR_error;
-        uac.GenotypingMode = GenotypingMode;
-        uac.OutputMode = OutputMode;
-        uac.NO_SLOD = NO_SLOD;
-        uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
-        uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
-        uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
-        uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
-        uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION;
-        uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING;
-        uac.MIN_INDEL_FRACTION_PER_SAMPLE = MIN_INDEL_FRACTION_PER_SAMPLE;
-        uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY;
-        uac.INDEL_GAP_OPEN_PENALTY = INDEL_GAP_OPEN_PENALTY;
-        uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
-        uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
-        uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
-        uac.alleles = alleles;
-        uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES;
-        uac.MAX_ALTERNATE_ALLELES_FOR_INDELS = MAX_ALTERNATE_ALLELES_FOR_INDELS;
-        uac.GLmodel = GLmodel;
-        uac.TREAT_ALL_READS_AS_SINGLE_POOL = TREAT_ALL_READS_AS_SINGLE_POOL;
-        uac.referenceSampleRod = referenceSampleRod;
-        uac.referenceSampleName = referenceSampleName;
-        uac.samplePloidy = samplePloidy;
-        uac.maxQualityScore = minQualityScore;
-        uac.phredScaledPrior = phredScaledPrior;
-        uac.minPower = minPower;
-        uac.minReferenceDepth = minReferenceDepth;
-        uac.EXCLUDE_FILTERED_REFERENCE_SITES = EXCLUDE_FILTERED_REFERENCE_SITES;
-        uac.IGNORE_LANE_INFO = IGNORE_LANE_INFO;
-        uac.exactCallsLog = exactCallsLog;
-
-        // todo- arguments to remove
-        uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
-        uac.DONT_DO_BANDED_INDEL_COMPUTATION = DONT_DO_BANDED_INDEL_COMPUTATION;
-        return uac;
-    }
-
    public UnifiedArgumentCollection() { }

-    public UnifiedArgumentCollection( final StandardCallerArgumentCollection SCAC ) {
-        super();
-        this.alleles = SCAC.alleles;
-        this.GenotypingMode = SCAC.GenotypingMode;
-        this.heterozygosity = SCAC.heterozygosity;
-        this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
-        this.MAX_ALTERNATE_ALLELES_FOR_INDELS = SCAC.MAX_ALTERNATE_ALLELES_FOR_INDELS;
-        this.OutputMode = SCAC.OutputMode;
-        this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
-        this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
-        this.exactCallsLog = SCAC.exactCallsLog;
+    public UnifiedArgumentCollection(final StandardCallerArgumentCollection SCAC) {
+        super(SCAC);
+    }
+
+    // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
+    public UnifiedArgumentCollection(final UnifiedArgumentCollection uac) {
+        this.GLmodel = uac.GLmodel;
+        this.AFmodel = uac.AFmodel;
+        this.PCR_error = uac.PCR_error;
+        this.NO_SLOD = uac.NO_SLOD;
+        this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
+        this.MIN_BASE_QUALTY_SCORE = uac.MIN_BASE_QUALTY_SCORE;
+        this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION;
+        this.MIN_INDEL_COUNT_FOR_GENOTYPING = uac.MIN_INDEL_COUNT_FOR_GENOTYPING;
+        this.MIN_INDEL_FRACTION_PER_SAMPLE = uac.MIN_INDEL_FRACTION_PER_SAMPLE;
+        this.INDEL_HETEROZYGOSITY = uac.INDEL_HETEROZYGOSITY;
+        this.INDEL_GAP_OPEN_PENALTY = uac.INDEL_GAP_OPEN_PENALTY;
+        this.INDEL_GAP_CONTINUATION_PENALTY = uac.INDEL_GAP_CONTINUATION_PENALTY;
+        this.OUTPUT_DEBUG_INDEL_INFO = uac.OUTPUT_DEBUG_INDEL_INFO;
+        this.INDEL_HAPLOTYPE_SIZE = uac.INDEL_HAPLOTYPE_SIZE;
+        this.TREAT_ALL_READS_AS_SINGLE_POOL = uac.TREAT_ALL_READS_AS_SINGLE_POOL;
+        this.referenceSampleRod = uac.referenceSampleRod;
+        this.referenceSampleName = uac.referenceSampleName;
+        this.samplePloidy = uac.samplePloidy;
+        this.maxQualityScore = uac.minQualityScore;
+        this.phredScaledPrior = uac.phredScaledPrior;
+        this.minPower = uac.minPower;
+        this.minReferenceDepth = uac.minReferenceDepth;
+        this.EXCLUDE_FILTERED_REFERENCE_SITES = uac.EXCLUDE_FILTERED_REFERENCE_SITES;
+        this.IGNORE_LANE_INFO = uac.IGNORE_LANE_INFO;
+        this.pairHMM = uac.pairHMM;
+
+        // todo- arguments to remove
+        this.IGNORE_SNP_ALLELES = uac.IGNORE_SNP_ALLELES;
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@ -30,8 +30,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.PairHMM;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.pairhmm.ExactPairHMM;
+import org.broadinstitute.sting.utils.pairhmm.OriginalPairHMM;
+import org.broadinstitute.sting.utils.pairhmm.PairHMM;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -48,7 +51,6 @@ public class PairHMMIndelErrorModel {
    public static final int BASE_QUAL_THRESHOLD = 20;

    private boolean DEBUG = false;
-    private boolean bandedLikelihoods = false;

    private static final int MAX_CACHED_QUAL = 127;

@ -67,6 +69,8 @@ public class PairHMMIndelErrorModel {
    private final byte[] GAP_OPEN_PROB_TABLE;
    private final byte[] GAP_CONT_PROB_TABLE;

+    private final PairHMM pairHMM;
+
    /////////////////////////////
    // Private Member Variables
    /////////////////////////////
@ -85,15 +89,26 @@ public class PairHMMIndelErrorModel {
        }
    }

-    public PairHMMIndelErrorModel(byte indelGOP, byte indelGCP, boolean deb, boolean bandedLikelihoods) {
+    public PairHMMIndelErrorModel(byte indelGOP, byte indelGCP, boolean deb, final PairHMM.HMM_IMPLEMENTATION hmmType ) {
        this.DEBUG = deb;
-        this.bandedLikelihoods = bandedLikelihoods;
+
+        switch (hmmType) {
+            case EXACT:
+                pairHMM = new ExactPairHMM();
+                break;
+            case ORIGINAL:
+                pairHMM = new OriginalPairHMM();
+                break;
+            case CACHING:
+            case LOGLESS_CACHING:
+            default:
+                throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL and EXACT.");
+        }

        // fill gap penalty table, affine naive model:
        this.GAP_CONT_PROB_TABLE = new byte[MAX_HRUN_GAP_IDX];
        this.GAP_OPEN_PROB_TABLE = new byte[MAX_HRUN_GAP_IDX];

-
        for (int i = 0; i < START_HRUN_GAP_IDX; i++) {
            GAP_OPEN_PROB_TABLE[i] = indelGOP;
            GAP_CONT_PROB_TABLE[i] = indelGCP;
@ -190,7 +205,6 @@ public class PairHMMIndelErrorModel {
                                                                          final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap,
                                                                          final int[] readCounts) {
        final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()];
-        final PairHMM pairHMM = new PairHMM(bandedLikelihoods);

        int readIdx=0;
        for (PileupElement p: pileup) {
@ -303,8 +317,6 @@ public class PairHMMIndelErrorModel {
                    final byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartSoftClippedBases, unclippedReadBases.length-numEndSoftClippedBases);
                    int j=0;

-                    // initialize path metric and traceback memories for likelihood computation
-                    double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null;
                    byte[] previousHaplotypeSeen = null;
                    final byte[] contextLogGapOpenProbabilities = new byte[readBases.length];
                    final byte[] contextLogGapContinuationProbabilities  = new byte[readBases.length];
@ -341,14 +353,9 @@ public class PairHMMIndelErrorModel {
                        final int X_METRIC_LENGTH = readBases.length+2;
                        final int Y_METRIC_LENGTH = haplotypeBases.length+2;

-                        if (matchMetricArray == null) {
+                        if (previousHaplotypeSeen == null) {
                            //no need to reallocate arrays for each new haplotype, as length won't change
-                            matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-                            XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-                            YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-
-
-                            PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
+                            pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);
                        }

                        int startIndexInHaplotype = 0;
@ -356,11 +363,10 @@ public class PairHMMIndelErrorModel {
                            startIndexInHaplotype = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
                        previousHaplotypeSeen = haplotypeBases.clone();

-                        readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
+                        readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals,
                                (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities),
                                (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities),
-                                contextLogGapContinuationProbabilities,
-                                startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
+                                contextLogGapContinuationProbabilities, startIndexInHaplotype, false);


                        if (DEBUG) {
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;

+import java.io.Serializable;
 import java.util.*;

 public class Haplotype {
@ -184,6 +185,21 @@ public class Haplotype {
        return new Haplotype(newHaplotypeBases);
    }

+    public static class HaplotypeBaseComparator implements Comparator<Haplotype>, Serializable {
+        @Override
+        public int compare( final Haplotype hap1, final Haplotype hap2 ) {
+            final byte[] arr1 = hap1.getBases();
+            final byte[] arr2 = hap2.getBases();
+            // compares byte arrays using lexical ordering
+            final int len = Math.min(arr1.length, arr2.length);
+            for( int iii = 0; iii < len; iii++ ) {
+                final int cmp = arr1[iii] - arr2[iii];
+                if (cmp != 0) { return cmp; }
+            }
+            return arr2.length - arr1.length;
+        }
+    }
+
    public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,
                                                                               final int startPos,
                                                                               final ReferenceContext ref,
--- a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java
+++ b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java
@ -1,259 +0,0 @@
-/*
- * Copyright (c) 2012, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.utils;
-
-import com.google.java.contract.Ensures;
-import com.google.java.contract.Requires;
-
-import java.util.*;
-
-/**
- * Util class for performing the pair HMM for local alignment. Figure 4.3 in Durbin 1998 book.
- * User: rpoplin
- * Date: 3/1/12
- */
-
-public class PairHMM {
-    private static final Byte MAX_CACHED_QUAL = Byte.MAX_VALUE;
-    private static final byte DEFAULT_GOP = (byte) 45;
-    private static final byte DEFAULT_GCP = (byte) 10;
-    private static final double BANDING_TOLERANCE = 22.0;
-    private static final int BANDING_CLUSTER_WINDOW = 12;
-    private final boolean noBanded;
-
-    public PairHMM() {
-        noBanded = false;
-    }
-
-    public PairHMM( final boolean noBanded ) {
-        this.noBanded = noBanded;
-    }
-
-    
-    public static void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray,
-                                 final int X_METRIC_LENGTH) {
-
-        for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) {
-            Arrays.fill(matchMetricArray[iii], Double.NEGATIVE_INFINITY);
-            Arrays.fill(XMetricArray[iii], Double.NEGATIVE_INFINITY);
-            Arrays.fill(YMetricArray[iii], Double.NEGATIVE_INFINITY);
-        }
-
-        // the initial condition
-        matchMetricArray[1][1] = 0.0; // Math.log10(1.0);
-
-    }
-
-    @Requires({"readBases.length == readQuals.length","readBases.length == insertionGOP.length","readBases.length == deletionGOP.length","readBases.length == overallGCP.length"})
-    @Ensures({"!Double.isInfinite(result)", "!Double.isNaN(result)"}) // Result should be a proper log10 probability
-    public double computeReadLikelihoodGivenHaplotype( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals,
-                                                       final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP ) {
-
-        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
-        final int X_METRIC_LENGTH = readBases.length + 2;
-        final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
-
-        // initial arrays to hold the probabilities of being in the match, insertion and deletion cases
-        final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-        final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-        final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-
-        initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
-
-        return computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, 0, matchMetricArray, XMetricArray, YMetricArray);
-    }
-
-    @Requires({"readBases.length == readQuals.length","readBases.length == insertionGOP.length","readBases.length == deletionGOP.length","readBases.length == overallGCP.length"})
-    @Ensures({"!Double.isInfinite(result)", "!Double.isNaN(result)"}) // Result should be a proper log10 probability
-    public double computeReadLikelihoodGivenHaplotype( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals,
-                                                       final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP, final int hapStartIndex,
-                                                       final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
-
-        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
-        final int X_METRIC_LENGTH = readBases.length + 2;
-        final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
-
-        // ensure that all the qual scores have valid values
-        for( int iii = 0; iii < readQuals.length; iii++ ) {
-            readQuals[iii] = ( readQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[iii] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[iii]) );
-        }
-
-        if( false ) {
-            final ArrayList<Integer> workQueue = new ArrayList<Integer>(); // holds a queue of starting work location (indices along the diagonal). Will be sorted each step
-            final ArrayList<Integer> workToBeAdded = new ArrayList<Integer>();
-            final ArrayList<Double> calculatedValues = new ArrayList<Double>();
-            final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH - 1;
-            workQueue.add( 1 ); // Always start a new thread at the baseline because of partially repeating sequences that match better in the latter half of the haplotype
-
-            for(int diag = 3; diag < numDiags; diag++) { // diag = 3 is the (1,2) element of the metric arrays. (1,1) is the initial condition and is purposefully skipped over
-                //Collections.sort(workQueue); // no need to sort because elements are guaranteed to be in ascending order
-                int el = 1;
-                for( int work : workQueue ) {
-                    // choose the appropriate diagonal baseline location
-                    int iii = 0;
-                    int jjj = diag;
-                    if( diag > Y_METRIC_LENGTH ) {
-                        iii = diag - Y_METRIC_LENGTH;
-                        jjj = Y_METRIC_LENGTH;
-                    }
-                    // move to the starting work location along the diagonal
-                    iii += work;
-                    jjj -= work;
-                    while( iii >= X_METRIC_LENGTH || jjj <= 0 ) {
-                        iii--;
-                        jjj++;
-                        work--;
-                    }
-                    if( !detectClusteredStartLocations(workToBeAdded, work ) ) {
-                        workToBeAdded.add(work); // keep this thread going once it has started
-                    }
-
-                    if( work >= el - 3 ) {
-                        // step along the diagonal in the forward direction, updating the match matrices and looking for a drop off from the maximum observed value
-                        double maxElement = Double.NEGATIVE_INFINITY;
-                        for( el = work; el < numDiags + 1; el++ ) {
-                            updateCell(iii, jjj, haplotypeBases, readBases, readQuals,
-                                insertionGOP, deletionGOP, overallGCP,  matchMetricArray,  XMetricArray, YMetricArray);
-                            final double bestMetric = MathUtils.max(matchMetricArray[iii][jjj], XMetricArray[iii][jjj], YMetricArray[iii][jjj]);
-                            calculatedValues.add(bestMetric);
-                            if( bestMetric > maxElement ) {
-                                maxElement = bestMetric;
-                            } else if( maxElement - bestMetric > BANDING_TOLERANCE ) {
-                                break;
-                            }
-                            if( ++iii >= X_METRIC_LENGTH ) { // don't walk off the edge of the matrix
-                                break;
-                            }
-                            if( --jjj <= 0 ) { // don't walk off the edge of the matrix
-                                break;
-                            }
-                        }
-
-                        // find a local maximum to start a new band in the work queue
-                        double localMaxElement = Double.NEGATIVE_INFINITY;
-                        int localMaxElementIndex = 0;
-                        for(int kkk = calculatedValues.size()-1; kkk >= 1; kkk--) {
-                            final double bestMetric = calculatedValues.get(kkk);
-                            if( bestMetric > localMaxElement ) {
-                                localMaxElement = bestMetric;
-                                localMaxElementIndex = kkk;
-                            } else if( localMaxElement - bestMetric > BANDING_TOLERANCE * 0.5 ) { // find a local maximum
-                                if( !detectClusteredStartLocations(workToBeAdded, work + localMaxElementIndex ) ) {
-                                    workToBeAdded.add( work + localMaxElementIndex );
-                                }
-                                break;
-                            }
-                        }
-                        calculatedValues.clear();
-
-                        // reset iii and jjj to the appropriate diagonal baseline location
-                        iii = 0;
-                        jjj = diag;
-                        if( diag > Y_METRIC_LENGTH ) {
-                            iii = diag - Y_METRIC_LENGTH;
-                            jjj = Y_METRIC_LENGTH;
-                        }
-                        // move to the starting work location along the diagonal
-                        iii += work-1;
-                        jjj -= work-1;
-
-                        // step along the diagonal in the reverse direction, updating the match matrices and looking for a drop off from the maximum observed value
-                        for( int traceBack = work - 1; traceBack > 0 && iii > 0 && jjj < Y_METRIC_LENGTH; traceBack--,iii--,jjj++ ) {
-                            updateCell(iii, jjj, haplotypeBases, readBases, readQuals,
-                                    insertionGOP, deletionGOP, overallGCP,  matchMetricArray,  XMetricArray, YMetricArray);
-                            final double bestMetric = MathUtils.max(matchMetricArray[iii][jjj], XMetricArray[iii][jjj], YMetricArray[iii][jjj]);
-                            if( bestMetric > maxElement ) {
-                                maxElement = bestMetric;
-                            } else if( maxElement - bestMetric > BANDING_TOLERANCE ) {
-                                break;
-                            }
-                        }
-                    }
-                }
-                workQueue.clear();
-                workQueue.addAll(workToBeAdded);
-                workToBeAdded.clear();
-            }
-        } else {
-            // simple rectangular version of update loop, slow
-            for( int iii = 1; iii < X_METRIC_LENGTH; iii++ ) {
-                for( int jjj = hapStartIndex + 1; jjj < Y_METRIC_LENGTH; jjj++ ) {
-                    if( (iii == 1 && jjj == 1) ) { continue; }
-                    updateCell(iii, jjj, haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP,
-                        matchMetricArray, XMetricArray, YMetricArray);
-                }
-            }
-        }
-
-        // final probability is the log10 sum of the last element in all three state arrays
-        final int endI = X_METRIC_LENGTH - 1;
-        final int endJ = Y_METRIC_LENGTH - 1;
-        return MathUtils.approximateLog10SumLog10(matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]);
-    }
-
-    private void updateCell( final int indI, final int indJ, final byte[] haplotypeBases, final byte[] readBases,
-                             final byte[] readQuals, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP,
-                             final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
-
-        // the read and haplotype indices are offset by one because the state arrays have an extra column to hold the initial conditions
-        final int im1 = indI - 1;
-        final int jm1 = indJ - 1;
-
-        // update the match array
-        double pBaseReadLog10 = 0.0; // Math.log10(1.0);
-        if( im1 > 0 && jm1 > 0 ) { // the emission probability is applied when leaving the state
-            final byte x = readBases[im1-1];
-            final byte y = haplotypeBases[jm1-1];
-            final byte qual = readQuals[im1-1];
-            pBaseReadLog10 = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) );
-        }
-        final int qualIndexGOP = ( im1 == 0 ? DEFAULT_GOP + DEFAULT_GOP : ( insertionGOP[im1-1] + deletionGOP[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : insertionGOP[im1-1] + deletionGOP[im1-1]) );
-        final double d0 = QualityUtils.qualToProbLog10((byte)qualIndexGOP);
-        final double e0 = ( im1 == 0 ? QualityUtils.qualToProbLog10(DEFAULT_GCP) : QualityUtils.qualToProbLog10(overallGCP[im1-1]) );
-        matchMetricArray[indI][indJ] = pBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ-1] + d0, XMetricArray[indI-1][indJ-1] + e0, YMetricArray[indI-1][indJ-1] + e0);
-
-        // update the X (insertion) array
-        final double d1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GOP) : QualityUtils.qualToErrorProbLog10(insertionGOP[im1-1]) );
-        final double e1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GCP) : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
-        final double qBaseReadLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
-        XMetricArray[indI][indJ] = qBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ] + d1, XMetricArray[indI-1][indJ] + e1);
-
-        // update the Y (deletion) array, with penalty of zero on the left and right flanks to allow for a local alignment within the haplotype
-        final double d2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) );
-        final double e2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
-        final double qBaseRefLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
-        YMetricArray[indI][indJ] = qBaseRefLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI][indJ-1] + d2, YMetricArray[indI][indJ-1] + e2);
-    }
-
-    // private function used by the banded approach to ensure the proposed bands are sufficiently distinct from each other
-    private boolean detectClusteredStartLocations( final ArrayList<Integer> list, int loc ) {
-        for(int x : list) {
-            if( Math.abs(x-loc) <= BANDING_CLUSTER_WINDOW ) {
-                return true;
-            }
-        }
-        return false;
-    }
-}
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java
@ -25,8 +25,11 @@
 package org.broadinstitute.sting.utils.codecs.hapmap;

 import org.broad.tribble.AsciiFeatureCodec;
+import org.broad.tribble.FeatureCodecHeader;
 import org.broad.tribble.annotation.Strand;
+import org.broad.tribble.readers.AsciiLineReader;
 import org.broad.tribble.readers.LineReader;
+import org.broad.tribble.readers.PositionalBufferedStream;

 import java.io.IOException;
 import java.util.Arrays;
@ -116,4 +119,10 @@ public class RawHapMapCodec extends AsciiFeatureCodec<RawHapMapFeature> {
        }
        return headerLine;
    }
+
+    @Override
+    public FeatureCodecHeader readHeader(final PositionalBufferedStream stream) throws IOException {
+        final AsciiLineReader br = new AsciiLineReader(stream);
+        return new FeatureCodecHeader(readHeader(br), br.getPosition());
+    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
@ -2,8 +2,6 @@ package org.broadinstitute.sting.utils.codecs.vcf;

 import org.broad.tribble.TribbleException;
 import org.broad.tribble.readers.LineReader;
-import org.broad.tribble.util.ParsingUtils;
-import org.broadinstitute.sting.utils.variantcontext.*;

 import java.io.IOException;
 import java.util.*;
@ -119,7 +117,7 @@ public class VCFCodec extends AbstractVCFCodec {
        // empty set for passes filters
        List<String> fFields = new LinkedList<String>();
        // otherwise we have to parse and cache the value
-        if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
+        if ( !filterString.contains(VCFConstants.FILTER_CODE_SEPARATOR) )
            fFields.add(filterString);
        else
            fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
--- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/ExactPairHMM.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/ExactPairHMM.java
@ -0,0 +1,107 @@
+package org.broadinstitute.sting.utils.pairhmm;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.QualityUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 10/16/12
+ */
+
+public class ExactPairHMM extends PairHMM {
+
+    @Override
+    public void initialize( final int READ_MAX_LENGTH, final int HAPLOTYPE_MAX_LENGTH ) {
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        final int X_METRIC_LENGTH = READ_MAX_LENGTH + 2;
+        final int Y_METRIC_LENGTH = HAPLOTYPE_MAX_LENGTH + 2;
+
+        matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+
+        for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) {
+            Arrays.fill(matchMetricArray[iii], Double.NEGATIVE_INFINITY);
+            Arrays.fill(XMetricArray[iii], Double.NEGATIVE_INFINITY);
+            Arrays.fill(YMetricArray[iii], Double.NEGATIVE_INFINITY);
+        }
+
+        // the initial condition
+        matchMetricArray[1][1] = 0.0; // Math.log10(1.0);
+    }
+
+    @Override
+    public double computeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
+                                                            final byte[] readBases,
+                                                            final byte[] readQuals,
+                                                            final byte[] insertionGOP,
+                                                            final byte[] deletionGOP,
+                                                            final byte[] overallGCP,
+                                                            final int hapStartIndex,
+                                                            final boolean recacheReadValues ) {
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        final int X_METRIC_LENGTH = readBases.length + 2;
+        final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
+
+        // ensure that all the qual scores have valid values
+        for( int iii = 0; iii < readQuals.length; iii++ ) {
+            readQuals[iii] = ( readQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[iii] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[iii]) );
+        }
+
+        // simple rectangular version of update loop, slow
+        for( int iii = 1; iii < X_METRIC_LENGTH; iii++ ) {
+            for( int jjj = hapStartIndex + 1; jjj < Y_METRIC_LENGTH; jjj++ ) {
+                if( (iii == 1 && jjj == 1) ) { continue; }
+                updateCell(iii, jjj, haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP,
+                        matchMetricArray, XMetricArray, YMetricArray);
+            }
+        }
+
+        // final probability is the log10 sum of the last element in all three state arrays
+        final int endI = X_METRIC_LENGTH - 1;
+        final int endJ = Y_METRIC_LENGTH - 1;
+        return MathUtils.log10sumLog10(new double[]{matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]});
+    }
+
+    private void updateCell( final int indI, final int indJ, final byte[] haplotypeBases, final byte[] readBases,
+                             final byte[] readQuals, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP,
+                             final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+
+        // the read and haplotype indices are offset by one because the state arrays have an extra column to hold the initial conditions
+        final int im1 = indI - 1;
+        final int jm1 = indJ - 1;
+
+        // update the match array
+        double pBaseReadLog10 = 0.0; // Math.log10(1.0);
+        if( im1 > 0 && jm1 > 0 ) { // the emission probability is applied when leaving the state
+            final byte x = readBases[im1-1];
+            final byte y = haplotypeBases[jm1-1];
+            final byte qual = readQuals[im1-1];
+            pBaseReadLog10 = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) );
+        }
+        final int qualIndexGOP = ( im1 == 0 ? DEFAULT_GOP + DEFAULT_GOP : ( insertionGOP[im1-1] + deletionGOP[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : insertionGOP[im1-1] + deletionGOP[im1-1]) );
+        final double d0 = QualityUtils.qualToProbLog10((byte)qualIndexGOP);
+        final double e0 = ( im1 == 0 ? QualityUtils.qualToProbLog10(DEFAULT_GCP) : QualityUtils.qualToProbLog10(overallGCP[im1-1]) );
+        matchMetricArray[indI][indJ] = pBaseReadLog10 + MathUtils.log10sumLog10(new double[]{matchMetricArray[indI-1][indJ-1] + d0, XMetricArray[indI-1][indJ-1] + e0, YMetricArray[indI-1][indJ-1] + e0});
+
+        // update the X (insertion) array
+        final double d1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GOP) : QualityUtils.qualToErrorProbLog10(insertionGOP[im1-1]) );
+        final double e1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GCP) : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
+        final double qBaseReadLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
+        XMetricArray[indI][indJ] = qBaseReadLog10 + MathUtils.log10sumLog10(new double[]{matchMetricArray[indI-1][indJ] + d1, XMetricArray[indI-1][indJ] + e1});
+
+        // update the Y (deletion) array, with penalty of zero on the left and right flanks to allow for a local alignment within the haplotype
+        final double d2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) );
+        final double e2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
+        final double qBaseRefLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
+        YMetricArray[indI][indJ] = qBaseRefLog10 + MathUtils.log10sumLog10(new double[]{matchMetricArray[indI][indJ-1] + d2, YMetricArray[indI][indJ-1] + e2});
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/OriginalPairHMM.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/OriginalPairHMM.java
@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.pairhmm;
+
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.QualityUtils;
+
+/**
+ * Util class for performing the pair HMM for local alignment. Figure 4.3 in Durbin 1998 book.
+ * User: rpoplin
+ * Date: 3/1/12
+ */
+
+public class OriginalPairHMM extends ExactPairHMM {
+
+    @Override
+    public double computeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
+                                                            final byte[] readBases,
+                                                            final byte[] readQuals,
+                                                            final byte[] insertionGOP,
+                                                            final byte[] deletionGOP,
+                                                            final byte[] overallGCP,
+                                                            final int hapStartIndex,
+                                                            final boolean recacheReadValues ) {
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        final int X_METRIC_LENGTH = readBases.length + 2;
+        final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
+
+        // ensure that all the qual scores have valid values
+        for( int iii = 0; iii < readQuals.length; iii++ ) {
+            readQuals[iii] = ( readQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[iii] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[iii]) );
+        }
+
+        // simple rectangular version of update loop, slow
+        for( int iii = 1; iii < X_METRIC_LENGTH; iii++ ) {
+            for( int jjj = hapStartIndex + 1; jjj < Y_METRIC_LENGTH; jjj++ ) {
+                if( (iii == 1 && jjj == 1) ) { continue; }
+                updateCell(iii, jjj, haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP,
+                        matchMetricArray, XMetricArray, YMetricArray);
+            }
+        }
+
+        // final probability is the log10 sum of the last element in all three state arrays
+        final int endI = X_METRIC_LENGTH - 1;
+        final int endJ = Y_METRIC_LENGTH - 1;
+        return MathUtils.approximateLog10SumLog10(matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]);
+    }
+
+    private void updateCell( final int indI, final int indJ, final byte[] haplotypeBases, final byte[] readBases,
+                             final byte[] readQuals, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP,
+                             final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+
+        // the read and haplotype indices are offset by one because the state arrays have an extra column to hold the initial conditions
+        final int im1 = indI - 1;
+        final int jm1 = indJ - 1;
+
+        // update the match array
+        double pBaseReadLog10 = 0.0; // Math.log10(1.0);
+        if( im1 > 0 && jm1 > 0 ) { // the emission probability is applied when leaving the state
+            final byte x = readBases[im1-1];
+            final byte y = haplotypeBases[jm1-1];
+            final byte qual = readQuals[im1-1];
+            pBaseReadLog10 = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) );
+        }
+        final int qualIndexGOP = ( im1 == 0 ? DEFAULT_GOP + DEFAULT_GOP : ( insertionGOP[im1-1] + deletionGOP[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : insertionGOP[im1-1] + deletionGOP[im1-1]) );
+        final double d0 = QualityUtils.qualToProbLog10((byte)qualIndexGOP);
+        final double e0 = ( im1 == 0 ? QualityUtils.qualToProbLog10(DEFAULT_GCP) : QualityUtils.qualToProbLog10(overallGCP[im1-1]) );
+        matchMetricArray[indI][indJ] = pBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ-1] + d0, XMetricArray[indI-1][indJ-1] + e0, YMetricArray[indI-1][indJ-1] + e0);
+
+        // update the X (insertion) array
+        final double d1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GOP) : QualityUtils.qualToErrorProbLog10(insertionGOP[im1-1]) );
+        final double e1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GCP) : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
+        final double qBaseReadLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
+        XMetricArray[indI][indJ] = qBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ] + d1, XMetricArray[indI-1][indJ] + e1);
+
+        // update the Y (deletion) array, with penalty of zero on the left and right flanks to allow for a local alignment within the haplotype
+        final double d2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) );
+        final double e2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
+        final double qBaseRefLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
+        YMetricArray[indI][indJ] = qBaseRefLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI][indJ-1] + d2, YMetricArray[indI][indJ-1] + e2);
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java
@ -0,0 +1,45 @@
+package org.broadinstitute.sting.utils.pairhmm;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 10/16/12
+ */
+
+public abstract class PairHMM {
+    protected static final Byte MAX_CACHED_QUAL = Byte.MAX_VALUE;
+    protected static final byte DEFAULT_GOP = (byte) 45;
+    protected static final byte DEFAULT_GCP = (byte) 10;
+
+    public enum HMM_IMPLEMENTATION {
+        /* Very slow implementation which uses very accurate log10 sum functions. Only meant to be used as a reference test implementation */
+        EXACT,
+        /* PairHMM as implemented for the UnifiedGenotyper. Uses log10 sum functions accurate to only 1E-4 */
+        ORIGINAL,
+        /* Optimized version of the PairHMM which caches per-read computations */
+        CACHING,
+        /* Optimized version of the PairHMM which caches per-read computations and operations in real space to avoid costly sums of log10'ed likelihoods */
+        LOGLESS_CACHING
+    }
+
+    protected double[][] matchMetricArray = null;
+    protected double[][] XMetricArray = null;
+    protected double[][] YMetricArray = null;
+
+    public abstract void initialize( final int READ_MAX_LENGTH, final int HAPLOTYPE_MAX_LENGTH );
+
+    @Requires({"readBases.length == readQuals.length", "readBases.length == insertionGOP.length", "readBases.length == deletionGOP.length",
+               "readBases.length == overallGCP.length", "matchMetricArray!=null", "XMetricArray!=null", "YMetricArray!=null"})
+    @Ensures({"!Double.isInfinite(result)", "!Double.isNaN(result)"}) // Result should be a proper log10 likelihood
+    public abstract double computeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
+                                                                     final byte[] readBases,
+                                                                     final byte[] readQuals,
+                                                                     final byte[] insertionGOP,
+                                                                     final byte[] deletionGOP,
+                                                                     final byte[] overallGCP,
+                                                                     final int hapStartIndex,
+                                                                     final boolean recacheReadValues );
+}