Refactoring/fixing up UG HMM code: a) Make code use PairHMM class instead of having duplicated code. That way UG and HaplotypeCaller now use same core code. Changes to be able to do this: 1. Compute context-dependent GOP as a function of read, not of haplotype, b) Extracted code to initialize HMM arrays into separate method, c) Move PairHMM class and unit test to public, d) Reenable banded code in PairHMM, inverted sense of flag (true=enable feature) but leave off in HaplotypeCaller.

2012-04-11 13:56:51 -04:00 · 2012-04-11 13:56:51 -04:00 · f9f8589692
parent ec9822b2a7
commit f9f8589692
4 changed files with 695 additions and 119 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@ -137,11 +137,11 @@ public class UnifiedArgumentCollection {

    @Hidden
    @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
-    public double INDEL_GAP_CONTINUATION_PENALTY = 10.0;
+    public byte INDEL_GAP_CONTINUATION_PENALTY = 10;

    @Hidden
    @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
-    public double INDEL_GAP_OPEN_PENALTY = 45.0;
+    public byte INDEL_GAP_OPEN_PENALTY = 45;

    @Hidden
    @Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@ -31,7 +31,9 @@ import net.sf.samtools.CigarOperator;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.PairHMM;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -41,13 +43,14 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
+import java.util.Map;


 public class PairHMMIndelErrorModel {
    public static final int BASE_QUAL_THRESHOLD = 20;

    private boolean DEBUG = false;
-    private boolean bandedLikelihoods = false;
+    private boolean bandedLikelihoods = true;

    private static final int MAX_CACHED_QUAL = 127;

@ -60,12 +63,12 @@ public class PairHMMIndelErrorModel {
    private static final int START_HRUN_GAP_IDX = 4;
    private static final int MAX_HRUN_GAP_IDX = 20;

-    private static final double MIN_GAP_OPEN_PENALTY = 30.0;
-    private static final double MIN_GAP_CONT_PENALTY = 10.0;
-    private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this.
+    private static final byte MIN_GAP_OPEN_PENALTY = 30;
+    private static final byte MIN_GAP_CONT_PENALTY = 10;
+    private static final byte GAP_PENALTY_HRUN_STEP = 1; // each increase in hrun decreases gap penalty by this.

-    private final double[] GAP_OPEN_PROB_TABLE;
-    private final double[] GAP_CONT_PROB_TABLE;
+    private final byte[] GAP_OPEN_PROB_TABLE;
+    private final byte[] GAP_CONT_PROB_TABLE;

    /////////////////////////////
    // Private Member Variables
@ -86,42 +89,42 @@ public class PairHMMIndelErrorModel {
        }
    }

-    public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean bandedLikelihoods) {
+    public PairHMMIndelErrorModel(byte indelGOP, byte indelGCP, boolean deb, boolean bandedLikelihoods) {
        this.DEBUG = deb;
-        this.bandedLikelihoods = bandedLikelihoods;
+        //this.bandedLikelihoods = bandedLikelihoods;

        // fill gap penalty table, affine naive model:
-        this.GAP_CONT_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
-        this.GAP_OPEN_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
+        this.GAP_CONT_PROB_TABLE = new byte[MAX_HRUN_GAP_IDX];
+        this.GAP_OPEN_PROB_TABLE = new byte[MAX_HRUN_GAP_IDX];

-        double gop = -indelGOP/10.0;
-        double gcp = -indelGCP/10.0;

        for (int i = 0; i < START_HRUN_GAP_IDX; i++) {
-            GAP_OPEN_PROB_TABLE[i] = gop;
-            GAP_CONT_PROB_TABLE[i] = gcp;
+            GAP_OPEN_PROB_TABLE[i] = indelGOP;
+            GAP_CONT_PROB_TABLE[i] = indelGCP;
        }

        double step = GAP_PENALTY_HRUN_STEP/10.0;

-        double maxGOP = -MIN_GAP_OPEN_PENALTY/10.0;  // phred to log prob
-        double maxGCP = -MIN_GAP_CONT_PENALTY/10.0;  // phred to log prob
+        // initialize gop and gcp to their default values
+        byte gop = indelGOP;
+        byte gcp = indelGCP;

+        // all of the following is computed in QUal-space
        for (int i=START_HRUN_GAP_IDX; i < MAX_HRUN_GAP_IDX; i++) {
-            gop += step;
-            if (gop > maxGOP)
-                gop = maxGOP;
+            gop -= GAP_PENALTY_HRUN_STEP;
+            if (gop < MIN_GAP_OPEN_PENALTY)
+                gop = MIN_GAP_OPEN_PENALTY;

-            gcp += step;
-            if(gcp > maxGCP)
-                gcp = maxGCP;
+            gcp -= step;
+            if(gcp < MIN_GAP_CONT_PENALTY)
+                gcp = MIN_GAP_CONT_PENALTY;
            GAP_OPEN_PROB_TABLE[i] = gop;
            GAP_CONT_PROB_TABLE[i] = gcp;
        }

    }

-    static private void getContextHomopolymerLength(final byte[] refBytes, int[] hrunArray) {
+    static private void getContextHomopolymerLength(final byte[] refBytes, final int[] hrunArray) {
        // compute forward hrun length, example:
        // AGGTGACCCCCCTGAGAG
        // 001000012345000000
@ -155,7 +158,7 @@ public class PairHMMIndelErrorModel {


    private void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
-                            double[] currentGOP, double[] currentGCP,  double[][] matchMetricArray,  double[][] XMetricArray,  double[][] YMetricArray) {
+                            byte[] currentGOP, byte[] currentGCP,  double[][] matchMetricArray,  double[][] XMetricArray,  double[][] YMetricArray) {
        if (indI > 0 && indJ > 0) {
            final int im1 = indI -1;
            final int jm1 = indJ - 1;
@ -168,20 +171,20 @@ public class PairHMMIndelErrorModel {

            matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]});

-            final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
-            final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
+            final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGOP[im1]/10.0;
+            final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGCP[im1]/10.0;

            XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);

            // update Y array
-            final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
-            final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
+            final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGOP[im1]/10.0;
+            final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGCP[im1]/10.0;
            YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
        }
    }

    private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
-                                                                 double[] currentGOP, double[] currentGCP, int indToStart,
+                                                                 byte[] currentGOP, byte[] currentGCP, int indToStart,
                                                                 double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {

        final int X_METRIC_LENGTH = readBases.length+1;
@ -349,8 +352,9 @@ public class PairHMMIndelErrorModel {

    }

-    private void fillGapProbabilities(int[] hrunProfile,
-                                      double[] contextLogGapOpenProbabilities, double[] contextLogGapContinuationProbabilities) {
+    private void fillGapProbabilities(final int[] hrunProfile,
+                                      final byte[] contextLogGapOpenProbabilities,
+                                      final byte[] contextLogGapContinuationProbabilities) {
        // fill based on lookup table
        for (int i = 0; i < hrunProfile.length; i++) {
            if (hrunProfile[i] >= MAX_HRUN_GAP_IDX) {
@ -372,27 +376,8 @@ public class PairHMMIndelErrorModel {
        final int readCounts[] = new int[pileup.getNumberOfElements()];
        int readIdx=0;

-        LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
-        LinkedHashMap<Allele,double[]> gapContProbabilityMap = new LinkedHashMap<Allele,double[]>();
-
-        // will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
-        // todo -- refactor into separate function
-        for (Allele a: haplotypeMap.keySet()) {
-            Haplotype haplotype = haplotypeMap.get(a);
-            byte[] haplotypeBases = haplotype.getBases();
-            double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
-            double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
-
-            // get homopolymer length profile for current haplotype
-            int[] hrunProfile = new int[haplotypeBases.length];
-            getContextHomopolymerLength(haplotypeBases,hrunProfile);
-            fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
-
-            gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
-            gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
-
-        }

+        PairHMM pairHMM = new PairHMM(bandedLikelihoods);
        for (PileupElement p: pileup) {
            // > 1 when the read is a consensus read representing multiple independent observations
            readCounts[readIdx] = p.getRepresentativeCount();
@ -408,12 +393,27 @@ public class PairHMMIndelErrorModel {
            else {
                // System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
                GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
+
                if (read.isEmpty())
                    continue;

-                if(ReadUtils.is454Read(read)) {
+                if (read.getUnclippedEnd() > ref.getWindow().getStop())
+                    read = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, ref.getWindow().getStop());
+
+                if (read.isEmpty())
                    continue;
-                }
+
+                if (read.getUnclippedStart() < ref.getWindow().getStart())
+                    read = ReadClipper.hardClipByReferenceCoordinatesLeftTail (read, ref.getWindow().getStart());
+
+                if (read.isEmpty())
+                    continue;
+                // hard-clip low quality ends - this may introduce extra H elements in CIGAR string
+                read = ReadClipper.hardClipLowQualEnds(read,(byte)BASE_QUAL_THRESHOLD );
+
+                if (read.isEmpty())
+                    continue;
+

                // get bases of candidate haplotypes that overlap with reads
                final int trailingBases = 3;
@ -469,54 +469,56 @@ public class PairHMMIndelErrorModel {
                unclippedReadBases = read.getReadBases();
                unclippedReadQuals = read.getBaseQualities();

-                // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative,
-                // and may leave a string of Q2 bases still hanging off the reads.
-                for (int i=numStartSoftClippedBases; i < unclippedReadBases.length; i++) {
-                    if (unclippedReadQuals[i] < BASE_QUAL_THRESHOLD)
-                        numStartClippedBases++;
-                    else
-                        break;
+                final int extraOffset = Math.abs(eventLength);

-                }
-                for (int i=unclippedReadBases.length-numEndSoftClippedBases-1; i >= 0; i-- ){
-                    if (unclippedReadQuals[i] < BASE_QUAL_THRESHOLD)
-                        numEndClippedBases++;
-                    else
-                        break;
-                }
+                /**
+                 * Compute genomic locations that candidate haplotypes will span.
+                 * Read start and stop locations (variables readStart and readEnd) are the original unclipped positions from SAMRecord,
+                 * adjusted by hard clips from Cigar string and by qual-based soft-clipping performed above.
+                 * We will propose haplotypes that overlap the read with some padding.
+                 * True read start = readStart + numStartClippedBases - ReadUtils.getFirstInsertionOffset(read)
+                 * Last term is because if a read starts with an insertion then these bases are not accounted for in readStart.
+                 * trailingBases is a padding constant(=3) and we additionally add abs(eventLength) to both sides of read to be able to
+                 * differentiate context between two haplotypes
+                 */
+                long startLocationInRefForHaplotypes = Math.max(readStart + numStartClippedBases - trailingBases - ReadUtils.getFirstInsertionOffset(read)-extraOffset, 0);
+                long stopLocationInRefForHaplotypes =  readEnd -numEndClippedBases  + trailingBases + ReadUtils.getLastInsertionOffset(read)+extraOffset;

-                int extraOffset = Math.abs(eventLength);
+                if (DEBUG)
+                    System.out.format("orig Start:%d orig stop: %d\n", startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes);

-                long start = Math.max(readStart + numStartClippedBases - trailingBases - ReadUtils.getFirstInsertionOffset(read)-extraOffset, 0);
-                long stop =  readEnd -numEndClippedBases  + trailingBases + ReadUtils.getLastInsertionOffset(read)+extraOffset;
-
-                // Variables start and stop are coordinates (inclusive) where we want to get the haplotype from.
                int readLength = read.getReadLength()-numStartSoftClippedBases-numEndSoftClippedBases;
                // check if start of read will be before start of reference context
-                if (start < ref.getWindow().getStart())// read starts before haplotype: read will have to be cut
-                    start = ref.getWindow().getStart();
-
+                if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
+                    // read starts before haplotype: read will have to be cut
+                    //numStartClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
+                    startLocationInRefForHaplotypes = ref.getWindow().getStart();
+                }
                // check also if end of read will go beyond reference context
-                if (stop > ref.getWindow().getStop())
-                    stop = ref.getWindow().getStop();
+                if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
+                    //numEndClippedBases += stopLocationInRefForHaplotypes - ref.getWindow().getStop();
+                    stopLocationInRefForHaplotypes = ref.getWindow().getStop();
+                }

-                // if there's an insertion in the read, the read stop position will be less than start + read length,
+                // if there's an insertion in the read, the read stop position will be less than start + read legnth,
                // but we want to compute likelihoods in the whole region that a read might overlap
-                if (stop <= start + readLength) {
-                    stop = start + readLength-1;
+                if (stopLocationInRefForHaplotypes <= startLocationInRefForHaplotypes + readLength) {
+                    stopLocationInRefForHaplotypes = startLocationInRefForHaplotypes + readLength-1;
                }

                // ok, we now figured out total number of clipped bases on both ends.
                // Figure out where we want to place the haplotype to score read against
-                /*
-               if (DEBUG)
-                   System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
-                           numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength());
-                */
+
+                if (DEBUG)
+                    System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
+                            numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength());


                LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();

+                /**
+                 * Check if we'll end up with an empty read once all clipping is done
+                 */
                if (numStartClippedBases + numEndClippedBases >= unclippedReadBases.length) {
                    int j=0;
                    for (Allele a: haplotypeMap.keySet()) {
@ -537,67 +539,81 @@ public class PairHMMIndelErrorModel {
                    // initialize path metric and traceback memories for likelihood computation
                    double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null;
                    byte[] previousHaplotypeSeen = null;
-                    double[] previousGOP = null;
-                    double[] previousGCP = null;
-                    int startIdx;
+                    int startIndexInHaplotype = 0;
+                    final byte[] contextLogGapOpenProbabilities = new byte[readBases.length];
+                    final byte[] contextLogGapContinuationProbabilities  = new byte[readBases.length];
+
+                    // get homopolymer length profile for current haplotype
+                    int[] hrunProfile = new int[readBases.length];
+                    getContextHomopolymerLength(readBases,hrunProfile);
+                    fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
+
+
                    for (Allele a: haplotypeMap.keySet()) {

-
                        Haplotype haplotype = haplotypeMap.get(a);
-                        if (stop > haplotype.getStopPosition())
-                            stop = haplotype.getStopPosition();

-                        if (start < haplotype.getStartPosition())
-                            start = haplotype.getStartPosition();
+                        if (stopLocationInRefForHaplotypes > haplotype.getStopPosition())
+                            stopLocationInRefForHaplotypes = haplotype.getStopPosition();

-                        // cut haplotype bases
-                        long indStart = start - haplotype.getStartPosition();
-                        long indStop =  stop - haplotype.getStartPosition();
+                        if (startLocationInRefForHaplotypes < haplotype.getStartPosition())
+                            startLocationInRefForHaplotypes = haplotype.getStartPosition();
+
+                        final long indStart = startLocationInRefForHaplotypes - haplotype.getStartPosition();
+                        final long indStop =  stopLocationInRefForHaplotypes - haplotype.getStartPosition();

                        double readLikelihood;
                        if (DEBUG)
                            System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n",
-                                    indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength(), read.getCigar().toString());
+                                    indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength(), read.getCigar().toString());
+

                        if (indStart < 0 || indStop >= haplotype.getBases().length || indStart > indStop) {
                            // read spanned more than allowed reference context: we currently can't deal with this
-                            readLikelihood =0;
+                            throw new ReviewedStingException("BUG! bad read clipping");
+//                            readLikelihood =0;
                        } else
                        {
                            final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
                                    (int)indStart, (int)indStop);

-                            if (matchMetricArray == null) {
-                                final int X_METRIC_LENGTH = readBases.length+1;
-                                final int Y_METRIC_LENGTH = haplotypeBases.length+1;
+                            final int X_METRIC_LENGTH = readBases.length+1;
+                            final int Y_METRIC_LENGTH = haplotypeBases.length+1;

+                            if (matchMetricArray == null) {
+                                //no need to reallocate arrays for each new haplotype, as length won't change
                                matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
                                XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
                                YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+
                            }
-                            final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
-                            final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
+
+                            pairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
+
+  /*
                            if (previousHaplotypeSeen == null)
-                                startIdx = 0;
-                            else {
-                                final int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
-                                final int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP);
-                                final int s3 = computeFirstDifferingPosition(currentContextGCP, previousGCP);
-                                startIdx = Math.min(Math.min(s1, s2), s3);
-                            }
+                                startIndexInHaplotype = 0;
+                            else
+                                startIndexInHaplotype = 0; //computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
+
                            previousHaplotypeSeen = haplotypeBases.clone();
-                            previousGOP = currentContextGOP.clone();
-                            previousGCP = currentContextGCP.clone();
+    */
+                            readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
+                                    contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities,
+                                    startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);

+                      /*      double r2 = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, contextLogGapOpenProbabilities,
+                                    contextLogGapContinuationProbabilities, 0, matchMetricArray, XMetricArray, YMetricArray);

-                            readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals,
-                                    currentContextGOP, currentContextGCP, startIdx, matchMetricArray, XMetricArray, YMetricArray);
-
-                            if (DEBUG) {
+                            if (readLikelihood > 0) {
+                                int k=0;
+                            }
+                        */    if (DEBUG) {
                                System.out.println("H:"+new String(haplotypeBases));
                                System.out.println("R:"+new String(readBases));
                                System.out.format("L:%4.2f\n",readLikelihood);
-                                System.out.format("StPos:%d\n", startIdx);
+                               // System.out.format("Lorig:%4.2f\n",r2);
+                                System.out.format("StPos:%d\n", startIndexInHaplotype);
                            }
                        }
                        readEl.put(a,readLikelihood);
--- a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java
+++ b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java
@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+
+import java.util.*;
+
+/**
+ * Util class for performing the pair HMM for local alignment. Figure 4.3 in Durbin 1998 book.
+ * User: rpoplin
+ * Date: 3/1/12
+ */
+
+public class PairHMM {
+    private static final int MAX_CACHED_QUAL = (int)Byte.MAX_VALUE;
+    private static final byte DEFAULT_GOP = (byte) 45;
+    private static final byte DEFAULT_GCP = (byte) 10;
+    private static final double BANDING_TOLERANCE = 22.0;
+    private static final int BANDING_CLUSTER_WINDOW = 12;
+    private final boolean doBanded;
+
+    public PairHMM() {
+        doBanded = false;
+    }
+
+    public PairHMM( final boolean doBanded ) {
+        this.doBanded = doBanded;
+    }
+
+    
+    public void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray,
+                                 final int X_METRIC_LENGTH) {
+
+        for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) {
+            Arrays.fill(matchMetricArray[iii], Double.NEGATIVE_INFINITY);
+            Arrays.fill(XMetricArray[iii], Double.NEGATIVE_INFINITY);
+            Arrays.fill(YMetricArray[iii], Double.NEGATIVE_INFINITY);
+        }
+
+        // the initial condition
+        matchMetricArray[1][1] = 0.0; // Math.log10(1.0);
+
+    }
+
+    @Requires({"readBases.length == readQuals.length","readBases.length == insertionGOP.length","readBases.length == deletionGOP.length","readBases.length == overallGCP.length"})
+    @Ensures({"!Double.isInfinite(result)", "!Double.isNaN(result)"}) // Result should be a proper log10 probability
+    public double computeReadLikelihoodGivenHaplotype( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals,
+                                                       final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP ) {
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions
+        final int X_METRIC_LENGTH = readBases.length + 1;
+        final int Y_METRIC_LENGTH = haplotypeBases.length + 1;
+
+        // initial arrays to hold the probabilities of being in the match, insertion and deletion cases
+        final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+
+        initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
+
+        return computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, 0, matchMetricArray, XMetricArray, YMetricArray);
+    }
+
+    @Requires({"readBases.length == readQuals.length","readBases.length == insertionGOP.length","readBases.length == deletionGOP.length","readBases.length == overallGCP.length"})
+    @Ensures({"!Double.isInfinite(result)", "!Double.isNaN(result)"}) // Result should be a proper log10 probability
+    public double computeReadLikelihoodGivenHaplotype( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals,
+                                                       final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP, final int hapStartIndex,
+                                                       final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions
+        final int X_METRIC_LENGTH = readBases.length + 1;
+        final int Y_METRIC_LENGTH = haplotypeBases.length + 1;
+
+        if( doBanded ) {
+            final ArrayList<Integer> workQueue = new ArrayList<Integer>(); // holds a queue of starting work location (indices along the diagonal). Will be sorted each step
+            final ArrayList<Integer> workToBeAdded = new ArrayList<Integer>();
+            final ArrayList<Double> calculatedValues = new ArrayList<Double>();
+            final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH - 1;
+            workQueue.add( 1 ); // Always start a new thread at the baseline because of partially repeating sequences that match better in the latter half of the haplotype
+
+            for(int diag = 3; diag < numDiags; diag++) { // diag = 3 is the (1,2) element of the metric arrays. (1,1) is the initial condition and is purposefully skipped over
+                //Collections.sort(workQueue); // no need to sort because elements are guaranteed to be in ascending order
+                int el = 1;
+                for( int work : workQueue ) {
+                    // choose the appropriate diagonal baseline location
+                    int iii = 0;
+                    int jjj = diag;
+                    if( diag > Y_METRIC_LENGTH ) {
+                        iii = diag - Y_METRIC_LENGTH;
+                        jjj = Y_METRIC_LENGTH;
+                    }
+                    // move to the starting work location along the diagonal
+                    iii += work;
+                    jjj -= work;
+                    while( iii >= X_METRIC_LENGTH || jjj <= 0 ) {
+                        iii--;
+                        jjj++;
+                        work--;
+                    }
+                    if( !detectClusteredStartLocations(workToBeAdded, work ) ) {
+                        workToBeAdded.add(work); // keep this thread going once it has started
+                    }
+
+                    if( work >= el - 3 ) {
+                        // step along the diagonal in the forward direction, updating the match matrices and looking for a drop off from the maximum observed value
+                        double maxElement = Double.NEGATIVE_INFINITY;
+                        for( el = work; el < numDiags + 1; el++ ) {
+                            updateCell(iii, jjj, haplotypeBases, readBases, readQuals,
+                                insertionGOP, deletionGOP, overallGCP,  matchMetricArray,  XMetricArray, YMetricArray);
+                            final double bestMetric = MathUtils.max(matchMetricArray[iii][jjj], XMetricArray[iii][jjj], YMetricArray[iii][jjj]);
+                            calculatedValues.add(bestMetric);
+                            if( bestMetric > maxElement ) {
+                                maxElement = bestMetric;
+                            } else if( maxElement - bestMetric > BANDING_TOLERANCE ) {
+                                break;
+                            }
+                            if( ++iii >= X_METRIC_LENGTH ) { // don't walk off the edge of the matrix
+                                break;
+                            }
+                            if( --jjj <= 0 ) { // don't walk off the edge of the matrix
+                                break;
+                            }
+                        }
+
+                        // find a local maximum to start a new band in the work queue
+                        double localMaxElement = Double.NEGATIVE_INFINITY;
+                        int localMaxElementIndex = 0;
+                        for(int kkk = calculatedValues.size()-1; kkk >= 1; kkk--) {
+                            final double bestMetric = calculatedValues.get(kkk);
+                            if( bestMetric > localMaxElement ) {
+                                localMaxElement = bestMetric;
+                                localMaxElementIndex = kkk;
+                            } else if( localMaxElement - bestMetric > BANDING_TOLERANCE * 0.5 ) { // find a local maximum
+                                if( !detectClusteredStartLocations(workToBeAdded, work + localMaxElementIndex ) ) {
+                                    workToBeAdded.add( work + localMaxElementIndex );
+                                }
+                                break;
+                            }
+                        }
+                        calculatedValues.clear();
+
+                        // reset iii and jjj to the appropriate diagonal baseline location
+                        iii = 0;
+                        jjj = diag;
+                        if( diag > Y_METRIC_LENGTH ) {
+                            iii = diag - Y_METRIC_LENGTH;
+                            jjj = Y_METRIC_LENGTH;
+                        }
+                        // move to the starting work location along the diagonal
+                        iii += work-1;
+                        jjj -= work-1;
+
+                        // step along the diagonal in the reverse direction, updating the match matrices and looking for a drop off from the maximum observed value
+                        for( int traceBack = work - 1; traceBack > 0 && iii > 0 && jjj < Y_METRIC_LENGTH; traceBack--,iii--,jjj++ ) {
+                            updateCell(iii, jjj, haplotypeBases, readBases, readQuals,
+                                    insertionGOP, deletionGOP, overallGCP,  matchMetricArray,  XMetricArray, YMetricArray);
+                            final double bestMetric = MathUtils.max(matchMetricArray[iii][jjj], XMetricArray[iii][jjj], YMetricArray[iii][jjj]);
+                            if( bestMetric > maxElement ) {
+                                maxElement = bestMetric;
+                            } else if( maxElement - bestMetric > BANDING_TOLERANCE ) {
+                                break;
+                            }
+                        }
+                    }
+                }
+                workQueue.clear();
+                workQueue.addAll(workToBeAdded);
+                workToBeAdded.clear();
+            }
+        } else {
+            // simple rectangular version of update loop, slow
+            for( int iii = 1; iii < X_METRIC_LENGTH; iii++ ) {
+                for( int jjj = hapStartIndex + 1; jjj < Y_METRIC_LENGTH; jjj++ ) {
+                    if( (iii == 1 && jjj == 1) ) { continue; }
+                    updateCell(iii, jjj, haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP,
+                        matchMetricArray, XMetricArray, YMetricArray);
+                }
+            }
+        }
+
+        // final probability is the log10 sum of the last element in all three state arrays
+        final int endI = X_METRIC_LENGTH - 1;
+        final int endJ = Y_METRIC_LENGTH - 1;
+        return MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]});
+    }
+
+    private void updateCell( final int indI, final int indJ, final byte[] haplotypeBases, final byte[] readBases,
+                             final byte[] readQuals, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP,
+                             final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+
+        // the read and haplotype indices are offset by one because the state arrays have an extra column to hold the initial conditions
+        final int im1 = indI - 1;
+        final int jm1 = indJ - 1;
+
+        // update the match array
+        double pBaseReadLog10 = 0.0; // Math.log10(1.0);
+        if( im1 > 0 && jm1 > 0 ) { // the emission probability is applied when leaving the state
+            final byte x = readBases[im1-1];
+            final byte y = haplotypeBases[jm1-1];
+            final byte qual = ( readQuals[im1-1] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1-1]) );
+            pBaseReadLog10 = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) );
+        }
+        final int qualIndexGOP = ( im1 == 0 ? DEFAULT_GOP + DEFAULT_GOP : ( insertionGOP[im1-1] + deletionGOP[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : insertionGOP[im1-1] + deletionGOP[im1-1]) );
+        final double d0 = QualityUtils.qualToProbLog10((byte)qualIndexGOP);
+        final double e0 = ( im1 == 0 ? QualityUtils.qualToProbLog10(DEFAULT_GCP) : QualityUtils.qualToProbLog10(overallGCP[im1-1]) );
+        matchMetricArray[indI][indJ] = pBaseReadLog10 + MathUtils.approximateLog10SumLog10(
+                new double[]{matchMetricArray[indI-1][indJ-1] + d0, XMetricArray[indI-1][indJ-1] + e0, YMetricArray[indI-1][indJ-1] + e0});
+
+        // update the X (insertion) array
+        final double d1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GOP) : QualityUtils.qualToErrorProbLog10(insertionGOP[im1-1]) );
+        final double e1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GCP) : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
+        final double qBaseReadLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
+        XMetricArray[indI][indJ] = qBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ] + d1, XMetricArray[indI-1][indJ] + e1);
+
+        // update the Y (deletion) array, with penalty of zero on the left and right flanks to allow for a local alignment within the haplotype
+        final double d2 = ( im1 == 0 || im1 == readBases.length - 1 ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) );
+        final double e2 = ( im1 == 0 || im1 == readBases.length - 1 ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
+        final double qBaseRefLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
+        YMetricArray[indI][indJ] = qBaseRefLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI][indJ-1] + d2, YMetricArray[indI][indJ-1] + e2);
+    }
+
+    // private function used by the banded approach to ensure the proposed bands are sufficiently distinct from each other
+    private boolean detectClusteredStartLocations( final ArrayList<Integer> list, int loc ) {
+        for(int x : list) {
+            if( Math.abs(x-loc) <= BANDING_CLUSTER_WINDOW ) {
+                return true;
+            }
+        }
+        return false;
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/utils/PairHMMUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/PairHMMUnitTest.java
@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// our package
+package org.broadinstitute.sting.utils;
+
+
+// the imports for unit testing.
+
+
+import org.broadinstitute.sting.BaseTest;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+
+public class PairHMMUnitTest extends BaseTest {
+    final static boolean EXTENSIVE_TESTING = true;
+    PairHMM hmm = new PairHMM( false ); // reference implementation
+    PairHMM bandedHMM = new PairHMM( true ); // algorithm with banding
+
+    // --------------------------------------------------------------------------------
+    //
+    // Provider
+    //
+    // --------------------------------------------------------------------------------
+
+    private class BasicLikelihoodTestProvider extends TestDataProvider {
+        final String ref, read;
+        final byte[] refBasesWithContext, readBasesWithContext;
+        final int baseQual, insQual, delQual, gcp;
+        final int expectedQual;
+        final static String CONTEXT = "ACGTAATGACGATTGCA";
+        final static String LEFT_FLANK = "GATTTATCATCGAGTCTGC";
+        final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTTA";
+
+        public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp) {
+            this(ref, read, baseQual, insQual, delQual, expectedQual, gcp, false, false);
+        }
+
+        public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp, final boolean left, final boolean right) {
+            super(BasicLikelihoodTestProvider.class, String.format("ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual));
+            this.baseQual = baseQual;
+            this.delQual = delQual;
+            this.insQual = insQual;
+            this.gcp = gcp;
+            this.read = read;
+            this.ref = ref;
+            this.expectedQual = expectedQual;
+
+            refBasesWithContext = asBytes(ref, left, right);
+            readBasesWithContext = asBytes(read, false, false);
+        }
+
+        public double expectedLogL() {
+            return expectedQual / -10.0;
+        }
+
+        public double tolerance() {
+            return 0.1; // TODO FIXME arbitrary
+        }
+
+        public double calcLogL() {
+
+            double logL = hmm.computeReadLikelihoodGivenHaplotype(
+                    refBasesWithContext, readBasesWithContext,
+                    qualAsBytes(baseQual, false), qualAsBytes(insQual, true), qualAsBytes(delQual, true),
+                    qualAsBytes(gcp, false));
+
+            return logL;
+        }
+
+        private final byte[] asBytes(final String bases, final boolean left, final boolean right) {
+            return ( (left ? LEFT_FLANK : "") + CONTEXT + bases + CONTEXT + (right ? RIGHT_FLANK : "")).getBytes();
+        }
+
+        private byte[] qualAsBytes(final int phredQual, final boolean doGOP) {
+            final byte phredQuals[] = new byte[readBasesWithContext.length];
+            // initialize everything to MASSIVE_QUAL so it cannot be moved by HMM
+            Arrays.fill(phredQuals, (byte)100);
+
+            // update just the bases corresponding to the provided micro read with the quality scores
+            if( doGOP ) {
+                phredQuals[0 + CONTEXT.length()] = (byte)phredQual;
+            } else {
+                for ( int i = 0; i < read.length(); i++)
+                    phredQuals[i + CONTEXT.length()] = (byte)phredQual;
+            }
+
+            return phredQuals;
+        }
+    }
+
+    final Random random = new Random(87865573);
+    private class BandedLikelihoodTestProvider extends TestDataProvider {
+        final String ref, read;
+        final byte[] refBasesWithContext, readBasesWithContext;
+        final int baseQual, insQual, delQual, gcp;
+        final int expectedQual;
+        final static String LEFT_CONTEXT = "ACGTAATGACGCTACATGTCGCCAACCGTC";
+        final static String RIGHT_CONTEXT = "TACGGCTTCATATAGGGCAATGTGTGTGGCAAAA";
+        final static String LEFT_FLANK = "GATTTATCATCGAGTCTGTT";
+        final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTCCGTA";
+        final byte[] baseQuals, insQuals, delQuals, gcps;
+
+        public BandedLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp) {
+            this(ref, read, baseQual, insQual, delQual, expectedQual, gcp, false, false);
+        }
+
+        public BandedLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp, final boolean left, final boolean right) {
+            super(BandedLikelihoodTestProvider.class, String.format("BANDED: ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual));
+            this.baseQual = baseQual;
+            this.delQual = delQual;
+            this.insQual = insQual;
+            this.gcp = gcp;
+            this.read = read;
+            this.ref = ref;
+            this.expectedQual = expectedQual;
+
+            refBasesWithContext = asBytes(ref, left, right);
+            readBasesWithContext = asBytes(read, false, false);
+            baseQuals = qualAsBytes(baseQual);
+            insQuals = qualAsBytes(insQual);
+            delQuals = qualAsBytes(delQual);
+            gcps = qualAsBytes(gcp, false);
+        }
+
+        public double expectedLogL() {
+            double logL = hmm.computeReadLikelihoodGivenHaplotype(
+                    refBasesWithContext, readBasesWithContext,
+                    baseQuals, insQuals, delQuals, gcps);
+
+            return logL;
+        }
+
+        public double tolerance() {
+            return 0.2; // TODO FIXME arbitrary
+        }
+
+        public double calcLogL() {
+
+            double logL = bandedHMM.computeReadLikelihoodGivenHaplotype(
+                    refBasesWithContext, readBasesWithContext,
+                    baseQuals, insQuals, delQuals, gcps);
+
+            return logL;
+        }
+
+        private final byte[] asBytes(final String bases, final boolean left, final boolean right) {
+            return ( (left ? LEFT_FLANK : "") + LEFT_CONTEXT + bases + RIGHT_CONTEXT + (right ? RIGHT_FLANK : "")).getBytes();
+        }
+
+        private byte[] qualAsBytes(final int phredQual) {
+            return qualAsBytes(phredQual, true);
+        }
+
+        private byte[] qualAsBytes(final int phredQual, final boolean addRandom) {
+            final byte phredQuals[] = new byte[readBasesWithContext.length];
+            Arrays.fill(phredQuals, (byte)phredQual);
+            if(addRandom) {
+                for( int iii = 0; iii < phredQuals.length; iii++) {
+                    phredQuals[iii] = (byte) ((int) phredQuals[iii] + (random.nextInt(7) - 3));
+                }
+            }
+            return phredQuals;
+        }
+    }
+
+    @DataProvider(name = "BasicLikelihoodTestProvider")
+    public Object[][] makeBasicLikelihoodTests() {
+        // context on either side is ACGTTGCA REF ACGTTGCA
+        // test all combinations
+        final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30, 40, 50) : Arrays.asList(30);
+        final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30, 40, 50) : Arrays.asList(40);
+        final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30) : Arrays.asList(10);
+        final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,6,7,8,9,10,20) : Arrays.asList(2);
+
+        for ( final int baseQual : baseQuals ) {
+            for ( final int indelQual : indelQuals ) {
+                for ( final int gcp : gcps ) {
+
+                    // test substitutions
+                    for ( final byte refBase : BaseUtils.BASES ) {
+                        for ( final byte readBase : BaseUtils.BASES ) {
+                            final String ref  = new String(new byte[]{refBase});
+                            final String read = new String(new byte[]{readBase});
+                            final int expected = refBase == readBase ? 0 : baseQual;
+                            new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
+                        }
+                    }
+
+                    // test insertions and deletions
+                    for ( final int size : sizes ) {
+                        for ( final byte base : BaseUtils.BASES ) {
+                            final int expected = indelQual + (size - 2) * gcp;
+
+                            for ( boolean insertionP : Arrays.asList(true, false)) {
+                                final String small = Utils.dupString((char)base, 1);
+                                final String big = Utils.dupString((char)base, size);
+
+                                final String ref = insertionP ? small : big;
+                                final String read = insertionP ? big : small;
+
+                                new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
+                                new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false);
+                                new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true);
+                                new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class);
+    }
+
+    @Test(dataProvider = "BasicLikelihoodTestProvider", enabled = true)
+    public void testBasicLikelihoods(BasicLikelihoodTestProvider cfg) {
+        double calculatedLogL = cfg.calcLogL();
+        double expectedLogL = cfg.expectedLogL();
+        logger.warn(String.format("Test: logL calc=%.2f expected=%.2f for %s", calculatedLogL, expectedLogL, cfg.toString()));
+        Assert.assertEquals(calculatedLogL, expectedLogL, cfg.tolerance());
+    }
+
+    @DataProvider(name = "BandedLikelihoodTestProvider")
+    public Object[][] makeBandedLikelihoodTests() {
+        // context on either side is ACGTTGCA REF ACGTTGCA
+        // test all combinations
+        final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(25, 30, 40, 50) : Arrays.asList(30);
+        final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(30, 40, 50) : Arrays.asList(40);
+        final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 12) : Arrays.asList(10);
+        final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,6,7,8,9,10,20) : Arrays.asList(2);
+
+        for ( final int baseQual : baseQuals ) {
+            for ( final int indelQual : indelQuals ) {
+                for ( final int gcp : gcps ) {
+
+                    // test substitutions
+                    for ( final byte refBase : BaseUtils.BASES ) {
+                        for ( final byte readBase : BaseUtils.BASES ) {
+                            final String ref  = new String(new byte[]{refBase});
+                            final String read = new String(new byte[]{readBase});
+                            final int expected = refBase == readBase ? 0 : baseQual;
+                            new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
+                        }
+                    }
+
+                    // test insertions and deletions
+                    for ( final int size : sizes ) {
+                        for ( final byte base : BaseUtils.BASES ) {
+                            final int expected = indelQual + (size - 2) * gcp;
+
+                            for ( boolean insertionP : Arrays.asList(true, false)) {
+                                final String small = Utils.dupString((char)base, 1);
+                                final String big = Utils.dupString((char)base, size);
+
+                                final String ref = insertionP ? small : big;
+                                final String read = insertionP ? big : small;
+
+                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
+                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false);
+                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true);
+                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return BandedLikelihoodTestProvider.getTests(BandedLikelihoodTestProvider.class);
+    }
+
+    @Test(dataProvider = "BandedLikelihoodTestProvider", enabled = true)
+    public void testBandedLikelihoods(BandedLikelihoodTestProvider cfg) {
+        double calculatedLogL = cfg.calcLogL();
+        double expectedLogL = cfg.expectedLogL();
+        logger.warn(String.format("Test: logL calc=%.2f expected=%.2f for %s", calculatedLogL, expectedLogL, cfg.toString()));
+        Assert.assertEquals(calculatedLogL, expectedLogL, cfg.tolerance());
+    }
+}