Rebasing

2012-04-11 13:56:51 -04:00 · 2012-04-11 13:56:51 -04:00 · 143e92b797
parent 4999ae87ad
commit 143e92b797
4 changed files with 761 additions and 119 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@ -147,11 +147,11 @@ public class UnifiedArgumentCollection {

    @Hidden
    @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
-    public double INDEL_GAP_CONTINUATION_PENALTY = 10.0;
+    public byte INDEL_GAP_CONTINUATION_PENALTY = 10;

    @Hidden
    @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
-    public double INDEL_GAP_OPEN_PENALTY = 45.0;
+    public byte INDEL_GAP_OPEN_PENALTY = 45;

    @Hidden
    @Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@ -31,7 +31,9 @@ import net.sf.samtools.CigarOperator;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.PairHMM;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -41,13 +43,14 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
+import java.util.Map;


 public class PairHMMIndelErrorModel {
    public static final int BASE_QUAL_THRESHOLD = 20;

    private boolean DEBUG = false;
-    private boolean bandedLikelihoods = false;
+    private boolean bandedLikelihoods = true;

    private static final int MAX_CACHED_QUAL = 127;

@ -60,12 +63,12 @@ public class PairHMMIndelErrorModel {
    private static final int START_HRUN_GAP_IDX = 4;
    private static final int MAX_HRUN_GAP_IDX = 20;

-    private static final double MIN_GAP_OPEN_PENALTY = 30.0;
-    private static final double MIN_GAP_CONT_PENALTY = 10.0;
-    private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this.
+    private static final byte MIN_GAP_OPEN_PENALTY = 30;
+    private static final byte MIN_GAP_CONT_PENALTY = 10;
+    private static final byte GAP_PENALTY_HRUN_STEP = 1; // each increase in hrun decreases gap penalty by this.

-    private final double[] GAP_OPEN_PROB_TABLE;
-    private final double[] GAP_CONT_PROB_TABLE;
+    private final byte[] GAP_OPEN_PROB_TABLE;
+    private final byte[] GAP_CONT_PROB_TABLE;

    /////////////////////////////
    // Private Member Variables
@ -86,42 +89,42 @@ public class PairHMMIndelErrorModel {
        }
    }

-    public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean bandedLikelihoods) {
+    public PairHMMIndelErrorModel(byte indelGOP, byte indelGCP, boolean deb, boolean bandedLikelihoods) {
        this.DEBUG = deb;
-        this.bandedLikelihoods = bandedLikelihoods;
+        //this.bandedLikelihoods = bandedLikelihoods;

        // fill gap penalty table, affine naive model:
-        this.GAP_CONT_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
-        this.GAP_OPEN_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
+        this.GAP_CONT_PROB_TABLE = new byte[MAX_HRUN_GAP_IDX];
+        this.GAP_OPEN_PROB_TABLE = new byte[MAX_HRUN_GAP_IDX];

-        double gop = -indelGOP/10.0;
-        double gcp = -indelGCP/10.0;

        for (int i = 0; i < START_HRUN_GAP_IDX; i++) {
-            GAP_OPEN_PROB_TABLE[i] = gop;
-            GAP_CONT_PROB_TABLE[i] = gcp;
+            GAP_OPEN_PROB_TABLE[i] = indelGOP;
+            GAP_CONT_PROB_TABLE[i] = indelGCP;
        }

        double step = GAP_PENALTY_HRUN_STEP/10.0;

-        double maxGOP = -MIN_GAP_OPEN_PENALTY/10.0;  // phred to log prob
-        double maxGCP = -MIN_GAP_CONT_PENALTY/10.0;  // phred to log prob
+        // initialize gop and gcp to their default values
+        byte gop = indelGOP;
+        byte gcp = indelGCP;

+        // all of the following is computed in QUal-space
        for (int i=START_HRUN_GAP_IDX; i < MAX_HRUN_GAP_IDX; i++) {
-            gop += step;
-            if (gop > maxGOP)
-                gop = maxGOP;
+            gop -= GAP_PENALTY_HRUN_STEP;
+            if (gop < MIN_GAP_OPEN_PENALTY)
+                gop = MIN_GAP_OPEN_PENALTY;

-            gcp += step;
-            if(gcp > maxGCP)
-                gcp = maxGCP;
+            gcp -= step;
+            if(gcp < MIN_GAP_CONT_PENALTY)
+                gcp = MIN_GAP_CONT_PENALTY;
            GAP_OPEN_PROB_TABLE[i] = gop;
            GAP_CONT_PROB_TABLE[i] = gcp;
        }

    }

-    static private void getContextHomopolymerLength(final byte[] refBytes, int[] hrunArray) {
+    static private void getContextHomopolymerLength(final byte[] refBytes, final int[] hrunArray) {
        // compute forward hrun length, example:
        // AGGTGACCCCCCTGAGAG
        // 001000012345000000
@ -155,7 +158,7 @@ public class PairHMMIndelErrorModel {


    private void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
-                            double[] currentGOP, double[] currentGCP,  double[][] matchMetricArray,  double[][] XMetricArray,  double[][] YMetricArray) {
+                            byte[] currentGOP, byte[] currentGCP,  double[][] matchMetricArray,  double[][] XMetricArray,  double[][] YMetricArray) {
        if (indI > 0 && indJ > 0) {
            final int im1 = indI -1;
            final int jm1 = indJ - 1;
@ -168,20 +171,20 @@ public class PairHMMIndelErrorModel {

            matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]});

-            final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
-            final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
+            final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGOP[im1]/10.0;
+            final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGCP[im1]/10.0;

            XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);

            // update Y array
-            final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
-            final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
+            final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGOP[im1]/10.0;
+            final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGCP[im1]/10.0;
            YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
        }
    }

    private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
-                                                                 double[] currentGOP, double[] currentGCP, int indToStart,
+                                                                 byte[] currentGOP, byte[] currentGCP, int indToStart,
                                                                 double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {

        final int X_METRIC_LENGTH = readBases.length+1;
@ -349,8 +352,9 @@ public class PairHMMIndelErrorModel {

    }

-    private void fillGapProbabilities(int[] hrunProfile,
-                                      double[] contextLogGapOpenProbabilities, double[] contextLogGapContinuationProbabilities) {
+    private void fillGapProbabilities(final int[] hrunProfile,
+                                      final byte[] contextLogGapOpenProbabilities,
+                                      final byte[] contextLogGapContinuationProbabilities) {
        // fill based on lookup table
        for (int i = 0; i < hrunProfile.length; i++) {
            if (hrunProfile[i] >= MAX_HRUN_GAP_IDX) {
@ -372,27 +376,8 @@ public class PairHMMIndelErrorModel {
        final int readCounts[] = new int[pileup.getNumberOfElements()];
        int readIdx=0;

-        LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
-        LinkedHashMap<Allele,double[]> gapContProbabilityMap = new LinkedHashMap<Allele,double[]>();
-
-        // will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
-        // todo -- refactor into separate function
-        for (Allele a: haplotypeMap.keySet()) {
-            Haplotype haplotype = haplotypeMap.get(a);
-            byte[] haplotypeBases = haplotype.getBases();
-            double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
-            double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
-
-            // get homopolymer length profile for current haplotype
-            int[] hrunProfile = new int[haplotypeBases.length];
-            getContextHomopolymerLength(haplotypeBases,hrunProfile);
-            fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
-
-            gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
-            gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
-
-        }

+        PairHMM pairHMM = new PairHMM(bandedLikelihoods);
        for (PileupElement p: pileup) {
            // > 1 when the read is a consensus read representing multiple independent observations
            readCounts[readIdx] = p.getRepresentativeCount();
@ -408,12 +393,27 @@ public class PairHMMIndelErrorModel {
            else {
                // System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
                GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
+
                if (read.isEmpty())
                    continue;

-                if(ReadUtils.is454Read(read)) {
+                if (read.getUnclippedEnd() > ref.getWindow().getStop())
+                    read = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, ref.getWindow().getStop());
+
+                if (read.isEmpty())
                    continue;
-                }
+
+                if (read.getUnclippedStart() < ref.getWindow().getStart())
+                    read = ReadClipper.hardClipByReferenceCoordinatesLeftTail (read, ref.getWindow().getStart());
+
+                if (read.isEmpty())
+                    continue;
+                // hard-clip low quality ends - this may introduce extra H elements in CIGAR string
+                read = ReadClipper.hardClipLowQualEnds(read,(byte)BASE_QUAL_THRESHOLD );
+
+                if (read.isEmpty())
+                    continue;
+

                // get bases of candidate haplotypes that overlap with reads
                final int trailingBases = 3;
@ -469,54 +469,56 @@ public class PairHMMIndelErrorModel {
                unclippedReadBases = read.getReadBases();
                unclippedReadQuals = read.getBaseQualities();

-                // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative,
-                // and may leave a string of Q2 bases still hanging off the reads.
-                for (int i=numStartSoftClippedBases; i < unclippedReadBases.length; i++) {
-                    if (unclippedReadQuals[i] < BASE_QUAL_THRESHOLD)
-                        numStartClippedBases++;
-                    else
-                        break;
+                final int extraOffset = Math.abs(eventLength);

-                }
-                for (int i=unclippedReadBases.length-numEndSoftClippedBases-1; i >= 0; i-- ){
-                    if (unclippedReadQuals[i] < BASE_QUAL_THRESHOLD)
-                        numEndClippedBases++;
-                    else
-                        break;
-                }
+                /**
+                 * Compute genomic locations that candidate haplotypes will span.
+                 * Read start and stop locations (variables readStart and readEnd) are the original unclipped positions from SAMRecord,
+                 * adjusted by hard clips from Cigar string and by qual-based soft-clipping performed above.
+                 * We will propose haplotypes that overlap the read with some padding.
+                 * True read start = readStart + numStartClippedBases - ReadUtils.getFirstInsertionOffset(read)
+                 * Last term is because if a read starts with an insertion then these bases are not accounted for in readStart.
+                 * trailingBases is a padding constant(=3) and we additionally add abs(eventLength) to both sides of read to be able to
+                 * differentiate context between two haplotypes
+                 */
+                long startLocationInRefForHaplotypes = Math.max(readStart + numStartClippedBases - trailingBases - ReadUtils.getFirstInsertionOffset(read)-extraOffset, 0);
+                long stopLocationInRefForHaplotypes =  readEnd -numEndClippedBases  + trailingBases + ReadUtils.getLastInsertionOffset(read)+extraOffset;

-                int extraOffset = Math.abs(eventLength);
+                if (DEBUG)
+                    System.out.format("orig Start:%d orig stop: %d\n", startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes);

-                long start = Math.max(readStart + numStartClippedBases - trailingBases - ReadUtils.getFirstInsertionOffset(read)-extraOffset, 0);
-                long stop =  readEnd -numEndClippedBases  + trailingBases + ReadUtils.getLastInsertionOffset(read)+extraOffset;
-
-                // Variables start and stop are coordinates (inclusive) where we want to get the haplotype from.
                int readLength = read.getReadLength()-numStartSoftClippedBases-numEndSoftClippedBases;
                // check if start of read will be before start of reference context
-                if (start < ref.getWindow().getStart())// read starts before haplotype: read will have to be cut
-                    start = ref.getWindow().getStart();
-
+                if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
+                    // read starts before haplotype: read will have to be cut
+                    //numStartClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
+                    startLocationInRefForHaplotypes = ref.getWindow().getStart();
+                }
                // check also if end of read will go beyond reference context
-                if (stop > ref.getWindow().getStop())
-                    stop = ref.getWindow().getStop();
+                if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
+                    //numEndClippedBases += stopLocationInRefForHaplotypes - ref.getWindow().getStop();
+                    stopLocationInRefForHaplotypes = ref.getWindow().getStop();
+                }

-                // if there's an insertion in the read, the read stop position will be less than start + read length,
+                // if there's an insertion in the read, the read stop position will be less than start + read legnth,
                // but we want to compute likelihoods in the whole region that a read might overlap
-                if (stop <= start + readLength) {
-                    stop = start + readLength-1;
+                if (stopLocationInRefForHaplotypes <= startLocationInRefForHaplotypes + readLength) {
+                    stopLocationInRefForHaplotypes = startLocationInRefForHaplotypes + readLength-1;
                }

                // ok, we now figured out total number of clipped bases on both ends.
                // Figure out where we want to place the haplotype to score read against
-                /*
-               if (DEBUG)
-                   System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
-                           numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength());
-                */
+
+                if (DEBUG)
+                    System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
+                            numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength());


                LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();

+                /**
+                 * Check if we'll end up with an empty read once all clipping is done
+                 */
                if (numStartClippedBases + numEndClippedBases >= unclippedReadBases.length) {
                    int j=0;
                    for (Allele a: haplotypeMap.keySet()) {
@ -537,67 +539,81 @@ public class PairHMMIndelErrorModel {
                    // initialize path metric and traceback memories for likelihood computation
                    double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null;
                    byte[] previousHaplotypeSeen = null;
-                    double[] previousGOP = null;
-                    double[] previousGCP = null;
-                    int startIdx;
+                    int startIndexInHaplotype = 0;
+                    final byte[] contextLogGapOpenProbabilities = new byte[readBases.length];
+                    final byte[] contextLogGapContinuationProbabilities  = new byte[readBases.length];
+
+                    // get homopolymer length profile for current haplotype
+                    int[] hrunProfile = new int[readBases.length];
+                    getContextHomopolymerLength(readBases,hrunProfile);
+                    fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
+
+
                    for (Allele a: haplotypeMap.keySet()) {

-
                        Haplotype haplotype = haplotypeMap.get(a);
-                        if (stop > haplotype.getStopPosition())
-                            stop = haplotype.getStopPosition();

-                        if (start < haplotype.getStartPosition())
-                            start = haplotype.getStartPosition();
+                        if (stopLocationInRefForHaplotypes > haplotype.getStopPosition())
+                            stopLocationInRefForHaplotypes = haplotype.getStopPosition();

-                        // cut haplotype bases
-                        long indStart = start - haplotype.getStartPosition();
-                        long indStop =  stop - haplotype.getStartPosition();
+                        if (startLocationInRefForHaplotypes < haplotype.getStartPosition())
+                            startLocationInRefForHaplotypes = haplotype.getStartPosition();
+
+                        final long indStart = startLocationInRefForHaplotypes - haplotype.getStartPosition();
+                        final long indStop =  stopLocationInRefForHaplotypes - haplotype.getStartPosition();

                        double readLikelihood;
                        if (DEBUG)
                            System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n",
-                                    indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength(), read.getCigar().toString());
+                                    indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength(), read.getCigar().toString());
+

                        if (indStart < 0 || indStop >= haplotype.getBases().length || indStart > indStop) {
                            // read spanned more than allowed reference context: we currently can't deal with this
-                            readLikelihood =0;
+                            throw new ReviewedStingException("BUG! bad read clipping");
+//                            readLikelihood =0;
                        } else
                        {
                            final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
                                    (int)indStart, (int)indStop);

-                            if (matchMetricArray == null) {
-                                final int X_METRIC_LENGTH = readBases.length+1;
-                                final int Y_METRIC_LENGTH = haplotypeBases.length+1;
+                            final int X_METRIC_LENGTH = readBases.length+1;
+                            final int Y_METRIC_LENGTH = haplotypeBases.length+1;

+                            if (matchMetricArray == null) {
+                                //no need to reallocate arrays for each new haplotype, as length won't change
                                matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
                                XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
                                YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+
                            }
-                            final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
-                            final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
+
+                            pairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
+
+  /*
                            if (previousHaplotypeSeen == null)
-                                startIdx = 0;
-                            else {
-                                final int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
-                                final int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP);
-                                final int s3 = computeFirstDifferingPosition(currentContextGCP, previousGCP);
-                                startIdx = Math.min(Math.min(s1, s2), s3);
-                            }
+                                startIndexInHaplotype = 0;
+                            else
+                                startIndexInHaplotype = 0; //computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
+
                            previousHaplotypeSeen = haplotypeBases.clone();
-                            previousGOP = currentContextGOP.clone();
-                            previousGCP = currentContextGCP.clone();
+    */
+                            readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
+                                    contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities,
+                                    startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);

+                      /*      double r2 = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, contextLogGapOpenProbabilities,
+                                    contextLogGapContinuationProbabilities, 0, matchMetricArray, XMetricArray, YMetricArray);

-                            readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals,
-                                    currentContextGOP, currentContextGCP, startIdx, matchMetricArray, XMetricArray, YMetricArray);
-
-                            if (DEBUG) {
+                            if (readLikelihood > 0) {
+                                int k=0;
+                            }
+                        */    if (DEBUG) {
                                System.out.println("H:"+new String(haplotypeBases));
                                System.out.println("R:"+new String(readBases));
                                System.out.format("L:%4.2f\n",readLikelihood);
-                                System.out.format("StPos:%d\n", startIdx);
+                               // System.out.format("Lorig:%4.2f\n",r2);
+                                System.out.format("StPos:%d\n", startIndexInHaplotype);
                            }
                        }
                        readEl.put(a,readLikelihood);
--- a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java
+++ b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java
@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+
+import java.util.*;
+
+/**
+ * Util class for performing the pair HMM for local alignment. Figure 4.3 in Durbin 1998 book.
+ * User: rpoplin
+ * Date: 3/1/12
+ */
+
+public class PairHMM {
+    private static final int MAX_CACHED_QUAL = (int)Byte.MAX_VALUE;
+    private static final byte DEFAULT_GOP = (byte) 45;
+    private static final byte DEFAULT_GCP = (byte) 10;
+    private static final double BANDING_TOLERANCE = 22.0;
+    private static final int BANDING_CLUSTER_WINDOW = 12;
+    private final boolean doBanded;
+
+    public PairHMM() {
+        doBanded = false;
+    }
+
+    public PairHMM( final boolean doBanded ) {
+        this.doBanded = doBanded;
+    }
+
+    
+    public void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray,
+                                 final int X_METRIC_LENGTH) {
+
+        for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) {
+            Arrays.fill(matchMetricArray[iii], Double.NEGATIVE_INFINITY);
+            Arrays.fill(XMetricArray[iii], Double.NEGATIVE_INFINITY);
+            Arrays.fill(YMetricArray[iii], Double.NEGATIVE_INFINITY);
+        }
+
+        // the initial condition
+        matchMetricArray[1][1] = 0.0; // Math.log10(1.0);
+
+    }
+
+    @Requires({"readBases.length == readQuals.length","readBases.length == insertionGOP.length","readBases.length == deletionGOP.length","readBases.length == overallGCP.length"})
+    @Ensures({"!Double.isInfinite(result)", "!Double.isNaN(result)"}) // Result should be a proper log10 probability
+    public double computeReadLikelihoodGivenHaplotype( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals,
+                                                       final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP ) {
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        final int X_METRIC_LENGTH = readBases.length + 2;
+        final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
+
+        // initial arrays to hold the probabilities of being in the match, insertion and deletion cases
+        final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+
+        initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
+
+        return computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, 0, matchMetricArray, XMetricArray, YMetricArray);
+    }
+
+    @Requires({"readBases.length == readQuals.length","readBases.length == insertionGOP.length","readBases.length == deletionGOP.length","readBases.length == overallGCP.length"})
+    @Ensures({"!Double.isInfinite(result)", "!Double.isNaN(result)"}) // Result should be a proper log10 probability
+    public double computeReadLikelihoodGivenHaplotype( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals,
+                                                       final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP, final int hapStartIndex,
+                                                       final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        final int X_METRIC_LENGTH = readBases.length + 2;
+        final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
+
+        // ensure that all the qual scores have valid values
+        for( int iii = 0; iii < readQuals.length; iii++ ) {
+            readQuals[iii] = ( readQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[iii] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[iii]) );
+        }
+
+        if( doBanded ) {
+            final ArrayList<Integer> workQueue = new ArrayList<Integer>(); // holds a queue of starting work location (indices along the diagonal). Will be sorted each step
+            final ArrayList<Integer> workToBeAdded = new ArrayList<Integer>();
+            final ArrayList<Double> calculatedValues = new ArrayList<Double>();
+            final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH - 1;
+            workQueue.add( 1 ); // Always start a new thread at the baseline because of partially repeating sequences that match better in the latter half of the haplotype
+
+            for(int diag = 3; diag < numDiags; diag++) { // diag = 3 is the (1,2) element of the metric arrays. (1,1) is the initial condition and is purposefully skipped over
+                //Collections.sort(workQueue); // no need to sort because elements are guaranteed to be in ascending order
+                int el = 1;
+                for( int work : workQueue ) {
+                    // choose the appropriate diagonal baseline location
+                    int iii = 0;
+                    int jjj = diag;
+                    if( diag > Y_METRIC_LENGTH ) {
+                        iii = diag - Y_METRIC_LENGTH;
+                        jjj = Y_METRIC_LENGTH;
+                    }
+                    // move to the starting work location along the diagonal
+                    iii += work;
+                    jjj -= work;
+                    while( iii >= X_METRIC_LENGTH || jjj <= 0 ) {
+                        iii--;
+                        jjj++;
+                        work--;
+                    }
+                    if( !detectClusteredStartLocations(workToBeAdded, work ) ) {
+                        workToBeAdded.add(work); // keep this thread going once it has started
+                    }
+
+                    if( work >= el - 3 ) {
+                        // step along the diagonal in the forward direction, updating the match matrices and looking for a drop off from the maximum observed value
+                        double maxElement = Double.NEGATIVE_INFINITY;
+                        for( el = work; el < numDiags + 1; el++ ) {
+                            updateCell(iii, jjj, haplotypeBases, readBases, readQuals,
+                                insertionGOP, deletionGOP, overallGCP,  matchMetricArray,  XMetricArray, YMetricArray);
+                            final double bestMetric = MathUtils.max(matchMetricArray[iii][jjj], XMetricArray[iii][jjj], YMetricArray[iii][jjj]);
+                            calculatedValues.add(bestMetric);
+                            if( bestMetric > maxElement ) {
+                                maxElement = bestMetric;
+                            } else if( maxElement - bestMetric > BANDING_TOLERANCE ) {
+                                break;
+                            }
+                            if( ++iii >= X_METRIC_LENGTH ) { // don't walk off the edge of the matrix
+                                break;
+                            }
+                            if( --jjj <= 0 ) { // don't walk off the edge of the matrix
+                                break;
+                            }
+                        }
+
+                        // find a local maximum to start a new band in the work queue
+                        double localMaxElement = Double.NEGATIVE_INFINITY;
+                        int localMaxElementIndex = 0;
+                        for(int kkk = calculatedValues.size()-1; kkk >= 1; kkk--) {
+                            final double bestMetric = calculatedValues.get(kkk);
+                            if( bestMetric > localMaxElement ) {
+                                localMaxElement = bestMetric;
+                                localMaxElementIndex = kkk;
+                            } else if( localMaxElement - bestMetric > BANDING_TOLERANCE * 0.5 ) { // find a local maximum
+                                if( !detectClusteredStartLocations(workToBeAdded, work + localMaxElementIndex ) ) {
+                                    workToBeAdded.add( work + localMaxElementIndex );
+                                }
+                                break;
+                            }
+                        }
+                        calculatedValues.clear();
+
+                        // reset iii and jjj to the appropriate diagonal baseline location
+                        iii = 0;
+                        jjj = diag;
+                        if( diag > Y_METRIC_LENGTH ) {
+                            iii = diag - Y_METRIC_LENGTH;
+                            jjj = Y_METRIC_LENGTH;
+                        }
+                        // move to the starting work location along the diagonal
+                        iii += work-1;
+                        jjj -= work-1;
+
+                        // step along the diagonal in the reverse direction, updating the match matrices and looking for a drop off from the maximum observed value
+                        for( int traceBack = work - 1; traceBack > 0 && iii > 0 && jjj < Y_METRIC_LENGTH; traceBack--,iii--,jjj++ ) {
+                            updateCell(iii, jjj, haplotypeBases, readBases, readQuals,
+                                    insertionGOP, deletionGOP, overallGCP,  matchMetricArray,  XMetricArray, YMetricArray);
+                            final double bestMetric = MathUtils.max(matchMetricArray[iii][jjj], XMetricArray[iii][jjj], YMetricArray[iii][jjj]);
+                            if( bestMetric > maxElement ) {
+                                maxElement = bestMetric;
+                            } else if( maxElement - bestMetric > BANDING_TOLERANCE ) {
+                                break;
+                            }
+                        }
+                    }
+                }
+                workQueue.clear();
+                workQueue.addAll(workToBeAdded);
+                workToBeAdded.clear();
+            }
+        } else {
+            // simple rectangular version of update loop, slow
+            for( int iii = 1; iii < X_METRIC_LENGTH; iii++ ) {
+                for( int jjj = hapStartIndex + 1; jjj < Y_METRIC_LENGTH; jjj++ ) {
+                    if( (iii == 1 && jjj == 1) ) { continue; }
+                    updateCell(iii, jjj, haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP,
+                        matchMetricArray, XMetricArray, YMetricArray);
+                }
+            }
+        }
+
+        // final probability is the log10 sum of the last element in all three state arrays
+        final int endI = X_METRIC_LENGTH - 1;
+        final int endJ = Y_METRIC_LENGTH - 1;
+        return MathUtils.approximateLog10SumLog10(matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]);
+    }
+
+    private void updateCell( final int indI, final int indJ, final byte[] haplotypeBases, final byte[] readBases,
+                             final byte[] readQuals, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP,
+                             final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+
+        // the read and haplotype indices are offset by one because the state arrays have an extra column to hold the initial conditions
+        final int im1 = indI - 1;
+        final int jm1 = indJ - 1;
+
+        // update the match array
+        double pBaseReadLog10 = 0.0; // Math.log10(1.0);
+        if( im1 > 0 && jm1 > 0 ) { // the emission probability is applied when leaving the state
+            final byte x = readBases[im1-1];
+            final byte y = haplotypeBases[jm1-1];
+            final byte qual = readQuals[im1-1];
+            pBaseReadLog10 = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) );
+        }
+        final int qualIndexGOP = ( im1 == 0 ? DEFAULT_GOP + DEFAULT_GOP : ( insertionGOP[im1-1] + deletionGOP[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : insertionGOP[im1-1] + deletionGOP[im1-1]) );
+        final double d0 = QualityUtils.qualToProbLog10((byte)qualIndexGOP);
+        final double e0 = ( im1 == 0 ? QualityUtils.qualToProbLog10(DEFAULT_GCP) : QualityUtils.qualToProbLog10(overallGCP[im1-1]) );
+        matchMetricArray[indI][indJ] = pBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ-1] + d0, XMetricArray[indI-1][indJ-1] + e0, YMetricArray[indI-1][indJ-1] + e0);
+
+        // update the X (insertion) array
+        final double d1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GOP) : QualityUtils.qualToErrorProbLog10(insertionGOP[im1-1]) );
+        final double e1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GCP) : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
+        final double qBaseReadLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
+        XMetricArray[indI][indJ] = qBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ] + d1, XMetricArray[indI-1][indJ] + e1);
+
+        // update the Y (deletion) array, with penalty of zero on the left and right flanks to allow for a local alignment within the haplotype
+        final double d2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) );
+        final double e2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
+        final double qBaseRefLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
+        YMetricArray[indI][indJ] = qBaseRefLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI][indJ-1] + d2, YMetricArray[indI][indJ-1] + e2);
+    }
+
+    // private function used by the banded approach to ensure the proposed bands are sufficiently distinct from each other
+    private boolean detectClusteredStartLocations( final ArrayList<Integer> list, int loc ) {
+        for(int x : list) {
+            if( Math.abs(x-loc) <= BANDING_CLUSTER_WINDOW ) {
+                return true;
+            }
+        }
+        return false;
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/utils/PairHMMUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/PairHMMUnitTest.java
@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// our package
+package org.broadinstitute.sting.utils;
+
+
+// the imports for unit testing.
+
+
+import org.broadinstitute.sting.BaseTest;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+
+public class PairHMMUnitTest extends BaseTest {
+    final static boolean EXTENSIVE_TESTING = true;
+    PairHMM hmm = new PairHMM( false ); // reference implementation
+    PairHMM bandedHMM = new PairHMM( true ); // algorithm with banding
+
+    // --------------------------------------------------------------------------------
+    //
+    // Provider
+    //
+    // --------------------------------------------------------------------------------
+
+    private class BasicLikelihoodTestProvider extends TestDataProvider {
+        final String ref, read;
+        final byte[] refBasesWithContext, readBasesWithContext;
+        final int baseQual, insQual, delQual, gcp;
+        final int expectedQual;
+        final static String CONTEXT = "ACGTAATGACGATTGCA";
+        final static String LEFT_FLANK = "GATTTATCATCGAGTCTGC";
+        final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTTA";
+
+        public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp) {
+            this(ref, read, baseQual, insQual, delQual, expectedQual, gcp, false, false);
+        }
+
+        public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp, final boolean left, final boolean right) {
+            super(BasicLikelihoodTestProvider.class, String.format("ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual));
+            this.baseQual = baseQual;
+            this.delQual = delQual;
+            this.insQual = insQual;
+            this.gcp = gcp;
+            this.read = read;
+            this.ref = ref;
+            this.expectedQual = expectedQual;
+
+            refBasesWithContext = asBytes(ref, left, right);
+            readBasesWithContext = asBytes(read, false, false);
+        }
+
+        public double expectedLogL() {
+            return expectedQual / -10.0;
+        }
+
+        public double tolerance() {
+            return 0.1; // TODO FIXME arbitrary
+        }
+
+        public double calcLogL() {
+
+            double logL = hmm.computeReadLikelihoodGivenHaplotype(
+                    refBasesWithContext, readBasesWithContext,
+                    qualAsBytes(baseQual, false), qualAsBytes(insQual, true), qualAsBytes(delQual, true),
+                    qualAsBytes(gcp, false));
+
+            return logL;
+        }
+
+        private final byte[] asBytes(final String bases, final boolean left, final boolean right) {
+            return ( (left ? LEFT_FLANK : "") + CONTEXT + bases + CONTEXT + (right ? RIGHT_FLANK : "")).getBytes();
+        }
+
+        private byte[] qualAsBytes(final int phredQual, final boolean doGOP) {
+            final byte phredQuals[] = new byte[readBasesWithContext.length];
+            // initialize everything to MASSIVE_QUAL so it cannot be moved by HMM
+            Arrays.fill(phredQuals, (byte)100);
+
+            // update just the bases corresponding to the provided micro read with the quality scores
+            if( doGOP ) {
+                phredQuals[0 + CONTEXT.length()] = (byte)phredQual;
+            } else {
+                for ( int i = 0; i < read.length(); i++)
+                    phredQuals[i + CONTEXT.length()] = (byte)phredQual;
+            }
+
+            return phredQuals;
+        }
+    }
+
+    final Random random = new Random(87865573);
+    private class BandedLikelihoodTestProvider extends TestDataProvider {
+        final String ref, read;
+        final byte[] refBasesWithContext, readBasesWithContext;
+        final int baseQual, insQual, delQual, gcp;
+        final int expectedQual;
+        final static String LEFT_CONTEXT = "ACGTAATGACGCTACATGTCGCCAACCGTC";
+        final static String RIGHT_CONTEXT = "TACGGCTTCATATAGGGCAATGTGTGTGGCAAAA";
+        final static String LEFT_FLANK = "GATTTATCATCGAGTCTGTT";
+        final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTCCGTA";
+        final byte[] baseQuals, insQuals, delQuals, gcps;
+
+        public BandedLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp) {
+            this(ref, read, baseQual, insQual, delQual, expectedQual, gcp, false, false);
+        }
+
+        public BandedLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp, final boolean left, final boolean right) {
+            super(BandedLikelihoodTestProvider.class, String.format("BANDED: ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual));
+            this.baseQual = baseQual;
+            this.delQual = delQual;
+            this.insQual = insQual;
+            this.gcp = gcp;
+            this.read = read;
+            this.ref = ref;
+            this.expectedQual = expectedQual;
+
+            refBasesWithContext = asBytes(ref, left, right);
+            readBasesWithContext = asBytes(read, false, false);
+            baseQuals = qualAsBytes(baseQual);
+            insQuals = qualAsBytes(insQual);
+            delQuals = qualAsBytes(delQual);
+            gcps = qualAsBytes(gcp, false);
+        }
+
+        public double expectedLogL() {
+            double logL = hmm.computeReadLikelihoodGivenHaplotype(
+                    refBasesWithContext, readBasesWithContext,
+                    baseQuals, insQuals, delQuals, gcps);
+
+            return logL;
+        }
+
+        public double tolerance() {
+            return 0.2; // TODO FIXME arbitrary
+        }
+
+        public double calcLogL() {
+
+            double logL = bandedHMM.computeReadLikelihoodGivenHaplotype(
+                    refBasesWithContext, readBasesWithContext,
+                    baseQuals, insQuals, delQuals, gcps);
+
+            return logL;
+        }
+
+        private final byte[] asBytes(final String bases, final boolean left, final boolean right) {
+            return ( (left ? LEFT_FLANK : "") + LEFT_CONTEXT + bases + RIGHT_CONTEXT + (right ? RIGHT_FLANK : "")).getBytes();
+        }
+
+        private byte[] qualAsBytes(final int phredQual) {
+            return qualAsBytes(phredQual, true);
+        }
+
+        private byte[] qualAsBytes(final int phredQual, final boolean addRandom) {
+            final byte phredQuals[] = new byte[readBasesWithContext.length];
+            Arrays.fill(phredQuals, (byte)phredQual);
+            if(addRandom) {
+                for( int iii = 0; iii < phredQuals.length; iii++) {
+                    phredQuals[iii] = (byte) ((int) phredQuals[iii] + (random.nextInt(7) - 3));
+                }
+            }
+            return phredQuals;
+        }
+    }
+
+    @DataProvider(name = "BasicLikelihoodTestProvider")
+    public Object[][] makeBasicLikelihoodTests() {
+        // context on either side is ACGTTGCA REF ACGTTGCA
+        // test all combinations
+        final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30, 40, 50) : Arrays.asList(30);
+        final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(20, 30, 40, 50) : Arrays.asList(40);
+        final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30) : Arrays.asList(10);
+        final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20) : Arrays.asList(2);
+
+        for ( final int baseQual : baseQuals ) {
+            for ( final int indelQual : indelQuals ) {
+                for ( final int gcp : gcps ) {
+
+                    // test substitutions
+                    for ( final byte refBase : BaseUtils.BASES ) {
+                        for ( final byte readBase : BaseUtils.BASES ) {
+                            final String ref  = new String(new byte[]{refBase});
+                            final String read = new String(new byte[]{readBase});
+                            final int expected = refBase == readBase ? 0 : baseQual;
+                            new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
+                        }
+                    }
+
+                    // test insertions and deletions
+                    for ( final int size : sizes ) {
+                        for ( final byte base : BaseUtils.BASES ) {
+                            final int expected = indelQual + (size - 2) * gcp;
+
+                            for ( boolean insertionP : Arrays.asList(true, false)) {
+                                final String small = Utils.dupString((char)base, 1);
+                                final String big = Utils.dupString((char)base, size);
+
+                                final String ref = insertionP ? small : big;
+                                final String read = insertionP ? big : small;
+
+                                new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
+                                new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false);
+                                new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true);
+                                new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class);
+    }
+
+    @Test(dataProvider = "BasicLikelihoodTestProvider", enabled = true)
+    public void testBasicLikelihoods(BasicLikelihoodTestProvider cfg) {
+        double calculatedLogL = cfg.calcLogL();
+        double expectedLogL = cfg.expectedLogL();
+        logger.warn(String.format("Test: logL calc=%.2f expected=%.2f for %s", calculatedLogL, expectedLogL, cfg.toString()));
+        Assert.assertEquals(calculatedLogL, expectedLogL, cfg.tolerance());
+    }
+
+    @DataProvider(name = "BandedLikelihoodTestProvider")
+    public Object[][] makeBandedLikelihoodTests() {
+        // context on either side is ACGTTGCA REF ACGTTGCA
+        // test all combinations
+        final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(25, 30, 40, 50) : Arrays.asList(30);
+        final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(30, 40, 50) : Arrays.asList(40);
+        final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 12) : Arrays.asList(10);
+        final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20) : Arrays.asList(2);
+
+        for ( final int baseQual : baseQuals ) {
+            for ( final int indelQual : indelQuals ) {
+                for ( final int gcp : gcps ) {
+
+                    // test substitutions
+                    for ( final byte refBase : BaseUtils.BASES ) {
+                        for ( final byte readBase : BaseUtils.BASES ) {
+                            final String ref  = new String(new byte[]{refBase});
+                            final String read = new String(new byte[]{readBase});
+                            final int expected = refBase == readBase ? 0 : baseQual;
+                            new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
+                        }
+                    }
+
+                    // test insertions and deletions
+                    for ( final int size : sizes ) {
+                        for ( final byte base : BaseUtils.BASES ) {
+                            final int expected = indelQual + (size - 2) * gcp;
+
+                            for ( boolean insertionP : Arrays.asList(true, false)) {
+                                final String small = Utils.dupString((char)base, 1);
+                                final String big = Utils.dupString((char)base, size);
+
+                                final String ref = insertionP ? small : big;
+                                final String read = insertionP ? big : small;
+
+                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
+                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false);
+                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true);
+                                new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return BandedLikelihoodTestProvider.getTests(BandedLikelihoodTestProvider.class);
+    }
+
+    @Test(dataProvider = "BandedLikelihoodTestProvider", enabled = true)
+    public void testBandedLikelihoods(BandedLikelihoodTestProvider cfg) {
+        double calculatedLogL = cfg.calcLogL();
+        double expectedLogL = cfg.expectedLogL();
+        logger.warn(String.format("Test: logL calc=%.2f expected=%.2f for %s", calculatedLogL, expectedLogL, cfg.toString()));
+        Assert.assertEquals(calculatedLogL, expectedLogL, cfg.tolerance());
+    }
+
+    @Test
+    public void testMismatchInEveryPositionInTheReadWithCenteredHaplotype() {
+        byte[] haplotype1 = "TTCTCTTCTGTTGTGGCTGGTT".getBytes();
+
+        final int offset = 2;
+        byte[] gop = new byte[haplotype1.length - 2 * offset];
+        Arrays.fill(gop, (byte) 80);
+        byte[] gcp = new byte[haplotype1.length - 2 * offset];
+        Arrays.fill(gcp, (byte) 80);
+
+        for( int k = 0; k < haplotype1.length - 2 * offset; k++ ) {
+            byte[] quals = new byte[haplotype1.length - 2 * offset];
+            Arrays.fill(quals, (byte) 90);
+            // one read mismatches the haplotype
+            quals[k] = 20;
+
+            byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length-offset);
+            // change single base at position k to C. If it's a C, change to T
+            mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
+            double res1 = hmm.computeReadLikelihoodGivenHaplotype(
+                    haplotype1, mread,
+                    quals, gop, gop,
+                    gcp);
+
+
+            System.out.format("H:%s\nR:  %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
+
+            Assert.assertEquals(res1, -2.0, 1e-2);
+        }
+    }
+
+    @Test
+    public void testMismatchInEveryPositionInTheRead() {
+        byte[] haplotype1 = "TTCTCTTCTGTTGTGGCTGGTT".getBytes();
+
+        final int offset = 2;
+        byte[] gop = new byte[haplotype1.length - offset];
+        Arrays.fill(gop, (byte) 80);
+        byte[] gcp = new byte[haplotype1.length - offset];
+        Arrays.fill(gcp, (byte) 80);
+
+        for( int k = 0; k < haplotype1.length - offset; k++ ) {
+            byte[] quals = new byte[haplotype1.length - offset];
+            Arrays.fill(quals, (byte) 90);
+            // one read mismatches the haplotype
+            quals[k] = 20;
+
+            byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length);
+            // change single base at position k to C. If it's a C, change to T
+            mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
+            double res1 = hmm.computeReadLikelihoodGivenHaplotype(
+                    haplotype1, mread,
+                    quals, gop, gop,
+                    gcp);
+
+
+            System.out.format("H:%s\nR:  %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
+
+            Assert.assertEquals(res1, -2.0, 1e-2);
+        }
+    }
+}