Major changes/improvements to indel genotyper:

a) Redid way to compute path metrics in indel error model. Paper formulation where we have an anchor point in the alignemt between read and haplotype won't work in practice except in nice data sets that are perfectly indel-realigned and that are well mapped by aligner. New formulation doesn't assume this, and it's actually simpler and uses less code. It now resembles more a classic SW dynamic programming formulation but it still preserves the HMM probabilistic formulation. b) Added a programmable call threshold, set by command line. c) Use now sample name from BAM file, remove -sampleName argument. d) Simplify loop to compute read-haplotype likelihoods. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4311 348d0f76-0448-11de-a6fe-93d51630548a
2010-09-19 23:47:31 +00:00 · 2010-09-19 23:47:31 +00:00 · f64b6fddc1
parent 40274ba7dc
commit f64b6fddc1
3 changed files with 185 additions and 249 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java
@ -36,19 +36,12 @@ import org.broadinstitute.sting.utils.genotype.Haplotype;
 import java.util.Arrays;
 import java.util.List;

-/**
- * Created by IntelliJ IDEA.
- * User: delangel
- * Date: Aug 24, 2010
- * Time: 1:31:08 PM
- * To change this template use File | Settings | File Templates.
- */
 public class HaplotypeIndelErrorModel {

    private final int maxReadDeletionLength; // maximum length of deletion on a read
    private final double noDeletionProbability; // alpha for geometric distribution for deletion length
    private final int haplotypeSize;
-    private final int maxReadLength;
+
    private final int PATH_METRIC_TABLE_LENGTH;
    private final int RIGHT_ALIGN_INDEX;
    private final int LEFT_ALIGN_INDEX;
@ -57,7 +50,7 @@ public class HaplotypeIndelErrorModel {

    private double deletionErrorProbabilities[];

-    private double pathMetricArray[];
+    private double pathMetricArray[][];
    private int bestStateIndexArray[][];

    private final double logOneMinusInsertionStartProbability;
@ -68,17 +61,18 @@ public class HaplotypeIndelErrorModel {
    private boolean DEBUG = false;
    private boolean doSimpleCalculationModel = false;

-    public  HaplotypeIndelErrorModel(int mrdl, double insStart, double insEnd, double alpha, int haplotypeSize, int maxReadLength,
+    private static final double QUAL_ONE_HALF = -10*Math.log10(0.5);
+
+    public  HaplotypeIndelErrorModel(int mrdl, double insStart, double insEnd, double alpha, int haplotypeSize,
                                     boolean dosimple, boolean deb) {
-        this(mrdl, insStart, insEnd, alpha, haplotypeSize, maxReadLength);
+        this(mrdl, insStart, insEnd, alpha, haplotypeSize);
        this.DEBUG = deb;
        this.doSimpleCalculationModel = dosimple;
    }
-    public  HaplotypeIndelErrorModel(int mrdl, double insStart, double insEnd, double alpha, int haplotypeSize, int maxReadLength) {
+    public  HaplotypeIndelErrorModel(int mrdl, double insStart, double insEnd, double alpha, int haplotypeSize) {
        this.maxReadDeletionLength = mrdl;
        this.noDeletionProbability = 1-alpha;
        this.haplotypeSize = haplotypeSize;
-        this.maxReadLength = maxReadLength;

        PATH_METRIC_TABLE_LENGTH = haplotypeSize+2;
        RIGHT_ALIGN_INDEX = PATH_METRIC_TABLE_LENGTH-1;
@ -90,13 +84,6 @@ public class HaplotypeIndelErrorModel {
        logOneMinusInsertionEndProbability = probToQual(1-insEnd);


-        // initialize path metric and traceback memories for Viterbi computation
-        pathMetricArray = new double[2*PATH_METRIC_TABLE_LENGTH];
-        bestStateIndexArray = new int[maxReadLength][2*(PATH_METRIC_TABLE_LENGTH)];
-
-        for (int k=0; k < 2*PATH_METRIC_TABLE_LENGTH; k++)
-            pathMetricArray[k] = 0;
-
        // fill in probability for read deletion of length k = C*exp(k-1)
        double prob = 1.0;
        double sumProb = 0.0;
@ -140,39 +127,22 @@ public class HaplotypeIndelErrorModel {
        byte[] readQuals = Arrays.copyOfRange(read.getBaseQualities(),(int)numStartClippedBases,
                (int)(read.getReadBases().length-numEndClippedBases));

-        double[] previousPathMetricArray = new double[2*PATH_METRIC_TABLE_LENGTH];
-        int readStartIdx, initialIndexInHaplotype;

-        if (readStartPosition < haplotypeStartPosition) {
-            readStartIdx = (int)(haplotypeStartPosition- readStartPosition);
-            initialIndexInHaplotype = 0; // implicit above
-            if ((readStartPosition+readBases.length-1) < haplotypeStartPosition)
-                // when ignoring read CIGAR alignment, this read actually didn't overlap with haplotype:
-                // just ignore read
-                return INFINITE;
-        }
-        else {
-            readStartIdx = 0;
-            initialIndexInHaplotype = (int)(readStartPosition-haplotypeStartPosition);
-        }
+        int readLength = readBases.length;

-        // special case 1: we're analyzing an insertion and the read starts right at the insertion.
-        if ((readStartPosition > vc.getStart()-eventLength) && vc.isInsertion() && !haplotype.isReference()) {
-            initialIndexInHaplotype += eventLength;
-        }
-        
-        if (DEBUG) {
-            System.out.println("READ: "+read.getReadName());
+        // initialize path metric and traceback memories for Viterbi computation
+        pathMetricArray = new double[readLength+1][PATH_METRIC_TABLE_LENGTH];
+        bestStateIndexArray = new int[readLength+1][PATH_METRIC_TABLE_LENGTH];

-            System.out.println(haplotypeStartPosition);
-            System.out.println(readStartPosition);
-            System.out.println(readStartIdx);
-            System.out.println(initialIndexInHaplotype);
-        }
+        for (int k=1; k < PATH_METRIC_TABLE_LENGTH; k++)
+            pathMetricArray[0][k] = 0;
+
+ /*
+
+         if (doSimpleCalculationModel) {

-        double pRead = 0;
-        if (doSimpleCalculationModel) {
            // No Viterbi algorithm - assume no sequencing indel artifacts,
+
            // so we can collapse computations and pr(read | haplotype) is just probability of observing overlap
            // of read with haplotype.
            int haplotypeIndex = initialIndexInHaplotype;
@ -231,7 +201,7 @@ public class HaplotypeIndelErrorModel {
                System.out.format("\nLikelihood:%f\n",pRead);

            }
-/*
+
            if (read.getReadName().contains("106880")) {
                
                System.out.println("aca");
@ -248,87 +218,51 @@ public class HaplotypeIndelErrorModel {
                    System.out.format("%c ", readBases[k]);
                }

-            }   */
+            }
            return pRead;

        }
-        // initialize path metric array to hard-code certainty that initial position is aligned
-        for (int k=0; k < 2*PATH_METRIC_TABLE_LENGTH; k++)
-            pathMetricArray[k] = INFINITE;
-
-        pathMetricArray[initialIndexInHaplotype] = 0;
+                */


        // Update path metric computations based on branch metric (Add/Compare/Select operations)
        // do forward direction first, ie from anchor to end of read
        // outer loop
-        for (int indR=readStartIdx; indR < readBases.length; indR++) {
+        for (int indR=0; indR < readLength; indR++) {
            byte readBase = readBases[indR];
            byte readQual = readQuals[indR];

-            System.arraycopy(pathMetricArray, 0, previousPathMetricArray, 0, 2*PATH_METRIC_TABLE_LENGTH);
-
-

            for (int indX=LEFT_ALIGN_INDEX; indX <= RIGHT_ALIGN_INDEX; indX++) {


                byte haplotypeBase;
-                if (indX > LEFT_ALIGN_INDEX && indX <= haplotypeSize)
+                if (indX > LEFT_ALIGN_INDEX && indX < RIGHT_ALIGN_INDEX)
                    haplotypeBase = haplotype.getBasesAsBytes()[indX-1];
                else
                    haplotypeBase = readBase;

-                updatePathMetrics(haplotypeBase, indX, indR, readBase, readQual, previousPathMetricArray, true);
+                updatePathMetrics(haplotypeBase, indX, indR, readBase, readQual);
            }
        }

-        double[] forwardPathMetricArray = new double[2*PATH_METRIC_TABLE_LENGTH];
-        System.arraycopy(pathMetricArray, 0, forwardPathMetricArray, 0, 2*PATH_METRIC_TABLE_LENGTH);
-        double forwardMetric = MathUtils.arrayMin(pathMetricArray);
-
-        // reinitialize path metric array to known position at start of read
-        // initialize path metric array to hard-code certainty that initial position is aligned
-        for (int k=0; k < 2*PATH_METRIC_TABLE_LENGTH; k++)
-            pathMetricArray[k] = INFINITE;
-
-        pathMetricArray[initialIndexInHaplotype] = 0;
-
-        // do now backward direction (from anchor to start of read)
-        // outer loop
-        for (int indR=readStartIdx-1; indR >= 0; indR--) {
-            byte readBase = readBases[indR];
-            byte readQual = readQuals[indR];
-
-            System.arraycopy(pathMetricArray, 0, previousPathMetricArray, 0, 2*PATH_METRIC_TABLE_LENGTH);
-
-            for (int indX=LEFT_ALIGN_INDEX; indX <= RIGHT_ALIGN_INDEX; indX++) {
-                byte haplotypeBase;
-                if (indX > 0 && indX <= haplotypeSize)
-                    haplotypeBase = haplotype.getBasesAsBytes()[indX-1];
-                else
-                    haplotypeBase = readBase;
-
-                updatePathMetrics(haplotypeBase, indX, indR, readBase, readQual, previousPathMetricArray, false);
-
-            }
-        }


        // for debugging only: compute backtracking to find optimal route through trellis. Since I'm only interested
        // in log-likelihood of best state, this isn't really necessary.
+        int[] bestIndexArray = new int[readLength];
+        double bestMetric = MathUtils.arrayMin(pathMetricArray[readLength]);
+
+        // start from last position of read, go backwards to find optimal alignment
+        int bestIndex = MathUtils.minElementIndex(pathMetricArray[readLength]);
+        bestIndexArray[readLength-1] = bestIndex;
+
+        for (int k=readLength-2; k>=0; k--) {
+            bestIndex = bestStateIndexArray[k][bestIndex];
+            bestIndexArray[k] = bestIndex;
+        }
        if (DEBUG) {

-            int[] bestIndexArray = new int[readBases.length+1];
-
-
-            int bestIndex = MathUtils.minElementIndex(forwardPathMetricArray);
-            bestIndexArray[readBases.length] = bestIndex;
-
-            for (int k=readBases.length-1; k>=0; k--) {
-                bestIndex = bestStateIndexArray[k][bestIndex];
-                bestIndexArray[k] = bestIndex;
-            }


            System.out.println(read.getReadName());
@ -352,155 +286,91 @@ public class HaplotypeIndelErrorModel {
            System.out.println();
        }
        // now just take optimum along all path metrics: that's the log likelihood of best alignment
-        double backwardMetric = MathUtils.arrayMin(pathMetricArray);
        if (DEBUG)
-            System.out.format("Likelihood: %5.4f\n", forwardMetric + backwardMetric);
-        return forwardMetric + backwardMetric;
+            System.out.format("Likelihood: %5.4f\n", bestMetric);
+        return bestMetric;

    }

-    private void updatePathMetrics(byte haplotypeBase, int indX, int indR, byte readBase, byte readQual,
-                                   double[] previousPathMetricArray, boolean isForward) {
+    private void updatePathMetrics(byte haplotypeBase, int indX, int indR, byte readBase, byte readQual) {

        double bmetric;

-        // Pr(Rb', Xb',Ib'| Xb,Ib=0) is populated starting on Xb , =
-        // Pr(Rb'|Xb',Ib') Pr(Xb',Ib'|Xb,Ib) =
-        // Pr(Rb'|Xb',Ib') Pr(Xb'|Ib',Xb,Ib) Pr(Ib'|Ib)
-
-        // start with case Ib=0, Ib'=0: no insertion
        double bestMetric = INFINITE;
-        int bestMetricIndex = 0;
-        double pBaseRead = getProbabilityOfReadBaseGivenXandI(haplotypeBase, readBase, readQual, indX, 0);
+        int bestMetricIndex = -1;

-        if (isForward) {
-            if (indX==LEFT_ALIGN_INDEX){
-                // there can't be a transition into Xb=0 
-                // special case: only one incoming path
-                // record best path metric
-//                pathMetricArray[indX] = INFINITE;
-//                bestStateIndexArray[indR][indX] = 0;
-//                bestMetric = INFINITE;
-//                bestMetricIndex = 0;
-            }
-            else {
-                for (int indXold = indX-1; indXold >= indX-this.maxReadDeletionLength; indXold--){
+        // compute metric for match/mismatch
+        double pBaseRead;

-                    if (indXold < 0)
-                        break;
+        // workaround for reads whose bases quality = 0,
+        if (readQual < 1)
+            readQual = 1;
+        
+        double baseProb = QualityUtils.qualToProb(readQual);

-                    // fetch path metric and add branch metric
-                    bmetric = previousPathMetricArray[indXold] + deletionErrorProbabilities[indX-indXold] +
-                            logOneMinusInsertionStartProbability + pBaseRead;
+        
+        double pBaseMatch =  probToQual(baseProb);
+        double pBaseMismatch = (double)(readQual);

-                    if (bmetric < bestMetric) {
-                        bestMetric = bmetric;
-                        bestMetricIndex = indXold;
-                    }
-                }
-                if (indX == RIGHT_ALIGN_INDEX)  {
-                    // special treatment for R->R case  (read aligns to the right of haplotype) when Xold = X = R
-                    bmetric = previousPathMetricArray[indX] + pBaseRead;
-
-                    if (bmetric < bestMetric) {
-                        bestMetric = bmetric;
-                        bestMetricIndex = indX;
-                    }
+        if (haplotypeBase == readBase)
+            pBaseRead =  pBaseMatch;
+        else
+            pBaseRead = pBaseMismatch;


-                }
-            }
+
+
+
+        if (indX == LEFT_ALIGN_INDEX) {
+            // special treatment for L->L case  (read aligns to the right/left of haplotype) when Xold = X = R
+            bestMetric = pathMetricArray[indR][indX] + QUAL_ONE_HALF; //1/2 in log scale
+            bestMetricIndex = indX;
        }
        else {
-            for (int indXold = indX+1; indXold <=this.maxReadDeletionLength + indX; indXold++){
-                if (indXold > this.haplotypeSize+1)
+            for (int indXold = indX-1; indXold >= indX-this.maxReadDeletionLength; indXold--) {
+
+                if (indXold < 0)
                    break;

                // fetch path metric and add branch metric
-                bmetric = previousPathMetricArray[indXold] + deletionErrorProbabilities[indXold-indX] +
-                        logOneMinusInsertionStartProbability + pBaseRead;
+                bmetric = pathMetricArray[indR][indXold] + deletionErrorProbabilities[indX-indXold] + pBaseRead;
+
+                if (indXold == indX-1) {
+                    // special case for exact walk down diagonal: need to consider that an insertion may have ended
+               //     bmetric += logInsertionEndProbability;
+                } else {
+                    bmetric += logOneMinusInsertionStartProbability;
+                }

                if (bmetric < bestMetric) {
                    bestMetric = bmetric;
                    bestMetricIndex = indXold;
                }
-                if (indX == LEFT_ALIGN_INDEX)  {
-                    // special treatment for R->R case  (read aligns to the right of haplotype) when Xold = X = R
-                    bmetric = previousPathMetricArray[indX] + pBaseRead;
-
-                    if (bmetric < bestMetric) {
-                        bestMetric = bmetric;
-                        bestMetricIndex = indX;
-                    }
-
-
-                }
            }
-
-        }
-        // now Pr(Xb',Ib'=0| Xb,Ib=1): an insertion ended. Only case if Xb'=Xb+1 (walk diag down in path array).
-
-        if (indX > LEFT_ALIGN_INDEX && indX < RIGHT_ALIGN_INDEX)  {
-            if (isForward)
-                bmetric = previousPathMetricArray[indX-1+PATH_METRIC_TABLE_LENGTH] + logInsertionEndProbability + pBaseRead;
-            else
-                bmetric = previousPathMetricArray[indX+1+PATH_METRIC_TABLE_LENGTH] + logInsertionEndProbability + pBaseRead;
+            // additional case: a walk right (ie indXold = indX). Can be because of an insertion in the middle of reads,
+            // or we're aligned to right of read
+            bmetric = pathMetricArray[indR][indX]+pBaseRead;
+            if (indX < RIGHT_ALIGN_INDEX) {
+                bmetric += logInsertionStartProbability + logOneMinusInsertionEndProbability;
+            }
+            else {
+                // anything extra to do for R->R case?
+                bmetric = pathMetricArray[indR][indX] + QUAL_ONE_HALF;
+            }

            if (bmetric < bestMetric) {
                bestMetric = bmetric;
-                if (isForward)
-                    bestMetricIndex = indX-1 + PATH_METRIC_TABLE_LENGTH;
-                else
-                    bestMetricIndex = indX+1 + PATH_METRIC_TABLE_LENGTH;
-
-            }
-        }
-        // record best path metric
-        pathMetricArray[indX] = bestMetric;
-        bestStateIndexArray[indR][indX] = bestMetricIndex;
-
-        // now Pr(Xb', Ib'=1 | Xb, Ib=0) : an insertion started: (walk right in path array)
-        bestMetric = INFINITE;
-        pBaseRead = getProbabilityOfReadBaseGivenXandI(haplotypeBase, readBase, readQual, indX, 1);
-
-        bmetric = previousPathMetricArray[indX] + logInsertionStartProbability + pBaseRead;
-        if (bmetric < bestMetric) {     //if superfluous, could clean up
-            bestMetric = bmetric;
-            bestMetricIndex = indX;
-        }
-
-        // final case: Pr(Xb', Ib'=1 | Xb, Ib=1): insertion continues, also walk right in path array
-        if (indX > LEFT_ALIGN_INDEX && indX < RIGHT_ALIGN_INDEX)  {
-            bmetric = previousPathMetricArray[indX+PATH_METRIC_TABLE_LENGTH] + logOneMinusInsertionEndProbability + pBaseRead;
-            if (bmetric < bestMetric) {     //if superfluous, could clean up
-                bestMetric = bmetric;
-                bestMetricIndex = indX+PATH_METRIC_TABLE_LENGTH;
+                bestMetricIndex = indX;
            }

-            // record best path metric
-            pathMetricArray[indX+PATH_METRIC_TABLE_LENGTH] = bestMetric;
-            bestStateIndexArray[indR][indX+PATH_METRIC_TABLE_LENGTH] = bestMetricIndex;
+
        }
+
+         // record best path metric
+        pathMetricArray[indR+1][indX] = bestMetric;
+        bestStateIndexArray[indR+1][indX] = bestMetricIndex;
+
    }

-    private double getProbabilityOfReadBaseGivenXandI(byte haplotypeBase, byte readBase, byte readQual, int indX, int indI) {
-
-
-        if (indX == RIGHT_ALIGN_INDEX  || indX == LEFT_ALIGN_INDEX) {
-            // X = L or R
-            double baseProb = QualityUtils.qualToProb(readQual);
-            return probToQual(baseProb);
-        }
-
-        if (haplotypeBase == readBase) {
-            double baseProb = QualityUtils.qualToProb(readQual);
-            return probToQual(baseProb);
-        }
-        else {
-            return (double)(readQual);
-        }
-        // TODO - optimize for speed by avoiding so many log conversions, can use a LUT
-
-    }

 }
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimpleIndelGenotyperWalker.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimpleIndelGenotyperWalker.java
@ -38,7 +38,9 @@ import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.collections.CircularArray;
+import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.genotype.Haplotype;
 import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@ -60,25 +62,22 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {
    int maxReadDeletionLength = 3;

    @Argument(fullName="insertionStartProbability",shortName="insertionStartProbability",doc="Assess only sites with coverage at or below the specified value.",required=false)
-    double insertionStartProbability = 0.01;
+    double insertionStartProbability = 0.001;

    @Argument(fullName="insertionEndProbability",shortName="insertionEndProbability",doc="Assess only sites with coverage at or below the specified value.",required=false)
    double insertionEndProbability = 0.5;

    @Argument(fullName="alphaDeletionProbability",shortName="alphaDeletionProbability",doc="Assess only sites with coverage at or below the specified value.",required=false)
-    double alphaDeletionProbability = 0.01;
+    double alphaDeletionProbability = 0.001;


-    @Argument(fullName="haplotypeSize",shortName="hsize",doc="Size of haplotypes to evaluate calls.",required=true)
-    int HAPLOTYPE_SIZE = 40;
+    @Argument(fullName="haplotypeSize",shortName="hsize",doc="Size of haplotypes to evaluate calls.",required=false)
+    int HAPLOTYPE_SIZE = 80;


    @Argument(fullName="indelHeterozygozity",shortName="indhet",doc="Indel Heterozygosity (assumed 1/6500 from empirical human data)",required=false)
    double indelHeterozygosity = (double)1.0/6500.0;

-    @Argument(fullName="sampleName",shortName="sample",doc="Sample name to evaluate genotypes in",required=true)
-    String sampleName;
-
    @Argument(fullName="doSimpleCalculationModel",shortName="simple",doc="Use Simple Calculation Model for Pr(Reads | Haplotype)",required=false)
    boolean doSimple = false;

@ -91,6 +90,14 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {
    @Argument(fullName="useFlatPriors",shortName="flat",doc="If present, use flat priors on haplotypes",required=false)
    boolean useFlatPriors = false;

+    @Argument(fullName="useDynamicHaplotypeSize",shortName="dhsize",doc="If present, use dynamic haplotype size",required=false)
+    boolean useDynamicHaplotypeSize = false;
+
+
+    @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false)
+    public double STANDARD_CONFIDENCE_FOR_CALLING = 10.0;
+
+
    @Override
    public boolean generateExtendedEvents() { return true; }

@ -100,15 +107,16 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {

    private HaplotypeIndelErrorModel model;

-    private static final int MAX_READ_LENGTH = 1000; // TODO- make this dynamic
-

+    Set<String>  sampleNames;


    @Override
    public void initialize() {
        model = new HaplotypeIndelErrorModel(maxReadDeletionLength, insertionStartProbability,
-                insertionEndProbability, alphaDeletionProbability, HAPLOTYPE_SIZE, MAX_READ_LENGTH, doSimple, DEBUG);
+                insertionEndProbability, alphaDeletionProbability, HAPLOTYPE_SIZE, doSimple, DEBUG);
+
+        sampleNames = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());

    }

@ -121,6 +129,7 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {
        if (!context.hasBasePileup())
            return 0;

+
        VariantContext vc = tracker.getVariantContext(ref, "indels", null, context.getLocation(), true);
        // ignore places where we don't have a variant
        if ( vc == null )
@ -130,8 +139,50 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {
        if (!vc.isIndel())
            return 0;

+        int eventLength = vc.getReference().getBaseString().length() - vc.getAlternateAllele(0).getBaseString().length(); // assume only one alt allele for now
+        if (eventLength<0)
+            eventLength = - eventLength;

-        List<Haplotype> haplotypesInVC = Haplotype.makeHaplotypeListFromVariantContextAlleles( vc, ref, HAPLOTYPE_SIZE);
+        int currentHaplotypeSize = HAPLOTYPE_SIZE;
+        List<Haplotype> haplotypesInVC = new ArrayList<Haplotype>();
+
+        int minHaplotypeSize = Haplotype.LEFT_WINDOW_SIZE + eventLength + 2; // to be safe
+
+        
+        if (useDynamicHaplotypeSize) {
+            if (currentHaplotypeSize < minHaplotypeSize)
+                currentHaplotypeSize = minHaplotypeSize;
+            
+            boolean doneHaplotype = false;
+            while (!doneHaplotype) {
+
+                haplotypesInVC = Haplotype.makeHaplotypeListFromVariantContextAlleles( vc, ref, currentHaplotypeSize);
+                String[] haplotypeStrings = new String[haplotypesInVC.size()];
+                for (int k = 0; k < haplotypeStrings.length; k++)
+                    haplotypeStrings[k] = new String(haplotypesInVC.get(k).getBasesAsBytes());
+
+                // check if all strings are the same.
+                for (int k = 1; k < haplotypeStrings.length; k++) {
+                    if (!haplotypeStrings[k].equals(haplotypeStrings[0]))
+                        doneHaplotype = true;
+
+                }
+
+                if (doneHaplotype)
+                    break;
+                else
+                    currentHaplotypeSize = 2*currentHaplotypeSize;
+
+            }
+
+        }
+        else {
+            if (currentHaplotypeSize < minHaplotypeSize)
+                throw new UserException.BadArgumentValue("Insufficient haplotype length to represent all indels:",
+                        String.valueOf(currentHaplotypeSize));
+
+            haplotypesInVC = Haplotype.makeHaplotypeListFromVariantContextAlleles( vc, ref, currentHaplotypeSize);
+        }

        // combine likelihoods for all possible haplotype pair (n*(n-1)/2 combinations)

@ -140,6 +191,10 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {
        // for given allele haplotype, compute likelihood of read pileup given haplotype
        ReadBackedPileup pileup =  context.getBasePileup().getPileupWithoutMappingQualityZeroReads();

+        if (pileup.getSamples().size() > 1) {
+            throw new UserException.BadInput("Currently only a single-sample BAM is supported for Indel genotyper");
+        }
+

        // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
        // In general, we'll assume: even spread of indels throughout genome (not true, but simplifying assumption),
@ -160,9 +215,6 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {
        // and -10*log10(above) = -10*log10(K)+ 18.9*log10(abs(L)) + 0.15*abs(L) - 10*log10(theta*L), and we ignore terms that would be
        // added to ref hypothesis.
        // Equation above is prior model.
-        int eventLength = vc.getReference().getBaseString().length() - vc.getAlternateAllele(0).getBaseString().length(); // assume only one alt allele for now
-        if (eventLength<0)
-            eventLength = - eventLength;

        if (!useFlatPriors) {

@ -177,10 +229,12 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {
        int bestIndexI =-1, bestIndexJ=-1;
        double callConfidence = 0.0;

+
        if (pileup.getReads().size() > 0) {
            double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()];
            int i=0;
            for (SAMRecord read : pileup.getReads()) {
+                
                // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
                // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
                for (int j=0; j < haplotypesInVC.size(); j++) {
@ -216,7 +270,8 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {
                    if (haplotypeLikehoodMatrix[i][j] < bestLikelihood) {
                        bestIndexI = i;
                        bestIndexJ = j;
-                        callConfidence = bestLikelihood - haplotypeLikehoodMatrix[i][j];
+                        if (i > 0 || j > 0)
+                            callConfidence = bestLikelihood - haplotypeLikehoodMatrix[i][j];
                        bestLikelihood = haplotypeLikehoodMatrix[i][j];
                    }
                }
@ -259,26 +314,36 @@ public class SimpleIndelGenotyperWalker extends RefWalker<Integer,Integer> {
            else
                newG = "OTHER";

+
+            if (callConfidence < STANDARD_CONFIDENCE_FOR_CALLING) {
+                newG = "NOCALL";
+            }
            out.format("NewG %s OldG %s DadG %s MomG %s\n", newG, oldG, dadG, momG);
        }
        else {
-            Genotype originalGenotype = vc.getGenotype(sampleName);
-            String oldG, newG;
-            oldG = getGenotypeString(originalGenotype);
-            int x = bestIndexI+bestIndexJ;
-            if (x == 0)
-                newG = "HOMREF";
-            else if (x == 1)
-                newG = "HET";
-            else if (x == 2)
-                newG = "HOMVAR";
-            else if (x < 0)
-                newG = "NOCALL";
-            else
-                newG = "OTHER";

-            out.format("NewG %s OldG %s\n", newG, oldG);
+            for (String sample: sampleNames) {
+                String oldG, newG;
+                Genotype originalGenotype = vc.getGenotype(sample);
+                oldG = getGenotypeString(originalGenotype);
+                int x = bestIndexI+bestIndexJ;
+                if (x == 0)
+                    newG = "HOMREF";
+                else if (x == 1)
+                    newG = "HET";
+                else if (x == 2)
+                    newG = "HOMVAR";
+                else if (x < 0)
+                    newG = "NOCALL";
+                else
+                    newG = "OTHER";

+                if (callConfidence < STANDARD_CONFIDENCE_FOR_CALLING) {
+                    newG = "NOCALL";
+                }
+
+                out.format("NewG %s OldG %s\n", newG, oldG);
+            }
        }

 /*
--- a/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java
@ -40,6 +40,7 @@ public class Haplotype {
    protected double[] quals = null;
    private GenomeLoc genomeLocation = null;
    private boolean isReference = false;
+    public static final int LEFT_WINDOW_SIZE = 20;

    /**
     * Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
@ -111,7 +112,7 @@ public class Haplotype {

        byte[] refBases = ref.getBases();

-        int numPrefBases = 20;
+        int numPrefBases = LEFT_WINDOW_SIZE;

        int startIdxInReference = (int)(1+vc.getStart()-numPrefBases-ref.getWindow().getStart());
        //int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event