Major change to UG engine to support:

a) Genotype Given Alleles (GGA) mode with indels
b) Genotyping and computing likelihoods of multi-allelic sites.

When the GGA option is enabled, indels will be called on regular pileups, not on extended pileups (extended pileups will be removed shortly in a subsequent iteration). As a result, likelihood computation is suboptimal since we can't see reads that start with an insertion immediately after a position; hence the quality of some insertion calls is reduced and we could be missing a few marginal calls, but it makes everything else much simpler.
For multi-allelic sites, we currently can't call them in discovery mode, but we can genotype them and compute/report full PLs on them (annotation support comes in the next commit). There are several suboptimal approximations made in the exact model to compute this. Ideally, the joint likelihood Pr(Data | AC1=i, AC2=j, ...) should be computed, but this is hard. Instead, marginal likelihoods Pr(Data | ACi=k) are computed for all i,k, and QUAL is based on the highest-likelihood allele.




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5941 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2011-06-03 22:13:07 +00:00
parent 4c6751ec3c
commit a8faacda4e
10 changed files with 256 additions and 116 deletions

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -66,12 +67,13 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
* @param tracker rod data
* @param ref reference context
* @param GLs genotype likelihoods
* @param Alleles Alleles corresponding to GLs
* @param log10AlleleFrequencyPriors priors
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
*/
protected abstract void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, Genotype> GLs,
Map<String, Genotype> GLs, Set<Allele> Alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors);

View File

@ -58,7 +58,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private boolean SIMPLE_GREEDY_GENOTYPER = false;
final private ExactCalculation calcToUse;
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
public void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, Genotype> GLs,
Map<String, Genotype> GLs, Set<Allele>alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
// todo -- REMOVE ME AFTER TESTING
@ -78,11 +78,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
int idxAA = GenotypeType.AA.ordinal();
int idxAB = GenotypeType.AB.ordinal();
int idxBB = GenotypeType.BB.ordinal();
// todo -- remove me after testing
if ( N_CYCLES > 1 ) {
for ( int i = 0; i < N_CYCLES; i++) {
timerGS.restart();
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB);
timerGS.stop();
timerExpt.restart();
@ -95,20 +99,60 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
int lastK = -1;
switch ( calcToUse ) {
case N2_GOLD_STANDARD:
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors);
break;
case LINEAR_EXPERIMENTAL:
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors);
break;
int numAlleles = alleles.size();
int idxDiag = numAlleles;
int incr = numAlleles - 1;
double[][] posteriorCache = new double[numAlleles-1][];
double[] bestAFguess = new double[numAlleles-1];
for (int k=1; k < numAlleles; k++) {
// multi-allelic approximation, part 1: Ideally
// for each alt allele compute marginal (suboptimal) posteriors -
// compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of
// as a row-wise upper triangular matrix of likelihoods.
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
idxAA = 0;
idxAB = k;
// yy is always element on the diagonal.
// 2 alleles: BBelement 2
// 3 alleles: BB element 3. CC element 5
// 4 alleles:
idxBB = idxDiag;
idxDiag += incr--;
// todo - possible cleanup
switch ( calcToUse ) {
case N2_GOLD_STANDARD:
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
case LINEAR_EXPERIMENTAL:
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
}
if (numAlleles > 2) {
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
}
}
if (numAlleles > 2) {
// multiallelic approximation, part 2:
// report posteriors for allele that has highest estimated AC
int mostLikelyAlleleIdx = MathUtils.maxElementIndex(bestAFguess);
for (int k=0; k < log10AlleleFrequencyPosteriors.length-1; k++)
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
}
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
if ( COMPARE_TO_GS ) {
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors);
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB);
double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
@ -268,7 +312,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
public int linearExact(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
final int numSamples = GLs.size();
final int numChr = 2*numSamples;
final double[][] genotypeLikelihoods = getGLs(GLs);
@ -285,7 +329,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( k == 0 ) { // special case for k = 0
for ( int j=1; j <= numSamples; j++ ) {
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][idxAA];
}
} else { // k > 0
final double[] kMinus1 = logY.getkMinus1();
@ -298,14 +342,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
double aa = Double.NEGATIVE_INFINITY;
double ab = Double.NEGATIVE_INFINITY;
if (k < 2*j-1)
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[idxAA];
if (k < 2*j)
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[idxAB];
double log10Max;
if (k > 1) {
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[idxBB];
log10Max = approximateLog10SumLog10(aa, ab, bb);
} else {
// we know we aren't considering the BB case, so we can use an optimized log10 function
@ -385,8 +429,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( !vc.isVariant() )
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
Allele refAllele = vc.getReference();
Allele altAllele = vc.getAlternateAllele(0);
boolean multiAllelicRecord = false;
if (vc.getAlternateAlleles().size() > 1)
multiAllelicRecord = true;
Map<String, Genotype> GLs = vc.getGenotypes();
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
@ -402,7 +448,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
pathMetricArray[0][0] = 0.0;
if (SIMPLE_GREEDY_GENOTYPER) {
// todo = can't deal with optimal dynamic programming solution with multiallelic records
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord) {
sampleIndices.addAll(GLs.keySet());
sampleIdx = GLs.size();
}
@ -453,7 +500,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( !g.hasLikelihoods() )
continue;
if (SIMPLE_GREEDY_GENOTYPER)
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord)
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
else {
int newIdx = tracebackArray[k][startIdx];
@ -463,24 +510,48 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
double qual;
double qual = Double.NEGATIVE_INFINITY;
double[] likelihoods = g.getLikelihoods().getAsVector();
/* System.out.format("Sample: %s GL:",sample);
for (int i=0; i < likelihoods.length; i++)
System.out.format("%1.4f ",likelihoods[i]);
*/
if (bestGTguess == 0) {
myAlleles.add(refAllele);
myAlleles.add(refAllele);
qual = likelihoods[0] - Math.max(likelihoods[1], likelihoods[2]);
} else if(bestGTguess == 1) {
myAlleles.add(refAllele);
myAlleles.add(altAllele);
qual = likelihoods[1] - Math.max(likelihoods[0], likelihoods[2]);
} else {
myAlleles.add(altAllele);
myAlleles.add(altAllele);
qual = likelihoods[2] - Math.max(likelihoods[1], likelihoods[0]);
for (int i=0; i < likelihoods.length; i++) {
if (i==bestGTguess)
continue;
if (likelihoods[i] >= qual)
qual = likelihoods[i];
}
// qual contains now max(likelihoods[k]) for all k != bestGTguess
qual = likelihoods[bestGTguess] - qual;
// likelihoods are stored row-wise in upper triangular matrix. IE
// for 2 alleles they have ordering AA,AB,BB
// for 3 alleles they are ordered AA,AB,AC,BB,BC,CC
// Get now alleles corresponding to best index
int kk=0;
boolean done = false;
for (int i=0; i < vc.getNAlleles(); i++) {
for (int j=i; j < vc.getNAlleles(); j++){
if (kk++ == bestGTguess) {
if (i==0)
myAlleles.add(vc.getReference());
else
myAlleles.add(vc.getAlternateAllele(i-1));
if (j==0)
myAlleles.add(vc.getReference());
else
myAlleles.add(vc.getAlternateAllele(j-1));
done = true;
break;
}
}
if (done)
break;
}
if (qual < 0) {
// QUAL can be negative if the chosen genotype is not the most likely one individually.
@ -489,7 +560,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
double chosenGenotype = normalized[bestGTguess];
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
}
//System.out.println(myAlleles.toString());
calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
}
@ -506,7 +577,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// -------------------------------------------------------------------------------------
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
int numSamples = GLs.size();
int numChr = 2*numSamples;
@ -516,7 +587,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
for (int j=0; j <=numChr; j++)
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
//YMatrix[0][0] = 1.0;
//YMatrix[0][0] = 1.0;
logYMatrix[0][0] = 0.0;
int j=0;
@ -533,7 +604,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// special treatment for k=0: iteration reduces to:
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[GenotypeType.AA.ordinal()];
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA];
for (int k=1; k <= 2*j; k++ ) {
@ -542,20 +613,20 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
logNumerator = new double[3];
if (k < 2*j-1)
logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
genotypeLikelihoods[GenotypeType.AA.ordinal()];
genotypeLikelihoods[idxAA];
else
logNumerator[0] = Double.NEGATIVE_INFINITY;
if (k < 2*j)
logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
genotypeLikelihoods[GenotypeType.AB.ordinal()];
genotypeLikelihoods[idxAB];
else
logNumerator[1] = Double.NEGATIVE_INFINITY;
if (k > 1)
logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
genotypeLikelihoods[GenotypeType.BB.ordinal()];
genotypeLikelihoods[idxBB];
else
logNumerator[2] = Double.NEGATIVE_INFINITY;

View File

@ -88,7 +88,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, BiallelicGenotypeLikelihoods> GLs,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse, boolean useBAQedPileup);
protected int getFilteredDepth(ReadBackedPileup pileup) {

View File

@ -54,7 +54,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
protected void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, Genotype> GLs,
Map<String, Genotype> GLs, Set<Allele>alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
initializeAFMatrix(GLs);

View File

@ -60,7 +60,13 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private PairHMMIndelErrorModel pairModel;
private HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap;
private static ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>>() {
protected synchronized HashMap<PileupElement,LinkedHashMap<Allele,Double>> initialValue() {
return new HashMap<PileupElement,LinkedHashMap<Allele,Double>>();
}
};
private LinkedHashMap<Allele,Haplotype> haplotypeMap;
// gdebug removeme
@ -69,7 +75,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private boolean useOldWrongHorribleHackedUpLikelihoodModel = false;
//
private GenomeLoc lastSiteVisited;
private List<Allele> alleleList;
private ArrayList<Allele> alleleList;
static {
indelLikelihoodMap.set(new HashMap<PileupElement,LinkedHashMap<Allele,Double>>());
}
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
@ -99,7 +110,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
indelLikelihoodMap = new HashMap<PileupElement,LinkedHashMap<Allele,Double>>();
haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
}
@ -289,7 +299,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, BiallelicGenotypeLikelihoods> GLs,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
@ -305,13 +315,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// starting a new site: clear allele list
alleleList.clear();
lastSiteVisited = ref.getLocus();
indelLikelihoodMap.clear();
indelLikelihoodMap.set(new HashMap<PileupElement,LinkedHashMap<Allele,Double>>());
haplotypeMap.clear();
if (getAlleleListFromVCF) {
for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", null, ref.getLocus(), false, false) ) {
if( vc_input != null && ! vc_input.isFiltered() && vc_input.isIndel() && ref.getLocus().getStart() == vc_input.getStart()) {
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL);
allowableTypes.add(VariantContext.Type.MIXED);
for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles",
allowableTypes, ref.getLocus(), false, false) ) {
if( vc_input != null && ref.getLocus().getStart() == vc_input.getStart()) {
vc = vc_input;
break;
}
@ -320,10 +332,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if ( vc == null )
return null;
if (!vc.isIndel())
return null;
alleleList.clear();
for (Allele a : vc.getAlleles())
alleleList.add(a);
@ -350,11 +358,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
refAllele = alleleList.get(0);
altAllele = alleleList.get(1);
// look for alt allele that has biggest length distance to ref allele
int maxLenDiff = 0;
for (Allele a: alleleList) {
if(a.isNonReference()) {
int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
if (lenDiff > maxLenDiff) {
maxLenDiff = lenDiff;
altAllele = a;
}
}
}
int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
// assume only one alt allele for now
//List<Haplotype> haplotypesInVC;
int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
@ -386,27 +402,34 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (pileup != null ) {
double[] genotypeLikelihoods;
if (useOldWrongHorribleHackedUpLikelihoodModel)
genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
else
genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, HAPLOTYPE_SIZE, eventLength, indelLikelihoodMap);
genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(),
refAllele,
altAllele,
genotypeLikelihoods[0],
genotypeLikelihoods[1],
genotypeLikelihoods[2],
// which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is lexically ordered, for example
// for 3 alleles it's 00 01 02 11 12 22
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
alleleList,
genotypeLikelihoods,
getFilteredDepth(pileup)));
if (DEBUG)
System.out.format("Sample:%s GL:%4.2f %4.2f %4.2f\n",sample.getKey(), genotypeLikelihoods[0],genotypeLikelihoods[1], genotypeLikelihoods[2]);
if (DEBUG) {
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
for (int k=0; k < genotypeLikelihoods.length; k++)
System.out.format("%1.4f ",genotypeLikelihoods[k]);
System.out.println();
}
}
}
return refAllele;
}
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
return indelLikelihoodMap.get();
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.util.variantcontext.Allele;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.ArrayList;
@ -19,10 +20,15 @@ public class MultiallelicGenotypeLikelihoods {
public MultiallelicGenotypeLikelihoods(String sample,
ArrayList<Allele> A,
double[] log10AALikelihoods, int depth) {
double[] log10Likelihoods, int depth) {
/* Check for consistency between likelihood vector and number of alleles */
int numAlleles = A.size();
if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2)
throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!"));
this.sample = sample;
this.alleleList = A;
this.GLs = log10AALikelihoods;
this.GLs = log10Likelihoods;
this.depth = depth;
}

View File

@ -79,7 +79,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, BiallelicGenotypeLikelihoods> GLs,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
@ -136,13 +136,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase);
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele);
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele);
GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(),
refAllele,
altAllele,
likelihoods[refGenotype.ordinal()],
likelihoods[hetGenotype.ordinal()],
likelihoods[homGenotype.ordinal()],
getFilteredDepth(pileup)));
ArrayList<Allele> aList = new ArrayList<Allele>();
aList.add(refAllele);
aList.add(altAllele);
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
aList, dlike, getFilteredDepth(pileup)));
}
return refAllele;

View File

@ -82,7 +82,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
public boolean includeReadsWithDeletionAtLoci() { return true; }
// enable extended events for indels
public boolean generateExtendedEvents() { return UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP; }
public boolean generateExtendedEvents() {
return (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES);
}
/**
* Inner class for collecting output statistics from the UG

View File

@ -144,7 +144,7 @@ public class UnifiedGenotyperEngine {
if ( UAC.COVERAGE_AT_WHICH_TO_ABORT > 0 && rawContext.size() > UAC.COVERAGE_AT_WHICH_TO_ABORT )
return null;
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext );
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel(tracker, refContext, rawContext );
if( model == null ) {
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
}
@ -171,7 +171,7 @@ public class UnifiedGenotyperEngine {
* @return the VariantContext object
*/
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext );
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( tracker, refContext, rawContext );
if( model == null )
return null;
@ -192,7 +192,7 @@ public class UnifiedGenotyperEngine {
* @return the VariantCallContext object
*/
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) {
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext );
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel(tracker, refContext, rawContext );
if( model == null ) {
return null;
}
@ -217,7 +217,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
Map<String, BiallelicGenotypeLikelihoods> GLs = new HashMap<String, BiallelicGenotypeLikelihoods>();
Map<String, MultiallelicGenotypeLikelihoods> GLs = new HashMap<String, MultiallelicGenotypeLikelihoods>();
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
@ -259,21 +259,23 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vc, ref.getBase(), false);
}
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, BiallelicGenotypeLikelihoods> GLs) {
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
// no-call everyone for now
List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
Set<Allele> alleles = new HashSet<Allele>();
Set<Allele> alleles = new LinkedHashSet<Allele>();
alleles.add(refAllele);
boolean addedAltAllele = false;
boolean addedAltAlleles = false;
HashMap<String, Genotype> genotypes = new HashMap<String, Genotype>();
for ( BiallelicGenotypeLikelihoods GL : GLs.values() ) {
if ( !addedAltAllele ) {
addedAltAllele = true;
alleles.add(GL.getAlleleA());
alleles.add(GL.getAlleleB());
for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) {
if ( !addedAltAlleles ) {
addedAltAlleles = true;
// ordering important to maintain consistency
for (Allele a: GL.getAlleles()) {
alleles.add(a);
}
}
HashMap<String, Object> attributes = new HashMap<String, Object>();
@ -316,7 +318,7 @@ public class UnifiedGenotyperEngine {
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
// find the most likely frequency
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
@ -374,7 +376,7 @@ public class UnifiedGenotyperEngine {
// the overall lod
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
@ -382,7 +384,7 @@ public class UnifiedGenotyperEngine {
// the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
@ -391,7 +393,7 @@ public class UnifiedGenotyperEngine {
// the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
@ -465,18 +467,33 @@ public class UnifiedGenotyperEngine {
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) {
ReadBackedExtendedEventPileup rawPileup = rawContext.getExtendedEventPileup();
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
// regular pileup in this case
ReadBackedPileup pileup = rawContext.getBasePileup() .getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
// filter the context based on min mapping quality
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
// don't call when there is no coverage
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
return null;
// don't call when there is no coverage
if ( pileup.size() == 0 && !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) )
return null;
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
} else {
// todo - tmp will get rid of extended events so this wont be needed
if (!rawContext.hasExtendedEventPileup())
return null;
ReadBackedExtendedEventPileup rawPileup = rawContext.getExtendedEventPileup();
// filter the context based on min mapping quality
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
// don't call when there is no coverage
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
return null;
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
}
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
@ -583,14 +600,35 @@ public class UnifiedGenotyperEngine {
}
// decide whether we are currently processing SNPs, indels, or neither
private GenotypeLikelihoodsCalculationModel.Model getCurrentGLModel( final AlignmentContext rawContext ) {
if( (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) && rawContext.hasExtendedEventPileup() ) {
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
} else if( (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP) && !rawContext.hasExtendedEventPileup() ) {
return GenotypeLikelihoodsCalculationModel.Model.SNP;
} else {
return null;
private GenotypeLikelihoodsCalculationModel.Model getCurrentGLModel(final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext rawContext ) {
if (rawContext.hasExtendedEventPileup() ) {
// todo - remove this code
if ((UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) &&
(UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) )
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
}
else {
// no extended event pileup
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, refContext, false, logger);
if (vcInput == null)
return null;
if (vcInput.isSNP() && ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL))
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
} else {
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)
return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
}
}
return null;
}
protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) {

View File

@ -712,7 +712,7 @@ public class PairHMMIndelErrorModel {
}
}
public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele,Haplotype> haplotypeMap,
ReferenceContext ref, int haplotypeSize, int eventLength,
ReferenceContext ref, int eventLength,
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
int numHaplotypes = haplotypeMap.size();
@ -1022,8 +1022,7 @@ public class PairHMMIndelErrorModel {
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
double[] readLikelihood = new double[2]; // diploid sample
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.