From 9b9744109c357f2dd1d50cccb0022d518526cf42 Mon Sep 17 00:00:00 2001
From: ebanks <ebanks@348d0f76-0448-11de-a6fe-93d51630548a>
Date: Wed, 21 Oct 2009 02:39:23 +0000
Subject: [PATCH] Mark's new unified calculation model is now officially
 implemented. Because it doesn't actually use EM, it's no longer a subclass of
 the EM model.

Note that you can't use it just yet because it doesn't actually emit calls (just prints to logger).  I need to deal with general UG output tomorrow.  Hold off until then, Mark, and then you can go wild.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1891 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../AllMAFsGenotypeCalculationModel.java      | 214 +++++++++++++-----
 .../genotyper/EMGenotypeCalculationModel.java | 126 +----------
 .../genotyper/GenotypeCalculationModel.java   | 130 +++++++++++
 ...PointEstimateGenotypeCalculationModel.java |   2 +-
 4 files changed, 285 insertions(+), 187 deletions(-)
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AllMAFsGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AllMAFsGenotypeCalculationModel.java
index b7404b8c5..56795a5d2 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AllMAFsGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AllMAFsGenotypeCalculationModel.java
@@ -1,12 +1,13 @@
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.genotype.*;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 
 import java.util.*;
 
-public class AllMAFsGenotypeCalculationModel extends EMGenotypeCalculationModel {
+public class AllMAFsGenotypeCalculationModel extends GenotypeCalculationModel {
 
     protected AllMAFsGenotypeCalculationModel() {}
 
@@ -14,39 +15,78 @@ public class AllMAFsGenotypeCalculationModel extends EMGenotypeCalculationModel
     // we cache the results to avoid having to recompute everything
     private HashMap<Integer, double[]> nullAlleleFrequencyCache = new HashMap<Integer, double[]>();
 
-    // the allele frequencies
-    private double[][] alleleFrequencies = new double[3][];
-    private double[][] oldAlleleFrequencies;
+    // because the Hardy-Weinberg values for a given frequency are constant,
+    // we cache the results to avoid having to recompute everything
+    private HashMap<Double, double[]> hardyWeinbergValueCache = new HashMap<Double, double[]>();
 
-    // keep track of whether or not a given MAF is stable
-    private boolean[] frequencyStabilityArray = new boolean[3];
+    // the allele frequency priors
+    private double[] alleleFrequencyPriors;
+
+    // the allele frequency posteriors and P(f>0) mapped by alternate allele
+    private HashMap<Character, double[]> alleleFrequencyPosteriors = new HashMap<Character, double[]>();
+    private HashMap<Character, Double> PofFs = new HashMap<Character, Double>();
 
     // the minimum and actual number of points in our allele frequency estimation
-    private static final int MIN_ESTIMATION_POINTS = 1000;
-    private int estimationPoints;
+    private static final int MIN_ESTIMATION_POINTS = 20; //1000;
+    private int frequencyEstimationPoints;
 
     // the GenotypeLikelihoods map
     private HashMap<String, AlleleSpecificGenotypeLikelihoods> GLs = new HashMap<String, AlleleSpecificGenotypeLikelihoods>();
 
 
-    protected void initializeAlleleFrequencies(int numSamples, char ref) {
-        // first, initialize the stability array to "unstable"
-        for (int i = 0; i < 3; i++)
-            frequencyStabilityArray[i] = false;
+    public Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
+
+        // keep track of the context for each sample, overall and separated by strand
+        HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
+        if ( contexts == null )
+            return null;
+
+        calculate(ref, contexts, priors, StratifiedContext.OVERALL);
+
+
+        //double lod = overall.getPofD() - overall.getPofNull();
+        //logger.debug("lod=" + lod);
+
+        // calculate strand score
+        //EMOutput forward = calculate(ref, contexts, priors, StratifiedContext.FORWARD);
+        //EMOutput reverse = calculate(ref, contexts, priors, StratifiedContext.REVERSE);
+        //double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
+        //double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
+        //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
+        //double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
+
+        //logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore));
+
+        // generate the calls
+        //GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
+        //return new Pair<List<GenotypeCall>, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata);
+        return null;
+    }
+
+    private void calculate(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {
+
+        initializeAlleleFrequencies(contexts.size());
+
+        initializeGenotypeLikelihoods(ref, contexts, priors, contextType);
+
+        calculateAlleleFrequencyPosteriors(ref);
+    }
+
+    private void initializeAlleleFrequencies(int numSamples) {
 
         // calculate the number of estimation points to use:
         // it's either MIN_ESTIMATION_POINTS or 2N if that's larger
         // (add 1 for allele frequency of zero)
-        estimationPoints = Math.max(MIN_ESTIMATION_POINTS, 2 * numSamples) + 1;
+        frequencyEstimationPoints = Math.max(MIN_ESTIMATION_POINTS, 2 * numSamples) + 1;
 
-        for (int alt = 0; alt < 3; alt++)
-            alleleFrequencies[alt] = getNullAlleleFrequencies(estimationPoints);
+        // set up the allele frequency priors
+        alleleFrequencyPriors = getNullalleleFrequencyPriors(frequencyEstimationPoints);
 
-        for (int i = 1; i < estimationPoints; i++)
-            logger.debug("Initial allele frequency for MAF=" + i + ": " + alleleFrequencies[0][i]);
+        for (int i = 1; i < frequencyEstimationPoints; i++)
+            logger.debug("Null allele frequency for MAF=" + i + ": " + alleleFrequencyPriors[i]);
     }
 
-    private double[] getNullAlleleFrequencies(int N) {
+    private double[] getNullalleleFrequencyPriors(int N) {
         double[] AFs = nullAlleleFrequencyCache.get(N);
 
         // if it hasn't been calculated yet, do so now
@@ -71,7 +111,7 @@ public class AllMAFsGenotypeCalculationModel extends EMGenotypeCalculationModel
         return AFs.clone();
     }
 
-    protected void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {
+    private void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {
         GLs.clear();
 
         for ( String sample : contexts.keySet() ) {
@@ -87,76 +127,128 @@ public class AllMAFsGenotypeCalculationModel extends EMGenotypeCalculationModel
         }
     }
 
-    protected void calculateAlleleFrequencyPosteriors() {
+    private void calculateAlleleFrequencyPosteriors(char ref) {
 
-    }
-
-    protected void applyAlleleFrequencyToGenotypeLikelihoods() {
-
-    }
-
-    protected boolean isStable() {
-        // We consider the EM stable when the MAF doesn't change more than EM_STABILITY_METRIC
-        // We compute this separately for all of the alternate alleles
-        for (int i = 0; i < 3; i++) {
-            // if we've already determined that a MAF is stable, don't recalculate
-            if ( frequencyStabilityArray[i] )
+        for ( char altAllele : BaseUtils.BASES ) {
+            if ( altAllele == ref )
                 continue;
 
-            // determine change
-            double AF_delta = 0.0;
-            for (int j = 1; j < estimationPoints; j++)
-                AF_delta += Math.abs(oldAlleleFrequencies[i][j] - alleleFrequencies[i][j]);
+            double[] AFPosteriors = new double[frequencyEstimationPoints];
 
-            // if it's not stable, we're done
-            if (AF_delta > EM_STABILITY_METRIC)
-                return false;
+            for ( AlleleSpecificGenotypeLikelihoods GL : GLs.values() ) {
+                double[] PofDgivenG = GL.getNormalizedPosteriors(altAllele);
 
-            // it's stable, so record that fact in the stability array
-            frequencyStabilityArray[i] = true;
+                // calculate the posterior weighted frequencies for this sample
+                for (int i = 1; i < frequencyEstimationPoints; i++) {
+                    double f = (double)i / (double)(frequencyEstimationPoints-1);
+                    double[] hardyWeinberg = getHardyWeinbergValues(f);
+
+                    double PofDgivenAFi = 0.0;
+                    PofDgivenAFi += hardyWeinberg[GenotypeType.REF.ordinal()] * PofDgivenG[GenotypeType.REF.ordinal()];
+                    PofDgivenAFi += hardyWeinberg[GenotypeType.HET.ordinal()] * PofDgivenG[GenotypeType.HET.ordinal()];
+                    PofDgivenAFi += hardyWeinberg[GenotypeType.HOM.ordinal()] * PofDgivenG[GenotypeType.HOM.ordinal()];
+
+                    AFPosteriors[i] += Math.log10(PofDgivenAFi);
+                }
+            }
+
+            for (int i = 1; i < frequencyEstimationPoints; i++)
+                logger.debug("P(D|AF=" + i + ") for alt allele " + altAllele + ": " + AFPosteriors[i]);
+
+            // 1) multiply by null allele frequency priors to get AF posteriors
+            // 2) for precision purposes, we need to subtract the largest posterior value
+            // 3) convert back to normal space
+            double maxValue = findMaxEntry(AFPosteriors, true);
+            for (int i = 1; i < frequencyEstimationPoints; i++)
+                AFPosteriors[i] = Math.pow(10, alleleFrequencyPriors[i] + AFPosteriors[i] - maxValue);
+
+            // normalize
+            double sum = 0.0;
+            for (int i = 1; i < frequencyEstimationPoints; i++)
+                sum += AFPosteriors[i];
+            for (int i = 1; i < frequencyEstimationPoints; i++)
+                AFPosteriors[i] /= sum;
+
+            for (int i = 1; i < frequencyEstimationPoints; i++)
+                logger.debug("Allele frequency posterior estimate for alt allele " + altAllele + " MAF=" + i + ": " + AFPosteriors[i]);
+            logger.debug("P(f>0) for alt allele " + altAllele + ": " + sum);
+
+            alleleFrequencyPosteriors.put(altAllele, AFPosteriors);
+            PofFs.put(altAllele, sum);
+        }
+    }
+
+    private double[] getHardyWeinbergValues(double f) {
+        double[] values = hardyWeinbergValueCache.get(f);
+
+        // if it hasn't been calculated yet, do so now
+        if ( values == null ) {
+            // p^2, 2pq, q^2
+            values = new double[] { Math.pow(1.0 - f, 2), 2.0 * (1.0 - f) * f, Math.pow(f, 2) };
+            hardyWeinbergValueCache.put(f, values);
         }
 
-        // if we got here, then we're stable
-        return true;
+        return values;
     }
 
-    protected EMOutput computePofF(char ref) {
-        return null;
+    private static double findMaxEntry(double[] array, boolean skipZeroIndex) {
+        int index = skipZeroIndex ? 1 : 0;
+        double max = array[index++];
+        for (; index < array.length; index++) {
+            if ( array[index] > max )
+                max = array[index];
+        }
+        return max;
     }
 
 
+    private enum GenotypeType { REF, HET, HOM }
+
     /**
-     * A class for the allele-specific genotype likelihoods
+     * A class for the allele-specific GL posteriors
     */
-    protected class AlleleSpecificGenotypeLikelihoods {
-        private HashMap<String, Pair<double[], double[]>> GLs = new HashMap<String, Pair<double[], double[]>>();
+    private class AlleleSpecificGenotypeLikelihoods {
+        // mapping from alt allele base to posteriors
+        private HashMap<Character, double[]> alleleGLs = new HashMap<Character, double[]>();
 
         AlleleSpecificGenotypeLikelihoods(char ref, GenotypeLikelihoods GL) {
-            double[] likelihoods = GL.getLikelihoods();
             double[] posteriors = GL.getPosteriors();
 
-
-
-            // get the ref likelihood/posterior
+            // get the ref data
             DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
-            double refLikelihood = GL.getLikelihood(refGenotype);
-            double refPosterior = GL.getPosterior(refGenotype);
+            double refPosterior = posteriors[refGenotype.ordinal()];
             String refStr = String.valueOf(ref);
 
             for ( char base : BaseUtils.BASES ) {
                 if ( base == ref )
                     continue;
 
-                // get hom var and het likelihoods
-                double homLikelihood = GL.getLikelihood(DiploidGenotype.createHomGenotype(base));
-                double hetLikelihood = GL.getLikelihood(DiploidGenotype.valueOf(refStr + String.valueOf(base)));
+                // get hom var and het data
+                DiploidGenotype hetGenotype = ref < base ? DiploidGenotype.valueOf(refStr + String.valueOf(base)) : DiploidGenotype.valueOf(String.valueOf(base) + refStr);
+                DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(base);
+                double hetPosterior = posteriors[hetGenotype.ordinal()];
+                double homPosterior = posteriors[homGenotype.ordinal()];
 
+                // for precision purposes, we need to add (or really subtract, since everything is negative)
+                // the largest posterior value from all entries so that numbers don't get too small
+                double maxValue = Math.max(Math.max(refPosterior, hetPosterior), homPosterior);
+                double normalizedRefPosterior = Math.pow(10, refPosterior - maxValue);
+                double normalizedHetPosterior = Math.pow(10, hetPosterior - maxValue);
+                double normalizedHomPosterior = Math.pow(10, homPosterior - maxValue);
 
+                // normalize
+                double sum = normalizedRefPosterior + normalizedHetPosterior + normalizedHomPosterior;
+                normalizedRefPosterior /= sum;
+                normalizedHetPosterior /= sum;
+                normalizedHomPosterior /= sum;
+
+                double[] altPosteriors = new double[] { normalizedRefPosterior, normalizedHetPosterior, normalizedHomPosterior };
+                alleleGLs.put(base, altPosteriors);
             }
         }
 
-
-
-
+        public double[] getNormalizedPosteriors(char altAllele) {
+            return alleleGLs.get(altAllele);
+        }
     }
 }
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java
index c319a61ba..ba0d657c0 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java
@@ -1,7 +1,5 @@
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.SAMReadGroupRecord;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.utils.*;
@@ -108,65 +106,10 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
     protected abstract boolean isStable();
     protected abstract EMOutput computePofF(char ref);
 
-    /**
-     * Create the mapping from sample to alignment context; also, fill in the base counts along the way.
-     * Returns null iff there are too many deletions at this position.
-     */
-    protected HashMap<String, AlignmentContextBySample> splitContextBySample(AlignmentContext context) {
-
-        HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
-
-        int deletionsInPile = 0;
-        List<SAMRecord> reads = context.getReads();
-        List<Integer> offsets = context.getOffsets();
-
-        for (int i = 0; i < reads.size(); i++) {
-
-            // get the read and offset
-            SAMRecord read = reads.get(i);
-            int offset = offsets.get(i);
-
-            // find the sample; special case for pools
-            String sample;
-            if ( POOLED_INPUT ) {
-                sample = "POOL";
-            } else {
-                SAMReadGroupRecord readGroup = read.getReadGroup();
-                if ( readGroup == null ) {
-                    if ( assumedSingleSample == null )
-                        throw new StingException("Missing read group for read " + read.getReadName());
-                    sample = assumedSingleSample;
-                } else {
-                    sample = readGroup.getSample();
-                }
-            }
-
-            // create a new context object if this is the first time we're seeing a read for this sample
-            AlignmentContextBySample myContext = contexts.get(sample);
-            if ( myContext == null ) {
-                myContext = new AlignmentContextBySample(context.getLocation());
-                contexts.put(sample, myContext);
-            }
-
-            // check for deletions
-            if ( offset == -1 ) {
-                // are there too many deletions in the pileup?
-                if ( ++deletionsInPile > maxDeletionsInPileup )
-                    return null;
-            }
-
-            // add the read to this sample's context
-            // note that bad bases are added to the context (for DoC calculations later)
-            myContext.add(read, offset);
-        }
-
-        return contexts;
-    }
-
 
     /**
      * A class to keep track of the EM output
-    */
+     */
     protected class EMOutput {
         private double pD, pNull, pF, MAF;
         private HashMap<String, GenotypeLikelihoods> GLs;
@@ -185,71 +128,4 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
         public double getMAF() { return MAF; }
         public HashMap<String, GenotypeLikelihoods> getGenotypeLikelihoods() { return GLs; }
     }
-
-
-    /**
-     * A class to keep track of the alignment context observed for a given sample.
-     * we currently store the overall context and strand-stratified sets,
-     * but any arbitrary stratification could be added.
-    */
-    protected enum StratifiedContext { OVERALL, FORWARD, REVERSE }
-    protected class AlignmentContextBySample {
-
-        private AlignmentContext overall = null;
-        private AlignmentContext forward = null;
-        private AlignmentContext reverse = null;
-        private GenomeLoc loc;
-
-        private ArrayList<SAMRecord> allReads = new ArrayList<SAMRecord>();
-        private ArrayList<SAMRecord> forwardReads = new ArrayList<SAMRecord>();
-        private ArrayList<SAMRecord> reverseReads = new ArrayList<SAMRecord>();
-
-        private ArrayList<Integer> allOffsets = new ArrayList<Integer>();
-        private ArrayList<Integer> forwardOffsets = new ArrayList<Integer>();
-        private ArrayList<Integer> reverseOffsets = new ArrayList<Integer>();
-
-
-        AlignmentContextBySample(GenomeLoc loc) {
-            this.loc = loc;
-        }
-
-        public AlignmentContext getContext(StratifiedContext context) {
-            switch ( context ) {
-                case OVERALL: return getOverallContext();
-                case FORWARD: return getForwardContext();
-                case REVERSE: return getReverseContext();
-            }
-            return null;
-        }
-
-        private AlignmentContext getOverallContext() {
-            if ( overall == null )
-                overall = new AlignmentContext(loc, allReads, allOffsets);
-            return overall;
-        }
-
-        private AlignmentContext getForwardContext() {
-            if ( forward == null )
-                forward = new AlignmentContext(loc, forwardReads, forwardOffsets);
-            return forward;
-        }
-
-        private AlignmentContext getReverseContext() {
-            if ( reverse == null )
-                reverse = new AlignmentContext(loc, reverseReads, reverseOffsets);
-            return reverse;
-        }
-
-        public void add(SAMRecord read, int offset) {
-            if ( read.getReadNegativeStrandFlag() ) {
-                reverseReads.add(read);
-                reverseOffsets.add(offset);
-            } else {
-                forwardReads.add(read);
-                forwardOffsets.add(offset);
-            }
-            allReads.add(read);
-            allOffsets.add(offset);
-         }
-    }
 }
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java
index 94ce2a55f..1ec9d0842 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java
@@ -4,10 +4,15 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
 import org.broadinstitute.sting.utils.Pair;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
 import org.apache.log4j.Logger;
 
 import java.util.*;
 
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMReadGroupRecord;
+
 /**
  * The model representing how we calculate a genotype given the priors and a pile
  * of bases and quality scores
@@ -100,4 +105,129 @@ public abstract class GenotypeCalculationModel implements Cloneable {
                                                                                  char ref,
                                                                                  AlignmentContext context,
                                                                                  DiploidGenotypePriors priors);
+
+
+    /**
+     * Create the mapping from sample to alignment contexts.
+     * @param context   original alignment context
+     *
+     * @return stratified contexts, or null if there are too many deletions at this position
+     */
+    protected HashMap<String, AlignmentContextBySample> splitContextBySample(AlignmentContext context) {
+
+        HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
+
+        int deletionsInPile = 0;
+        List<SAMRecord> reads = context.getReads();
+        List<Integer> offsets = context.getOffsets();
+
+        for (int i = 0; i < reads.size(); i++) {
+
+            // get the read and offset
+            SAMRecord read = reads.get(i);
+            int offset = offsets.get(i);
+
+            // find the sample; special case for pools
+            String sample;
+            if ( POOLED_INPUT ) {
+                sample = "POOL";
+            } else {
+                SAMReadGroupRecord readGroup = read.getReadGroup();
+                if ( readGroup == null ) {
+                    if ( assumedSingleSample == null )
+                        throw new StingException("Missing read group for read " + read.getReadName());
+                    sample = assumedSingleSample;
+                } else {
+                    sample = readGroup.getSample();
+                }
+            }
+
+            // create a new context object if this is the first time we're seeing a read for this sample
+            AlignmentContextBySample myContext = contexts.get(sample);
+            if ( myContext == null ) {
+                myContext = new AlignmentContextBySample(context.getLocation());
+                contexts.put(sample, myContext);
+            }
+
+            // check for deletions
+            if ( offset == -1 ) {
+                // are there too many deletions in the pileup?
+                if ( ++deletionsInPile > maxDeletionsInPileup )
+                    return null;
+            }
+
+            // add the read to this sample's context
+            // note that bad bases are added to the context (for DoC calculations later)
+            myContext.add(read, offset);
+        }
+
+        return contexts;
+    }
+
+    
+    /**
+     * A class to keep track of the alignment context observed for a given sample.
+     * we currently store the overall context and strand-stratified sets,
+     * but any arbitrary stratification could be added.
+    */
+    protected enum StratifiedContext { OVERALL, FORWARD, REVERSE }
+    protected class AlignmentContextBySample {
+
+        private AlignmentContext overall = null;
+        private AlignmentContext forward = null;
+        private AlignmentContext reverse = null;
+        private GenomeLoc loc;
+
+        private ArrayList<SAMRecord> allReads = new ArrayList<SAMRecord>();
+        private ArrayList<SAMRecord> forwardReads = new ArrayList<SAMRecord>();
+        private ArrayList<SAMRecord> reverseReads = new ArrayList<SAMRecord>();
+
+        private ArrayList<Integer> allOffsets = new ArrayList<Integer>();
+        private ArrayList<Integer> forwardOffsets = new ArrayList<Integer>();
+        private ArrayList<Integer> reverseOffsets = new ArrayList<Integer>();
+
+
+        AlignmentContextBySample(GenomeLoc loc) {
+            this.loc = loc;
+        }
+
+        public AlignmentContext getContext(StratifiedContext context) {
+            switch ( context ) {
+                case OVERALL: return getOverallContext();
+                case FORWARD: return getForwardContext();
+                case REVERSE: return getReverseContext();
+            }
+            return null;
+        }
+
+        private AlignmentContext getOverallContext() {
+            if ( overall == null )
+                overall = new AlignmentContext(loc, allReads, allOffsets);
+            return overall;
+        }
+
+        private AlignmentContext getForwardContext() {
+            if ( forward == null )
+                forward = new AlignmentContext(loc, forwardReads, forwardOffsets);
+            return forward;
+        }
+
+        private AlignmentContext getReverseContext() {
+            if ( reverse == null )
+                reverse = new AlignmentContext(loc, reverseReads, reverseOffsets);
+            return reverse;
+        }
+
+        public void add(SAMRecord read, int offset) {
+            if ( read.getReadNegativeStrandFlag() ) {
+                reverseReads.add(read);
+                reverseOffsets.add(offset);
+            } else {
+                forwardReads.add(read);
+                forwardOffsets.add(offset);
+            }
+            allReads.add(read);
+            allOffsets.add(offset);
+         }
+    }
 }
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java
index ece842be8..73f0ce3e2 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java
@@ -133,7 +133,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
         }
 
         // normalize
-        double sum = 0;
+        double sum = 0.0;
         for (int i = 0; i < 4; i++)
             sum += alleleFrequencies[i];
         for (int i = 0; i < 4; i++)