-Refactored JointEstimation code so that pooled calling will work

-Use phred-scale for fisher strand test -Use only 2N allele frequency estimation points git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2059 348d0f76-0448-11de-a6fe-93d51630548a
2009-11-17 20:21:15 +00:00 · 2009-11-17 20:21:15 +00:00 · a048f5cdf1
parent 43bd4c8e8f
commit a048f5cdf1
10 changed files with 225 additions and 159 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
@ -31,7 +31,7 @@ public class FisherStrand implements VariantAnnotation {
        if ( pvalue == null )
            return null;

-        return new Pair<String, String>("FisherStrand", String.format("%.4f", pvalue));
+        return new Pair<String, String>("FisherStrand", String.format("%.1f", -10.0 * Math.log10(pvalue)));
    }

    public boolean useZeroQualityReads() { return false; }
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java
@ -0,0 +1,88 @@
+package org.broadinstitute.sting.gatk.walkers.genotyper;
+
+import org.broadinstitute.sting.utils.*;
+import org.broadinstitute.sting.utils.genotype.*;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+
+import java.util.*;
+
+public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalculationModel {
+
+    protected DiploidGenotypeCalculationModel() {}
+
+    // the GenotypeLikelihoods map
+    private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
+
+
+    protected HashMap<String, AlignmentContextBySample> createContexts(AlignmentContext context) {
+        return splitContextBySample(context);
+    }
+
+    protected void initializeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
+        GLs.clear();
+
+        // use flat priors for GLs
+        DiploidGenotypePriors priors = new DiploidGenotypePriors();
+
+        for ( String sample : contexts.keySet() ) {
+            AlignmentContextBySample context = contexts.get(sample);
+            ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
+
+            // create the GenotypeLikelihoods object
+            GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
+            GL.add(pileup, true);
+            GLs.put(sample, GL);
+        }
+    }
+
+    protected double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f) {
+        double PofDgivenAFi = 0.0;
+
+        // for each sample
+        for ( GenotypeLikelihoods GL : GLs.values() ) {
+
+            double[] posteriors = GL.getPosteriors();
+
+            double[] allelePosteriors = new double[] { posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] };
+            allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors);
+
+            // calculate the posterior weighted frequencies
+            double[] HWvalues = getHardyWeinbergValues(f);
+            double samplePofDgivenAFi = 0.0;
+            samplePofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()];
+            samplePofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()];
+            samplePofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()];
+            PofDgivenAFi += Math.log10(samplePofDgivenAFi);
+        }
+
+        return PofDgivenAFi;
+    }
+
+    protected List<Genotype> makeGenotypeCalls(char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
+        ArrayList<Genotype> calls = new ArrayList<Genotype>();
+
+        for ( String sample : GLs.keySet() ) {
+
+            // create the call
+            Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc);
+
+            if ( call instanceof ReadBacked ) {
+                ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
+                ((ReadBacked)call).setPileup(pileup);
+            }
+            if ( call instanceof SampleBacked ) {
+                ((SampleBacked)call).setSampleName(sample);
+            }
+            if ( call instanceof LikelihoodsBacked ) {
+                ((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
+            }
+            if ( call instanceof PosteriorsBacked ) {
+                ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
+            }
+
+            calls.add(call);
+        }
+
+        return calls;
+    }
+}
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java
@ -23,7 +23,8 @@ public abstract class GenotypeCalculationModel implements Cloneable {

    public enum Model {
        EM_POINT_ESTIMATE,
-        JOINT_ESTIMATE
+        JOINT_ESTIMATE,
+        POOLED
    }

    protected BaseMismatchModel baseModel;
@ -34,7 +35,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
    protected GenotypeWriterFactory.GENOTYPE_FORMAT OUTPUT_FORMAT;
    protected boolean ALL_BASE_MODE;
    protected boolean GENOTYPE_MODE;
-    protected boolean POOLED_INPUT;
    protected int POOL_SIZE;
    protected double CONFIDENCE_THRESHOLD;
    protected double MINIMUM_ALLELE_FREQUENCY;
@ -67,7 +67,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
        OUTPUT_FORMAT = UAC.VAR_FORMAT;
        ALL_BASE_MODE = UAC.ALL_BASES;
        GENOTYPE_MODE = UAC.GENOTYPE;
-        POOLED_INPUT = UAC.POOLED;
        POOL_SIZE = UAC.POOLSIZE;
        CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD;
        MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
@ -144,19 +143,15 @@ public abstract class GenotypeCalculationModel implements Cloneable {
            SAMRecord read = reads.get(i);
            int offset = offsets.get(i);

-            // find the sample; special case for pools
+            // find the sample
            String sample;
-            if ( POOLED_INPUT ) {
-                sample = "POOL";
+            SAMReadGroupRecord readGroup = read.getReadGroup();
+            if ( readGroup == null ) {
+                if ( assumedSingleSample == null )
+                    throw new StingException("Missing read group for read " + read.getReadName());
+                sample = assumedSingleSample;
            } else {
-                SAMReadGroupRecord readGroup = read.getReadGroup();
-                if ( readGroup == null ) {
-                    if ( assumedSingleSample == null )
-                        throw new StingException("Missing read group for read " + read.getReadName());
-                    sample = assumedSingleSample;
-                } else {
-                    sample = readGroup.getSample();
-                }
+                sample = readGroup.getSample();
            }

            // create a new context object if this is the first time we're seeing a read for this sample
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java
@ -59,7 +59,10 @@ public class GenotypeCalculationModelFactory {
                gcm = new PointEstimateGenotypeCalculationModel();
                break;
            case JOINT_ESTIMATE:
-                gcm = new JointEstimateGenotypeCalculationModel();
+                gcm = new DiploidGenotypeCalculationModel();
+                break;
+            case POOLED:
+                gcm = new PooledCalculationModel();
                break;
            default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
        }
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java
@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;

 import java.util.*;

-public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {
+public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {

    protected JointEstimateGenotypeCalculationModel() {}

@ -20,63 +20,53 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
    // we cache the results to avoid having to recompute everything
    private HashMap<Double, double[]> hardyWeinbergValueCache = new HashMap<Double, double[]>();

+    protected enum GenotypeType { REF, HET, HOM }
+
    // the allele frequency priors
-    private double[] log10AlleleFrequencyPriors;
+    protected double[] log10AlleleFrequencyPriors;

    // the allele frequency posteriors and P(f>0) for each alternate allele
-    private double[][] alleleFrequencyPosteriors = new double[BaseUtils.BASES.length][];
-    private double[][] log10PofDgivenAFi = new double[BaseUtils.BASES.length][];
-    private double[] PofFs = new double[BaseUtils.BASES.length];
+    protected double[][] alleleFrequencyPosteriors = new double[BaseUtils.BASES.length][];
+    protected double[][] log10PofDgivenAFi = new double[BaseUtils.BASES.length][];
+    protected double[] PofFs = new double[BaseUtils.BASES.length];

-    // the minimum and actual number of points in our allele frequency estimation
-    private static final int MIN_ESTIMATION_POINTS = 100;
-    private int frequencyEstimationPoints;
-
-    // the GenotypeLikelihoods map
-    private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
-
-    private enum GenotypeType { REF, HET, HOM }
+    
+    // these need to be implemented
+    protected abstract HashMap<String, AlignmentContextBySample> createContexts(AlignmentContext context);
+    protected abstract void initializeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType);
+    protected abstract double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f);
+    protected abstract List<Genotype> makeGenotypeCalls(char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc);


    public Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {

        // keep track of the context for each sample, overall and separated by strand
-        HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
+        HashMap<String, AlignmentContextBySample> contexts = createContexts(context);
        if ( contexts == null )
            return null;

-        // todo -- eric, can you refactor this into a superclass the manages the i over 2n calculation?
-        //
-        // The only thing that's different from pools and msg should be the calls to
-        // initializeGenotypeLikelihoods and calculateAlleleFrequencyPosteriors
-        // There's a lot of functionality here that can be separated into a superclass that requires
-        // a few methods be overridden and it'll magically work for all data types, etc.
-        //
-        // Here are some examples:
-        // getNumberSamples() ->
-        // likelihoodsOfDGivenF() -> for MSG, pools, etc -- see below
-        //
+        int numSamples = contexts.size();
+        int frequencyEstimationPoints = (2 * numSamples) + 1;  // (add 1 for allele frequency of zero)

-        initializeAlleleFrequencies(contexts.size());
+        initializeAlleleFrequencies(frequencyEstimationPoints);

-        // run joint estimation for the full GL contexts
-        initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.OVERALL);
-        calculateAlleleFrequencyPosteriors(ref, context.getLocation());
-        return createCalls(tracker, ref, contexts, context.getLocation());
-    }
+        initializeLikelihoods(ref, contexts, StratifiedContext.OVERALL);
+        calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints);
+        calculatePofFs(ref, frequencyEstimationPoints);

-    private void initializeAlleleFrequencies(int numSamples) {
+        // print out stats if we have a writer
+        if ( verboseWriter != null )
+            printAlleleFrequencyData(ref, context.getLocation(), frequencyEstimationPoints);

-        // calculate the number of estimation points to use:
-        // it's either MIN_ESTIMATION_POINTS or 2N if that's larger
-        // (add 1 for allele frequency of zero)
-        frequencyEstimationPoints = Math.max(MIN_ESTIMATION_POINTS, 2 * numSamples) + 1;
+        return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints);
+   }

+    private void initializeAlleleFrequencies(int frequencyEstimationPoints) {
        // set up the allele frequency priors
        log10AlleleFrequencyPriors = getNullAlleleFrequencyPriors(frequencyEstimationPoints);
    }

-    private double[] getNullAlleleFrequencyPriors(int N) {
+    protected double[] getNullAlleleFrequencyPriors(int N) {
        double[] AFs = nullAlleleFrequencyCache.get(N);

        // if it hasn't been calculated yet, do so now
@ -108,24 +98,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
        return AFs;
    }

-    private void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
-        GLs.clear();
-
-        // use flat priors for GLs
-        DiploidGenotypePriors priors = new DiploidGenotypePriors();
-
-        for ( String sample : contexts.keySet() ) {
-            AlignmentContextBySample context = contexts.get(sample);
-            ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
-
-            // create the GenotypeLikelihoods object
-            GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
-            GL.add(pileup, true);
-            GLs.put(sample, GL);
-        }
-    }
-
-    private void calculateAlleleFrequencyPosteriors(char ref, GenomeLoc verboseLocation) {
+    protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints) {

        // initialization
        for ( char altAllele : BaseUtils.BASES ) {
@ -134,53 +107,27 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
            log10PofDgivenAFi[baseIndex] = new double[frequencyEstimationPoints];
        }
        DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
+        String refStr = String.valueOf(ref);

-        //
-        // todo -- if you invert this loop
-        // foreach frequnecy
-        //   foreach altAllele
-        //
-        // then the pooled vs. msg calculation is just a function call difference
+        // for each alternate allele
+        for ( char altAllele : BaseUtils.BASES ) {
+            if ( altAllele == ref )
+                continue;

-        // for each minor allele frequency
-        for (int i = 0; i < frequencyEstimationPoints; i++) {
-            double f = (double)i / (double)(frequencyEstimationPoints-1);
+            int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);

-            // for each sample
-            for ( GenotypeLikelihoods GL : GLs.values() ) {
+            DiploidGenotype hetGenotype = ref < altAllele ? DiploidGenotype.valueOf(refStr + String.valueOf(altAllele)) : DiploidGenotype.valueOf(String.valueOf(altAllele) + refStr);
+            DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(altAllele);

-                double[] posteriors = GL.getPosteriors();
-
-                // get the ref data
-                double refPosterior = posteriors[refGenotype.ordinal()];
-                String refStr = String.valueOf(ref);
-
-                // for each alternate allele
-                for ( char altAllele : BaseUtils.BASES ) {
-                    if ( altAllele == ref )
-                        continue;
-
-                    int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
-
-                    DiploidGenotype hetGenotype = ref < altAllele ? DiploidGenotype.valueOf(refStr + String.valueOf(altAllele)) : DiploidGenotype.valueOf(String.valueOf(altAllele) + refStr);
-                    DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(altAllele);
-
-                    double[] allelePosteriors = new double[] { refPosterior, posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] };
-                    allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors);
-                    //logger.debug("Normalized posteriors for " + altAllele + ": " + allelePosteriors[0] + " " + allelePosteriors[1] + " " + allelePosteriors[2]);
-
-                    // calculate the posterior weighted frequencies
-                    double[] HWvalues = getHardyWeinbergValues(f);
-                    double PofDgivenAFi = 0.0;
-                    PofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()];
-                    PofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()];
-                    PofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()];
-                    log10PofDgivenAFi[baseIndex][i] += Math.log10(PofDgivenAFi);
-                }
+            // for each minor allele frequency
+            for (int i = 0; i < frequencyEstimationPoints; i++) {
+                double f = (double)i / (double)(frequencyEstimationPoints-1);
+                log10PofDgivenAFi[baseIndex][i] += computeLog10PofDgivenAFi(refGenotype, hetGenotype, homGenotype, f);
            }
        }
+    }

-        // todo -- shouldn't this be in a function for clarity?
+    protected void calculatePofFs(char ref, int frequencyEstimationPoints) {
        // for each alternate allele
        for ( char altAllele : BaseUtils.BASES ) {
            if ( altAllele == ref )
@ -199,13 +146,9 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
                sum += alleleFrequencyPosteriors[baseIndex][i];
            PofFs[baseIndex] = Math.min(sum, 1.0); // deal with precision errors
        }
-
-        // print out stats if we have a position and a writer
-        if ( verboseLocation != null && verboseWriter != null )
-            printAlleleFrequencyData(ref, verboseLocation);
    }

-    private double[] getHardyWeinbergValues(double f) {
+    protected double[] getHardyWeinbergValues(double f) {
        double[] HWvalues = hardyWeinbergValueCache.get(f);

        // if it hasn't been calculated yet, do so now
@ -231,7 +174,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
        return HWvalues;
    }

-    private void printAlleleFrequencyData(char ref, GenomeLoc loc) {
+    protected void printAlleleFrequencyData(char ref, GenomeLoc loc, int frequencyEstimationPoints) {

        verboseWriter.println("Location=" + loc + ", ref=" + ref);
        StringBuilder header = new StringBuilder("MAF\tNullAFpriors\t");
@ -267,7 +210,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
        verboseWriter.println();
    }

-    private Pair<List<Genotype>, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
+    protected Pair<List<Genotype>, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc, int frequencyEstimationPoints) {
        // first, find the alt allele with maximum confidence
        int indexOfMax = 0;
        char baseOfMax = ref;
@ -290,31 +233,8 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
        if ( !ALL_BASE_MODE && (bestAFguess == 0 || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
            return new Pair<List<Genotype>, GenotypeLocusData>(null, null);

-        ArrayList<Genotype> calls = new ArrayList<Genotype>();
-
-        // first, populate the sample-specific data
-        for ( String sample : GLs.keySet() ) {
-
-            // create the call
-            AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
-            Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
-
-            if ( call instanceof ReadBacked ) {
-                ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));               
-                ((ReadBacked)call).setPileup(pileup);
-            }
-            if ( call instanceof SampleBacked ) {
-                ((SampleBacked)call).setSampleName(sample);
-            }
-            if ( call instanceof LikelihoodsBacked ) {
-                ((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
-            }
-            if ( call instanceof PosteriorsBacked ) {
-                ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
-            }
-
-            calls.add(call);
-        }
+        // populate the sample-specific data
+        List<Genotype> calls = makeGenotypeCalls(ref, contexts, loc);

        // next, the general locus data
        // note that calculating strand bias involves overwriting data structures, so we do that last
@ -340,14 +260,16 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
                double lod = overallLog10PofF - overallLog10PofNull;

                // the forward lod
-                initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.FORWARD);
-                calculateAlleleFrequencyPosteriors(ref, null);
+                initializeLikelihoods(ref, contexts, StratifiedContext.FORWARD);
+                calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints);
+                calculatePofFs(ref, frequencyEstimationPoints);
                double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
                double forwardLog10PofF = Math.log10(PofFs[indexOfMax]);

                // the reverse lod
-                initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.REVERSE);
-                calculateAlleleFrequencyPosteriors(ref, null);
+                initializeLikelihoods(ref, contexts, StratifiedContext.REVERSE);
+                calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints);
+                calculatePofFs(ref, frequencyEstimationPoints);
                double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
                double reverseLog10PofF = Math.log10(PofFs[indexOfMax]);

--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java
@ -0,0 +1,61 @@
+package org.broadinstitute.sting.gatk.walkers.genotyper;
+
+import org.broadinstitute.sting.utils.*;
+import org.broadinstitute.sting.utils.genotype.*;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+
+import java.util.*;
+
+import net.sf.samtools.SAMRecord;
+
+public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {
+
+    protected PooledCalculationModel() {}
+
+
+    protected HashMap<String, AlignmentContextBySample> createContexts(AlignmentContext context) {
+        // for testing purposes, we may want to throw multi-samples at pooled mode,
+        // so we can't use the standard splitContextBySample() method here
+
+        AlignmentContextBySample pooledContext = new AlignmentContextBySample(context.getLocation());
+
+        int deletionsInPileup = 0;
+        List<SAMRecord> reads = context.getReads();
+        List<Integer> offsets = context.getOffsets();
+
+        for (int i = 0; i < reads.size(); i++) {
+
+            // check for deletions
+            int offset = offsets.get(i);
+            if ( offset == -1 ) {
+                // are there too many deletions in the pileup?
+                if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 )
+                    return null;
+            }
+
+            // add the read to this sample's context
+            // note that bad bases are added to the context (for DoC calculations later)
+            pooledContext.add(reads.get(i), offset);
+        }
+
+        HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
+        contexts.put("POOL", pooledContext);
+        return contexts;
+    }
+
+    protected void initializeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
+        return;
+    }
+    
+    protected double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f) {
+
+        // TODO -- IMPLEMENT ME
+
+        return 0.0;
+    }
+
+    protected List<Genotype> makeGenotypeCalls(char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
+        // no genotypes in pooled mode
+        return new ArrayList<Genotype>();
+    }
+}
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@ -32,7 +32,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
 public class UnifiedArgumentCollection {

    // control the various models to be used
-    @Argument(fullName = "genotype_model", shortName = "gm", doc = "Genotype calculation model to employ -- EM_POINT_ESTIMATE is currently the default, while JOINT_ESTIMATE is under development.  An exception will be thrown if an attempt is made to use EM_POINT_ESTIMATE with pooled data.", required = false)
+    @Argument(fullName = "genotype_model", shortName = "gm", doc = "Genotype calculation model to employ -- EM_POINT_ESTIMATE is currently the default, while JOINT_ESTIMATE and POOLED are under development.", required = false)
    public GenotypeCalculationModel.Model genotypeModel = GenotypeCalculationModel.Model.EM_POINT_ESTIMATE;

    @Argument(fullName = "base_model", shortName = "bm", doc = "Base substitution model to employ -- EMPIRICAL is the recommended default, but it's possible to select the ONE_STATE and THREE_STATE models for comparison purposes", required = false)
@ -41,10 +41,7 @@ public class UnifiedArgumentCollection {
    @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
    public Double heterozygosity = DiploidGenotypePriors.HUMAN_HETEROZYGOSITY;

-    @Argument(fullName = "pooled", shortName = "pooled", doc = "Does the input bam represent pooled data (so that genotypes can't be called)?", required = false)
-    public boolean POOLED = false;
-
-    @Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool", required = false)
+    @Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool (for POOLED model only)", required = false)
    public int POOLSIZE = 0;


--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@ -94,8 +94,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
    public void initialize() {

        // deal with input errors
-        if ( UAC.POOLED && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
-            throw new StingException("This was an attempt to use an EM Point Estimate model with pooled genotype calculations. This model does not work with pooled data.");
+        if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
+            throw new StingException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
        }
        if ( UAC.LOD_THRESHOLD > Double.MIN_VALUE ) {
            StringBuilder sb = new StringBuilder();
--- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@ -7,7 +7,7 @@ import java.util.Arrays;

 public class VariantAnnotatorIntegrationTest extends WalkerTest {
    public static String baseTestString() {
-        return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 20:10,000,000-10,010,000 -vcf %s";
+        return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,010,000 -vcf %s";
    }

    @Test
--- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testMultiSamplePilot1PointEM() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
-                Arrays.asList("b992e55996942c893948ea85660478ba"));
+                Arrays.asList("37e112d619c2b6f213554edd10262523"));
        executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
    }

@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testMultiSamplePilot1Joint() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
-                Arrays.asList("90c5129f298075ee0e18233b3763f25d"));
+                Arrays.asList("e4d32d41544fc12c1553f780be3fd272"));
        executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
    }

@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testMultiSamplePilot2Joint() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
-                Arrays.asList("033390940ecc0e2dcda5559d6a1802fa"));
+                Arrays.asList("d0bfdedca0944a0b05d53f1a75408a57"));
        executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
    }

@ -84,7 +84,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testSingleSamplePilot2Joint() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
-                Arrays.asList("8deb8b1132e7ddf28c7a0d919ce22985"));
+                Arrays.asList("e2e4c147075186245d8eb1f8d3bb8705"));
        executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
    }