-Renamed new calculation model and worked out some significant changes with Mark
-Allow walkers calling the UG to pass in their own argument collections git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1896 348d0f76-0448-11de-a6fe-93d51630548a
parent 8e3f72ced9
commit 55fa1cfa06

@@ -1,254 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;

import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;

import java.util.*;

public class AllMAFsGenotypeCalculationModel extends GenotypeCalculationModel {

    protected AllMAFsGenotypeCalculationModel() {}

    // because the null allele frequencies are constant for a given N,
    // we cache the results to avoid having to recompute everything
    private HashMap<Integer, double[]> nullAlleleFrequencyCache = new HashMap<Integer, double[]>();

    // because the Hardy-Weinberg values for a given frequency are constant,
    // we cache the results to avoid having to recompute everything
    private HashMap<Double, double[]> hardyWeinbergValueCache = new HashMap<Double, double[]>();

    // the allele frequency priors
    private double[] alleleFrequencyPriors;

    // the allele frequency posteriors and P(f>0) mapped by alternate allele
    private HashMap<Character, double[]> alleleFrequencyPosteriors = new HashMap<Character, double[]>();
    private HashMap<Character, Double> PofFs = new HashMap<Character, Double>();

    // the minimum and actual number of points in our allele frequency estimation
    private static final int MIN_ESTIMATION_POINTS = 20; //1000;
    private int frequencyEstimationPoints;

    // the GenotypeLikelihoods map
    private HashMap<String, AlleleSpecificGenotypeLikelihoods> GLs = new HashMap<String, AlleleSpecificGenotypeLikelihoods>();


    public Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {

        // keep track of the context for each sample, overall and separated by strand
        HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
        if ( contexts == null )
            return null;

        calculate(ref, contexts, priors, StratifiedContext.OVERALL);

        //double lod = overall.getPofD() - overall.getPofNull();
        //logger.debug("lod=" + lod);

        // calculate strand score
        //EMOutput forward = calculate(ref, contexts, priors, StratifiedContext.FORWARD);
        //EMOutput reverse = calculate(ref, contexts, priors, StratifiedContext.REVERSE);
        //double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
        //double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
        //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
        //double strandScore = Math.max(forwardLod - lod, reverseLod - lod);

        //logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore));

        // generate the calls
        //GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
        //return new Pair<List<GenotypeCall>, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata);
        return null;
    }

    private void calculate(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {

        initializeAlleleFrequencies(contexts.size());

        initializeGenotypeLikelihoods(ref, contexts, priors, contextType);

        calculateAlleleFrequencyPosteriors(ref);
    }

    private void initializeAlleleFrequencies(int numSamples) {

        // calculate the number of estimation points to use:
        // it's either MIN_ESTIMATION_POINTS or 2N if that's larger
        // (add 1 for allele frequency of zero)
        frequencyEstimationPoints = Math.max(MIN_ESTIMATION_POINTS, 2 * numSamples) + 1;

        // set up the allele frequency priors
        alleleFrequencyPriors = getNullalleleFrequencyPriors(frequencyEstimationPoints);

        for (int i = 1; i < frequencyEstimationPoints; i++)
            logger.debug("Null allele frequency for MAF=" + i + ": " + alleleFrequencyPriors[i]);
    }

    private double[] getNullalleleFrequencyPriors(int N) {
        double[] AFs = nullAlleleFrequencyCache.get(N);

        // if it hasn't been calculated yet, do so now
        if ( AFs == null ) {

            // calculate sum(1/i)
            double denominator = 0.0;
            for (int i = 1; i < N; i++)
                denominator += 1.0 / (double)i;

            // set up delta
            double delta = 1.0 / denominator;

            // calculate the null allele frequencies
            AFs = new double[N];
            for (int i = 1; i < N; i++)
                AFs[i] = Math.log10(delta / (double)i);

            nullAlleleFrequencyCache.put(N, AFs);
        }

        return AFs.clone();
    }

    private void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {
        GLs.clear();

        for ( String sample : contexts.keySet() ) {
            AlignmentContextBySample context = contexts.get(sample);
            ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));

            // create the GenotypeLikelihoods object
            GenotypeLikelihoods GL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform);
            GL.setVerbose(VERBOSE);
            GL.add(pileup, true);

            GLs.put(sample, new AlleleSpecificGenotypeLikelihoods(ref, GL));
        }
    }

    private void calculateAlleleFrequencyPosteriors(char ref) {

        for ( char altAllele : BaseUtils.BASES ) {
            if ( altAllele == ref )
                continue;

            double[] AFPosteriors = new double[frequencyEstimationPoints];

            for ( AlleleSpecificGenotypeLikelihoods GL : GLs.values() ) {
                double[] PofDgivenG = GL.getNormalizedPosteriors(altAllele);

                // calculate the posterior weighted frequencies for this sample
                for (int i = 1; i < frequencyEstimationPoints; i++) {
                    double f = (double)i / (double)(frequencyEstimationPoints-1);
                    double[] hardyWeinberg = getHardyWeinbergValues(f);

                    double PofDgivenAFi = 0.0;
                    PofDgivenAFi += hardyWeinberg[GenotypeType.REF.ordinal()] * PofDgivenG[GenotypeType.REF.ordinal()];
                    PofDgivenAFi += hardyWeinberg[GenotypeType.HET.ordinal()] * PofDgivenG[GenotypeType.HET.ordinal()];
                    PofDgivenAFi += hardyWeinberg[GenotypeType.HOM.ordinal()] * PofDgivenG[GenotypeType.HOM.ordinal()];

                    AFPosteriors[i] += Math.log10(PofDgivenAFi);
                }
            }

            for (int i = 1; i < frequencyEstimationPoints; i++)
                logger.debug("P(D|AF=" + i + ") for alt allele " + altAllele + ": " + AFPosteriors[i]);

            // 1) multiply by null allele frequency priors to get AF posteriors
            // 2) for precision purposes, we need to subtract the largest posterior value
            // 3) convert back to normal space
            double maxValue = findMaxEntry(AFPosteriors, true);
            for (int i = 1; i < frequencyEstimationPoints; i++)
                AFPosteriors[i] = Math.pow(10, alleleFrequencyPriors[i] + AFPosteriors[i] - maxValue);

            // normalize
            double sum = 0.0;
            for (int i = 1; i < frequencyEstimationPoints; i++)
                sum += AFPosteriors[i];
            for (int i = 1; i < frequencyEstimationPoints; i++)
                AFPosteriors[i] /= sum;

            for (int i = 1; i < frequencyEstimationPoints; i++)
                logger.debug("Allele frequency posterior estimate for alt allele " + altAllele + " MAF=" + i + ": " + AFPosteriors[i]);
            logger.debug("P(f>0) for alt allele " + altAllele + ": " + sum);

            alleleFrequencyPosteriors.put(altAllele, AFPosteriors);
            PofFs.put(altAllele, sum);
        }
    }

    private double[] getHardyWeinbergValues(double f) {
        double[] values = hardyWeinbergValueCache.get(f);

        // if it hasn't been calculated yet, do so now
        if ( values == null ) {
            // p^2, 2pq, q^2
            values = new double[] { Math.pow(1.0 - f, 2), 2.0 * (1.0 - f) * f, Math.pow(f, 2) };
            hardyWeinbergValueCache.put(f, values);
        }

        return values;
    }

    private static double findMaxEntry(double[] array, boolean skipZeroIndex) {
        int index = skipZeroIndex ? 1 : 0;
        double max = array[index++];
        for (; index < array.length; index++) {
            if ( array[index] > max )
                max = array[index];
        }
        return max;
    }


    private enum GenotypeType { REF, HET, HOM }

    /**
     * A class for the allele-specific GL posteriors
     */
    private class AlleleSpecificGenotypeLikelihoods {
        // mapping from alt allele base to posteriors
        private HashMap<Character, double[]> alleleGLs = new HashMap<Character, double[]>();

        AlleleSpecificGenotypeLikelihoods(char ref, GenotypeLikelihoods GL) {
            double[] posteriors = GL.getPosteriors();

            // get the ref data
            DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
            double refPosterior = posteriors[refGenotype.ordinal()];
            String refStr = String.valueOf(ref);

            for ( char base : BaseUtils.BASES ) {
                if ( base == ref )
                    continue;

                // get hom var and het data
                DiploidGenotype hetGenotype = ref < base ? DiploidGenotype.valueOf(refStr + String.valueOf(base)) : DiploidGenotype.valueOf(String.valueOf(base) + refStr);
                DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(base);
                double hetPosterior = posteriors[hetGenotype.ordinal()];
                double homPosterior = posteriors[homGenotype.ordinal()];

                // for precision purposes, we need to add (or really subtract, since everything is negative)
                // the largest posterior value from all entries so that numbers don't get too small
                double maxValue = Math.max(Math.max(refPosterior, hetPosterior), homPosterior);
                double normalizedRefPosterior = Math.pow(10, refPosterior - maxValue);
                double normalizedHetPosterior = Math.pow(10, hetPosterior - maxValue);
                double normalizedHomPosterior = Math.pow(10, homPosterior - maxValue);

                // normalize
                double sum = normalizedRefPosterior + normalizedHetPosterior + normalizedHomPosterior;
                normalizedRefPosterior /= sum;
                normalizedHetPosterior /= sum;
                normalizedHomPosterior /= sum;

                double[] altPosteriors = new double[] { normalizedRefPosterior, normalizedHetPosterior, normalizedHomPosterior };
                alleleGLs.put(base, altPosteriors);
            }
        }

        public double[] getNormalizedPosteriors(char altAllele) {
            return alleleGLs.get(altAllele);
        }
    }
}
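Both the model removed above and its replacement below lean on the same numerical trick when turning log10 posteriors back into probabilities: subtract the largest log10 value before exponentiating so that Math.pow(10, x) cannot underflow, then renormalize. A standalone sketch of that step follows; the wrapper class and the example values are illustrative, not part of this commit.

// Standalone illustration of the log10-space normalization used in both models:
// shifting by the max log10 value keeps Math.pow(10, x) away from underflow,
// and the subsequent division restores a proper probability distribution.
public class Log10NormalizationSketch {
    public static double[] normalizeFromLog10(double[] log10Values) {
        double max = log10Values[0];
        for (int i = 1; i < log10Values.length; i++)
            max = Math.max(max, log10Values[i]);

        double[] probs = new double[log10Values.length];
        double sum = 0.0;
        for (int i = 0; i < log10Values.length; i++) {
            probs[i] = Math.pow(10, log10Values[i] - max);   // safe: exponent is <= 0
            sum += probs[i];
        }
        for (int i = 0; i < probs.length; i++)
            probs[i] /= sum;
        return probs;
    }

    public static void main(String[] args) {
        // log10 posteriors around -300 would underflow to 0.0 without the max shift
        double[] normalized = normalizeFromLog10(new double[] { -300.2, -301.2, -303.2 });
        System.out.println(java.util.Arrays.toString(normalized)); // roughly 0.91, 0.09, 0.0009
    }
}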
@@ -21,7 +21,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {

    public enum Model {
        EM_POINT_ESTIMATE,
        EM_ALL_MAFS
        JOINT_ESTIMATE
    }

    protected BaseMismatchModel baseModel;

@@ -33,6 +33,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
    protected boolean POOLED_INPUT;
    protected int POOL_SIZE;
    protected double LOD_THRESHOLD;
    protected double MINIMUM_ALLELE_FREQUENCY;
    protected int maxDeletionsInPileup;
    protected String assumedSingleSample;
    protected boolean VERBOSE;

@@ -62,6 +63,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
        POOLED_INPUT = UAC.POOLED;
        POOL_SIZE = UAC.POOLSIZE;
        LOD_THRESHOLD = UAC.LOD_THRESHOLD;
        MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
        maxDeletionsInPileup = UAC.MAX_DELETIONS;
        assumedSingleSample = UAC.ASSUME_SINGLE_SAMPLE;
        VERBOSE = UAC.VERBOSE;

@@ -82,14 +84,16 @@ public abstract class GenotypeCalculationModel implements Cloneable {
        gcm.GENOTYPE_MODE = GENOTYPE_MODE;
        gcm.POOLED_INPUT = POOLED_INPUT;
        gcm.LOD_THRESHOLD = LOD_THRESHOLD;
        gcm.MINIMUM_ALLELE_FREQUENCY = MINIMUM_ALLELE_FREQUENCY;
        gcm.maxDeletionsInPileup = maxDeletionsInPileup;
        gcm.assumedSingleSample = assumedSingleSample;
        gcm.VERBOSE = VERBOSE;
        return gcm;
    }

    public void setAssumedSingleSample(String sample) {
        assumedSingleSample = sample;
    public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) {
        // just re-initialize
        initialize(this.samples, this.logger, UAC);
    }

    /**

@@ -58,8 +58,8 @@ public class GenotypeCalculationModelFactory {
            case EM_POINT_ESTIMATE:
                gcm = new PointEstimateGenotypeCalculationModel();
                break;
            case EM_ALL_MAFS:
                gcm = new AllMAFsGenotypeCalculationModel();
            case JOINT_ESTIMATE:
                gcm = new JointEstimateGenotypeCalculationModel();
                break;
            default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
        }
@@ -0,0 +1,284 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;

import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;

import java.util.*;

public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {

    protected JointEstimateGenotypeCalculationModel() {}

    // because the null allele frequencies are constant for a given N,
    // we cache the results to avoid having to recompute everything
    private HashMap<Integer, double[]> nullAlleleFrequencyCache = new HashMap<Integer, double[]>();

    // because the Hardy-Weinberg values for a given frequency are constant,
    // we cache the results to avoid having to recompute everything
    private HashMap<Double, Pair<double[], double[]>> hardyWeinbergValueCache = new HashMap<Double, Pair<double[], double[]>>();

    // the allele frequency priors
    private double[] log10AlleleFrequencyPriors;

    // the allele frequency posteriors and P(f>0) for each alternate allele
    private double[][] alleleFrequencyPosteriors = new double[BaseUtils.BASES.length][];
    private double[][] log10PofDgivenAFi = new double[BaseUtils.BASES.length][];
    private double[] PofFs = new double[BaseUtils.BASES.length];

    // the minimum and actual number of points in our allele frequency estimation
    private static final int MIN_ESTIMATION_POINTS = 100; //1000;
    private int frequencyEstimationPoints;

    // the GenotypeLikelihoods map
    private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();

    private enum GenotypeType { REF, HET, HOM }


    public Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {

        // keep track of the context for each sample, overall and separated by strand
        HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
        if ( contexts == null )
            return null;

        calculate(ref, contexts, StratifiedContext.OVERALL);

        //double lod = overall.getPofD() - overall.getPofNull();
        //logger.debug("lod=" + lod);

        // calculate strand score
        //EMOutput forward = calculate(ref, contexts, priors, StratifiedContext.FORWARD);
        //EMOutput reverse = calculate(ref, contexts, priors, StratifiedContext.REVERSE);
        //double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
        //double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
        //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
        //double strandScore = Math.max(forwardLod - lod, reverseLod - lod);

        //logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore));

        // generate the calls
        //GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
        //return new Pair<List<GenotypeCall>, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata);
        return null;
    }

    private void calculate(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {

        initializeAlleleFrequencies(contexts.size());

        initializeGenotypeLikelihoods(ref, contexts, contextType);

        calculateAlleleFrequencyPosteriors(ref);
    }

    private void initializeAlleleFrequencies(int numSamples) {

        // calculate the number of estimation points to use:
        // it's either MIN_ESTIMATION_POINTS or 2N if that's larger
        // (add 1 for allele frequency of zero)
        frequencyEstimationPoints = Math.max(MIN_ESTIMATION_POINTS, 2 * numSamples) + 1;

        // set up the allele frequency priors
        log10AlleleFrequencyPriors = getNullAlleleFrequencyPriors(frequencyEstimationPoints);

        for (int i = 0; i < frequencyEstimationPoints; i++)
            logger.debug("Null allele frequency for MAF=" + i + "/" + (frequencyEstimationPoints-1) + ": " + log10AlleleFrequencyPriors[i]);
    }

    private double[] getNullAlleleFrequencyPriors(int N) {
        double[] AFs = nullAlleleFrequencyCache.get(N);

        // if it hasn't been calculated yet, do so now
        if ( AFs == null ) {

            // calculate sum(1/i)
            double sigma_1_over_I = 0.0;
            for (int i = 1; i < N; i++)
                sigma_1_over_I += 1.0 / (double)i;

            // delta = theta / sum(1/i)
            double delta = heterozygosity / sigma_1_over_I;

            // calculate the null allele frequencies for 1-N
            AFs = new double[N];
            double sum = 0.0;
            for (int i = 1; i < N; i++) {
                double value = delta / (double)i;
                AFs[i] = Math.log10(value);
                sum += value;
            }

            // null frequency for AF=0 is (1 - sum(all other frequencies))
            AFs[0] = Math.log10(1.0 - sum);

            nullAlleleFrequencyCache.put(N, AFs);
        }

        return AFs;
    }

    private void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
        GLs.clear();

        for ( String sample : contexts.keySet() ) {
            AlignmentContextBySample context = contexts.get(sample);
            ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));

            // create the GenotypeLikelihoods object
            GenotypeLikelihoods GL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, new DiploidGenotypePriors(), defaultPlatform);
            GL.setVerbose(VERBOSE);
            GL.add(pileup, true);

            GLs.put(sample, GL);
        }
    }

    private void calculateAlleleFrequencyPosteriors(char ref) {

        // initialization
        for ( char altAllele : BaseUtils.BASES ) {
            int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
            alleleFrequencyPosteriors[baseIndex] = new double[frequencyEstimationPoints];
            log10PofDgivenAFi[baseIndex] = new double[frequencyEstimationPoints];
        }
        DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);

        // for each minor allele frequency
        for (int i = 0; i < frequencyEstimationPoints; i++) {
            double f = (double)i / (double)(frequencyEstimationPoints-1);

            DiploidGenotypePriors hardyWeinberg = getHardyWeinbergValues(f, ref);

            // for each sample
            for ( GenotypeLikelihoods GL : GLs.values() ) {

                // set the GL prior to be the AF priors and get the corresponding posteriors
                //GL.setPriors(hardyWeinberg);
                GL.setPriors(new DiploidGenotypePriors());
                double[] posteriors = GL.getPosteriors();

                // get the ref data
                double refPosterior = posteriors[refGenotype.ordinal()];
                String refStr = String.valueOf(ref);

                // for each alternate allele
                for ( char altAllele : BaseUtils.BASES ) {
                    if ( altAllele == ref )
                        continue;

                    int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);

                    DiploidGenotype hetGenotype = ref < altAllele ? DiploidGenotype.valueOf(refStr + String.valueOf(altAllele)) : DiploidGenotype.valueOf(String.valueOf(altAllele) + refStr);
                    DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(altAllele);

                    double[] allelePosteriors = new double[] { refPosterior, posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] };
                    normalizeFromLog10(allelePosteriors);
                    //logger.debug("Normalized posteriors for " + altAllele + ": " + allelePosteriors[0] + " " + allelePosteriors[1] + " " + allelePosteriors[2]);

                    // calculate the posterior weighted frequencies
                    double[] HWvalues = hardyWeinbergValueCache.get(f).first;
                    double PofDgivenAFi = 0.0;
                    PofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()];
                    PofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()];
                    PofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()];
                    log10PofDgivenAFi[baseIndex][i] += Math.log10(PofDgivenAFi);
                }
            }
        }

        // for each alternate allele
        for ( char altAllele : BaseUtils.BASES ) {
            if ( altAllele == ref )
                continue;

            int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);

            for (int i = 0; i < frequencyEstimationPoints; i++)
                logger.debug("P(D|AF=" + i + ") for alt allele " + altAllele + ": " + log10PofDgivenAFi[baseIndex][i]);

            // multiply by null allele frequency priors to get AF posteriors, then normalize
            for (int i = 0; i < frequencyEstimationPoints; i++)
                alleleFrequencyPosteriors[baseIndex][i] = log10AlleleFrequencyPriors[i] + log10PofDgivenAFi[baseIndex][i];
            normalizeFromLog10(alleleFrequencyPosteriors[baseIndex]);

            for (int i = 0; i < frequencyEstimationPoints; i++)
                logger.debug("Allele frequency posterior estimate for alt allele " + altAllele + " MAF=" + i + "/" + (frequencyEstimationPoints-1) + ": " + alleleFrequencyPosteriors[baseIndex][i]);

            // calculate p(f>0)
            double sum = 0.0;
            for (int i = 1; i < frequencyEstimationPoints; i++)
                sum += alleleFrequencyPosteriors[baseIndex][i];
            PofFs[baseIndex] = Math.min(sum, 1.0); // deal with precision errors
            logger.info("P(f>0), LOD for alt allele " + altAllele + ": " + Math.log10(PofFs[baseIndex]) + ", " + (Math.log10(PofFs[baseIndex]) - Math.log10(alleleFrequencyPosteriors[baseIndex][0])));
        }
    }

    private DiploidGenotypePriors getHardyWeinbergValues(double f, char ref) {
        Pair<double[], double[]> HWvalues = hardyWeinbergValueCache.get(f);

        // if it hasn't been calculated yet, do so now
        if ( HWvalues == null ) {

            // create Hardy-Weinberg based allele frequencies (p^2, 2pq, q^2) converted to log-space
            double p = 1.0 - f;
            double q = f;

            // allele frequencies don't actually equal 0...
            if ( MathUtils.compareDoubles(q, 0.0) == 0 ) {
                q = MINIMUM_ALLELE_FREQUENCY;
                p -= MINIMUM_ALLELE_FREQUENCY;
            } else if ( MathUtils.compareDoubles(p, 0.0) == 0 ) {
                p = MINIMUM_ALLELE_FREQUENCY;
                q -= MINIMUM_ALLELE_FREQUENCY;
            }

            double[] freqs = new double[] { Math.pow(p, 2), 2.0 * p * q, Math.pow(q, 2) };
            double[] log10Freqs = new double[] { Math.log10(freqs[0]), Math.log10(freqs[1]), Math.log10(freqs[2]) };
            HWvalues = new Pair<double[], double[]>(freqs, log10Freqs);
            hardyWeinbergValueCache.put(f, HWvalues);
        }

        // create the priors based on the HW frequencies
        double[] alleleFreqPriors = new double[10];
        for ( DiploidGenotype g : DiploidGenotype.values() ) {
            if ( g.isHomRef(ref) )
                alleleFreqPriors[g.ordinal()] = HWvalues.second[GenotypeType.REF.ordinal()];
            else if ( g.isHomVar(ref) )
                alleleFreqPriors[g.ordinal()] = HWvalues.second[GenotypeType.HOM.ordinal()];
            else if ( g.isHetRef(ref) )
                alleleFreqPriors[g.ordinal()] = HWvalues.second[GenotypeType.HET.ordinal()];
            else // g is het non-ref
                alleleFreqPriors[g.ordinal()] = 0.0;
        }

        return new DiploidGenotypePriors(alleleFreqPriors);
    }

    private void normalizeFromLog10(double[] array) {
        // for precision purposes, we need to add (or really subtract, since they're
        // all negative) the largest value; also, we need to convert to normal-space.
        double maxValue = findMaxEntry(array);
        for (int i = 0; i < array.length; i++)
            array[i] = Math.pow(10, array[i] - maxValue);

        // normalize
        double sum = 0.0;
        for (int i = 0; i < array.length; i++)
            sum += array[i];
        for (int i = 0; i < array.length; i++)
            array[i] /= sum;
    }

    private static double findMaxEntry(double[] array) {
        double max = array[0];
        for (int index = 1; index < array.length; index++) {
            if ( array[index] > max )
                max = array[index];
        }
        return max;
    }
}
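The getNullAlleleFrequencyPriors() method above encodes the usual neutral-model prior in which P(AF = i) falls off as theta/i, with whatever mass remains assigned to AF = 0. The following standalone sketch repeats that computation in linear space for clarity; the class and method names and the heterozygosity value in main() are illustrative, and the real code stores log10 values and caches them per N.

// Linear-space re-derivation of the theta/i allele frequency prior used above.
// Since delta = theta / sum(1/i), the non-reference mass sums to theta and
// P(AF=0) works out to 1 - theta by construction.
public class AlleleFrequencyPriorSketch {
    public static double[] nullAlleleFrequencyPriors(int N, double heterozygosity) {
        double sumOfInverses = 0.0;
        for (int i = 1; i < N; i++)
            sumOfInverses += 1.0 / i;
        double delta = heterozygosity / sumOfInverses;

        double[] priors = new double[N];
        double sum = 0.0;
        for (int i = 1; i < N; i++) {
            priors[i] = delta / i;      // P(AF = i) proportional to 1/i
            sum += priors[i];
        }
        priors[0] = 1.0 - sum;          // remaining mass goes to AF = 0
        return priors;
    }

    public static void main(String[] args) {
        // e.g. 2N+1 = 21 estimation points and an illustrative heterozygosity of 1e-3
        double[] priors = nullAlleleFrequencyPriors(21, 1e-3);
        System.out.println("P(AF=0) = " + priors[0] + ", P(AF=1) = " + priors[1]);
    }
}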
@@ -31,7 +31,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
public class UnifiedArgumentCollection {

    // control the various models to be used
    @Argument(fullName = "genotype_model", shortName = "gm", doc = "Genotype calculation model to employ -- EM_POINT_ESTIMATE is currently the default, while EM_ALL_MAFS is under development. An exception will be thrown if an attempt is made to use EM_POINT_ESTIMATE with a pooled genotype calculation.", required = false)
    @Argument(fullName = "genotype_model", shortName = "gm", doc = "Genotype calculation model to employ -- EM_POINT_ESTIMATE is currently the default, while JOINT_ESTIMATE is under development. An exception will be thrown if an attempt is made to use EM_POINT_ESTIMATE with pooled data.", required = false)
    public GenotypeCalculationModel.Model genotypeModel = GenotypeCalculationModel.Model.EM_POINT_ESTIMATE;

    @Argument(fullName = "base_model", shortName = "bm", doc = "Base substitution model to employ -- EMPIRICAL is the recommended default, but it's possible to select the ONE_STATE and THREE_STATE models for comparison purposes", required = false)

@@ -73,6 +73,8 @@ public class UnifiedArgumentCollection {
    @Argument(fullName = "max_coverage", shortName = "coverage", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false)
    public Integer MAX_READS_IN_PILEUP = 10000;

    @Argument(fullName = "min_allele_frequency", shortName = "minFreq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
    public double MINIMUM_ALLELE_FREQUENCY = 1e-8;

    //@Argument(fullName = "disableCache", doc = "[ADVANCED] If true, we won't use the caching system. This argument is for testing purposes only", required = false)
    //public boolean disableCache = false;

@@ -82,13 +82,15 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
    /** Enable deletions in the pileup **/
    public boolean includeReadsWithDeletionAtLoci() { return true; }


    /**
     * Sets the single sample to assume when read groups are missing.
     * Sets the argument collection for the UnifiedGenotyper.
     * To be used with walkers that call the UnifiedGenotyper's map function
     * and consequently can't set these arguments on the command-line
     *
     **/
    public void setAssumedSingleSample(String sample) {
        gcm.setAssumedSingleSample(sample);
    public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) {
        gcm.setUnifiedArgumentCollection(UAC);
    }

    /**
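The setUnifiedArgumentCollection() hook above is the second half of this commit: a walker that drives the UnifiedGenotyper programmatically can now hand it a fully populated UnifiedArgumentCollection instead of just an assumed sample name. A rough sketch of such a caller follows; the embedding walker, its method, and the way the UnifiedGenotyper instance is obtained are hypothetical, and only the setUnifiedArgumentCollection() call and the UAC fields shown in this diff are taken from the commit.

// Hypothetical embedding walker -- illustrative only, not part of this commit.
public class MyEmbeddingWalker {
    private final UnifiedGenotyper ug = new UnifiedGenotyper();

    public void initializeGenotyper() {
        // build the argument collection in code rather than via command-line @Argument parsing
        UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
        uac.genotypeModel = GenotypeCalculationModel.Model.JOINT_ESTIMATE;
        uac.MINIMUM_ALLELE_FREQUENCY = 1e-8;

        // the UG re-initializes its GenotypeCalculationModel from the new collection
        ug.setUnifiedArgumentCollection(uac);
    }
}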