As promised in the release notes for 1.6, I am removing the old deprecated genotyping framework revolving around the misordering of alleles and have moved the fixed version in its place in preparation for release 1.7 (or 2.0?).

2012-05-01 16:18:24 -04:00 · 2012-05-01 16:18:24 -04:00 · 619a69a5f1
parent c255dd5917
commit 619a69a5f1
9 changed files with 129 additions and 1135 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java
@ -32,7 +32,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.LocusWalker;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
-import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidSNPGenotypePriors;
+import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
+import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

@ -68,8 +69,8 @@ public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements Tre
        if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case

        ReadBackedPileup pileup = context.getBasePileup().getPileupWithoutMappingQualityZeroReads();
-        double likelihoods[] = DiploidSNPGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
-                DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY,
+        double likelihoods[] = getReferencePolarizedPriors(ref.getBase(),
+                UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY,
                0.01);
        // get the bases and qualities from the pileup
        byte bases[] = pileup.getBases();
@ -104,10 +105,106 @@ public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements Tre
    }

    /**
-     * Provide an initial value for reduce computations. In this case we simply return an empty list
+     * Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector
+     * appropriately.
     *
-     * @return Initial value of reduce.
+     * Suppose A is the reference base, and we are given the probability of being hom-ref, het, and hom-var,
+     * and that pTriSateGenotype is the true probability of observing reference A and a true genotype of B/C
+     * then this sets the priors to:
+     *
+     * AA = hom-ref
+     * AC = AG = AT = (het - pTriStateGenotype) / 3
+     * CC = GG = TT = hom-var / 3
+     * CG = CT = GT = pTriStateGenotype / 3
+     *
+     * So that we get:
+     *
+     * hom-ref + 3 * (het - pTriStateGenotype) / 3 + 3 * hom-var / 3 + 3 * pTriStateGenotype
+     * hom-ref + het - pTriStateGenotype + hom-var + pTriStateGenotype
+     * hom-ref + het + hom-var
+     * = 1
+     *
+     * @param ref
+     * @param heterozyosity
+     * @param pRefError
     */
+    public static double[] getReferencePolarizedPriors(byte ref, double heterozyosity, double pRefError ) {
+        if ( ! MathUtils.isBounded(pRefError, 0.0, 0.01) ) {
+            throw new RuntimeException(String.format("BUG: p Reference error is out of bounds (0.0 - 0.01) is allow range %f", pRefError));
+        }
+
+        double pTriStateGenotype = heterozyosity * pRefError;
+//        if ( pTriStateGenotype >= heterozyosity ) {
+//            throw new RuntimeException(String.format("p Tristate genotype %f is greater than the heterozygosity %f", pTriStateGenotype, heterozyosity));
+//        }
+
+        double pHomRef = heterozygosity2HomRefProbability(heterozyosity);
+        double pHet    = heterozygosity2HetProbability(heterozyosity);
+        double pHomVar = heterozygosity2HomVarProbability(heterozyosity);
+
+        if (MathUtils.compareDoubles(pHomRef + pHet + pHomVar, 1.0) != 0) {
+            throw new RuntimeException(String.format("BUG: Prior probabilities don't sum to one => %f, %f, %f", pHomRef, pHet, pHomVar));
+        }
+
+        double[] priors = new double[DiploidGenotype.values().length];
+
+        for ( DiploidGenotype g : DiploidGenotype.values() ) {
+            double POfG;
+
+            final double nOnRefHets = 3;
+            final double nOffRefHets = 3;
+            final double nHomVars = 3;
+
+            if ( g.isHomRef(ref) )      { POfG = pHomRef; }
+            else if ( g.isHomVar(ref) ) { POfG = pHomVar / nHomVars; }
+            else if ( g.isHetRef(ref) ) { POfG = (pHet - pTriStateGenotype ) / nOnRefHets; }
+            else                        { POfG = pTriStateGenotype / nOffRefHets; }
+
+            priors[g.ordinal()] = Math.log10(POfG);
+        }
+
+        return priors;
+    }
+
+    /**
+     *
+     * @param h
+     * @return
+     */
+    public static double heterozygosity2HomRefProbability(double h) {
+        if (MathUtils.isNegative(h)) {
+            throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
+        }
+
+        double v = 1.0 - (3.0 * h / 2.0);
+        if (MathUtils.isNegative(v)) {
+            throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
+        }
+
+        return v;
+    }
+
+    public static double heterozygosity2HetProbability(double h) {
+        if (MathUtils.isNegative(h)) {
+            throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
+        }
+
+        return h;
+    }
+
+    public static double heterozygosity2HomVarProbability(double h) {
+        if (MathUtils.isNegative(h)) {
+            throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
+        }
+
+        return h / 2.0;
+    }
+
+    /**
+        * Provide an initial value for reduce computations. In this case we simply return an empty list
+        *
+        * @return Initial value of reduce.
+        */
    public Long reduceInit() {
        return 0L;
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java
@ -27,16 +27,15 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;

 import org.broadinstitute.sting.utils.BaseUtils;

-@Deprecated
 public enum DiploidGenotype {
    AA ('A', 'A'),
    AC ('A', 'C'),
-    AG ('A', 'G'),
-    AT ('A', 'T'),
    CC ('C', 'C'),
+    AG ('A', 'G'),
    CG ('C', 'G'),
-    CT ('C', 'T'),
    GG ('G', 'G'),
+    AT ('A', 'T'),
+    CT ('C', 'T'),
    GT ('G', 'T'),
    TT ('T', 'T');

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeWithCorrectAlleleOrdering.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeWithCorrectAlleleOrdering.java
@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.genotyper;
-
-import org.broadinstitute.sting.utils.BaseUtils;
-
-public enum DiploidGenotypeWithCorrectAlleleOrdering {
-    AA ('A', 'A'),
-    AC ('A', 'C'),
-    CC ('C', 'C'),
-    AG ('A', 'G'),
-    CG ('C', 'G'),
-    GG ('G', 'G'),
-    AT ('A', 'T'),
-    CT ('C', 'T'),
-    GT ('G', 'T'),
-    TT ('T', 'T');
-
-    public byte base1, base2;
-
-    @Deprecated
-    private DiploidGenotypeWithCorrectAlleleOrdering(char base1, char base2) {
-        this((byte)base1, (byte)base2);
-    }
-
-    private DiploidGenotypeWithCorrectAlleleOrdering(byte base1, byte base2) {
-        this.base1 = base1;
-        this.base2 = base2;
-    }
-
-    public boolean isHomRef(byte r) {
-        return isHom() && r == base1;
-    }
-
-    public boolean isHomVar(byte r) {
-        return isHom() && r != base1;
-    }
-
-    public boolean isHetRef(byte r) {
-        if ( base1 == r )
-            return r != base2;
-        else
-            return base2 == r;
-    }
-
-    public boolean isHom() {
-        return ! isHet();
-    }
-
-    public boolean isHet() {
-        return base1 != base2;
-    }
-
-    /**
-     * create a diploid genotype, given a character to make into a hom genotype
-     * @param hom the character to turn into a hom genotype, i.e. if it is A, then returned will be AA
-     * @return the diploid genotype
-     */
-    public static DiploidGenotypeWithCorrectAlleleOrdering createHomGenotype(byte hom) {
-        int index = BaseUtils.simpleBaseToBaseIndex(hom);
-        if ( index == -1 )
-            throw new IllegalArgumentException(hom + " is not a valid base character");
-        return conversionMatrix[index][index];
-    }
-
-    /**
-     * create a diploid genotype, given 2 chars which may not necessarily be ordered correctly
-     * @param base1 base1
-     * @param base2 base2
-     * @return the diploid genotype
-     */
-    public static DiploidGenotypeWithCorrectAlleleOrdering createDiploidGenotype(byte base1, byte base2) {
-        int index1 = BaseUtils.simpleBaseToBaseIndex(base1);
-        if ( index1 == -1 )
-            throw new IllegalArgumentException(base1 + " is not a valid base character");
-        int index2 = BaseUtils.simpleBaseToBaseIndex(base2);
-        if ( index2 == -1 )
-            throw new IllegalArgumentException(base2 + " is not a valid base character");
-        return conversionMatrix[index1][index2];
-    }
-
-    /**
-     * create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly
-     * @param baseIndex1 base1
-     * @param baseIndex2 base2
-     * @return the diploid genotype
-     */
-    public static DiploidGenotypeWithCorrectAlleleOrdering createDiploidGenotype(int baseIndex1, int baseIndex2) {
-        if ( baseIndex1 == -1 )
-            throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character");
-        if ( baseIndex2 == -1 )
-            throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character");
-        return conversionMatrix[baseIndex1][baseIndex2];
-    }
-
-    private static final DiploidGenotypeWithCorrectAlleleOrdering[][] conversionMatrix = {
-            { DiploidGenotypeWithCorrectAlleleOrdering.AA, DiploidGenotypeWithCorrectAlleleOrdering.AC, DiploidGenotypeWithCorrectAlleleOrdering.AG, DiploidGenotypeWithCorrectAlleleOrdering.AT },
-            { DiploidGenotypeWithCorrectAlleleOrdering.AC, DiploidGenotypeWithCorrectAlleleOrdering.CC, DiploidGenotypeWithCorrectAlleleOrdering.CG, DiploidGenotypeWithCorrectAlleleOrdering.CT },
-            { DiploidGenotypeWithCorrectAlleleOrdering.AG, DiploidGenotypeWithCorrectAlleleOrdering.CG, DiploidGenotypeWithCorrectAlleleOrdering.GG, DiploidGenotypeWithCorrectAlleleOrdering.GT },
-            { DiploidGenotypeWithCorrectAlleleOrdering.AT, DiploidGenotypeWithCorrectAlleleOrdering.CT, DiploidGenotypeWithCorrectAlleleOrdering.GT, DiploidGenotypeWithCorrectAlleleOrdering.TT }
-    };
-}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java
@ -27,10 +27,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;

 import net.sf.samtools.SAMUtils;
 import org.broadinstitute.sting.utils.BaseUtils;
-import org.broadinstitute.sting.utils.fragments.FragmentCollection;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.fragments.FragmentCollection;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

@ -70,8 +70,8 @@ import static java.lang.Math.pow;
 * From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above
 * model.
 */
-@Deprecated
 public class DiploidSNPGenotypeLikelihoods implements Cloneable {
+
    public final static double DEFAULT_PCR_ERROR_RATE = 1e-4;

    protected final static int FIXED_PLOIDY = 2;
@ -82,36 +82,20 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
    protected boolean VERBOSE = false;

    //
-    // The fundamental data arrays associated with a Genotype Likelhoods object
+    // The fundamental data arrays associated with a Genotype Likelihoods object
    //
    protected double[] log10Likelihoods = null;
-    protected double[] log10Posteriors = null;
-
-    protected DiploidSNPGenotypePriors priors = null;

    // TODO: don't calculate this each time through
    protected double log10_PCR_error_3;
    protected double log10_1_minus_PCR_error;

    /**
-     * Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
+     * Create a new GenotypeLikelhoods object with given PCR error rate for each diploid genotype
     *
-     */
-    public DiploidSNPGenotypeLikelihoods() {
-        this.priors = new DiploidSNPGenotypePriors();
-        log10_PCR_error_3 = log10(DEFAULT_PCR_ERROR_RATE) - log10_3;
-        log10_1_minus_PCR_error = log10(1.0 - DEFAULT_PCR_ERROR_RATE);
-        setToZero();
-    }
-
-    /**
-     * Create a new GenotypeLikelhoods object with given priors and PCR error rate for each diploid genotype
-     *
-     * @param priors          priors
     * @param PCR_error_rate  the PCR error rate
     */
-    public DiploidSNPGenotypeLikelihoods(DiploidSNPGenotypePriors priors, double PCR_error_rate) {
-        this.priors = priors;
+    public DiploidSNPGenotypeLikelihoods(double PCR_error_rate) {
        log10_PCR_error_3 = log10(PCR_error_rate) - log10_3;
        log10_1_minus_PCR_error = log10(1.0 - PCR_error_rate);
        setToZero();
@ -124,15 +108,12 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
     */
    protected Object clone() throws CloneNotSupportedException {
        DiploidSNPGenotypeLikelihoods c = (DiploidSNPGenotypeLikelihoods)super.clone();
-        c.priors = priors;
        c.log10Likelihoods = log10Likelihoods.clone();
-        c.log10Posteriors = log10Posteriors.clone();
        return c;
    }

    protected void setToZero() {
        log10Likelihoods = genotypeZeros.clone();                 // likelihoods are all zeros
-        log10Posteriors = priors.getPriors().clone();     // posteriors are all the priors
    }

    /**
@ -143,102 +124,6 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
        return log10Likelihoods;
    }

-    /**
-     * Returns the likelihood associated with DiploidGenotype g
-     * @param g genotype
-     * @return log10 likelihood as a double
-     */
-    public double getLikelihood(DiploidGenotype g) {
-        return getLikelihoods()[g.ordinal()];
-    }
-
-    /**
-     * Returns an array of posteriors for each genotype, indexed by DiploidGenotype.ordinal values().
-     *
-     * @return raw log10 (not-normalized posteriors) as a double array
-     */
-    public double[] getPosteriors() {
-        return log10Posteriors;
-    }
-
-    /**
-     * Returns the posterior associated with DiploidGenotype g
-     * @param g genotpe
-     * @return raw log10 (not-normalized posteror) as a double
-     */
-    public double getPosterior(DiploidGenotype g) {
-        return getPosteriors()[g.ordinal()];
-    }
-
-
-    /**
-     * Returns an array of posteriors for each genotype, indexed by DiploidGenotype.ordinal values().
-     *
-     * @return normalized posterors as a double array
-     */
-    public double[] getNormalizedPosteriors() {
-        double[] normalized = new double[log10Posteriors.length];
-        double sum = 0.0;
-
-        // for precision purposes, we need to add (or really subtract, since everything is negative)
-        // the largest posterior value from all entries so that numbers don't get too small
-        double maxValue = log10Posteriors[0];
-        for (int i = 1; i < log10Posteriors.length; i++) {
-            if ( maxValue < log10Posteriors[i] )
-                maxValue = log10Posteriors[i];
-        }
-
-        // collect the posteriors
-        for ( DiploidGenotype g : DiploidGenotype.values() ) {
-            double posterior = Math.pow(10, getPosterior(g) - maxValue);
-            normalized[g.ordinal()] = posterior;
-            sum += posterior;
-        }
-
-        // normalize
-        for (int i = 0; i < normalized.length; i++)
-            normalized[i] /= sum;
-
-        return normalized;
-    }
-
-
-
-    public DiploidSNPGenotypePriors getPriorObject() {
-        return priors;
-    }
-
-    /**
-     * Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values().
-     *
-     * @return log10 prior as a double array
-     */
-    public double[] getPriors() {
-        return priors.getPriors();
-    }
-
-    /**
-     * Sets the priors
-     * @param priors priors
-     */
-    public void setPriors(DiploidSNPGenotypePriors priors) {
-        this.priors = priors;
-        log10Posteriors = genotypeZeros.clone();
-        for ( DiploidGenotype g : DiploidGenotype.values() ) {
-            int i = g.ordinal();
-            log10Posteriors[i] = priors.getPriors()[i] + log10Likelihoods[i];
-        }
-    }
-
-    /**
-     * Returns the prior associated with DiploidGenotype g
-     * @param g genotype
-     * @return log10 prior as a double
-     */
-    public double getPrior(DiploidGenotype g) {
-        return getPriors()[g.ordinal()];
-    }
-
    // -------------------------------------------------------------------------------------
    //
    // add() routines.  These are the workhorse routines for calculating the overall genotype
@ -316,13 +201,12 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {

    /**
     *
-     * @param obsBase1
-     * @param qual1
-     * @param obsBase2
-     * @param qual2 can be 0, indicating no second base was observed for this fragment
-     * @param nObs The number of times this quad of values was seen.  Generally 1, but reduced reads
-     *  can have nObs > 1 for synthetic reads
-     * @return
+     * @param obsBase1    first observed base
+     * @param qual1       base qual of first observed base
+     * @param obsBase2    second observed base
+     * @param qual2       base qual of second observed base; can be 0, indicating no second base was observed for this fragment
+     * @param nObs        the number of times this quad of values was seen.  Generally 1, but reduced reads can have nObs > 1 for synthetic reads
+     * @return 0 if the base is bad, 1 otherwise
     */
    private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2, int nObs) {
        // TODO-- Right now we assume that there are at most 2 reads per fragment.  This assumption is fine
@ -346,7 +230,6 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
        for ( DiploidGenotype g : DiploidGenotype.values() ) {
            double likelihood = likelihoods[g.ordinal()];
            log10Likelihoods[g.ordinal()] += likelihood * nObs;
-            log10Posteriors[g.ordinal()] += likelihood * nObs;
        }

        return 1;
@ -419,16 +302,15 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
                double p_base = 0.0;
                p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment);
                p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment);
-                double likelihood = log10(p_base);

+                final double likelihood = log10(p_base);
                gl.log10Likelihoods[g.ordinal()] += likelihood;
-                gl.log10Posteriors[g.ordinal()] += likelihood;
            }

            if ( VERBOSE ) {
                for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); }
                System.out.println();
-                for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", gl.log10Likelihoods[g.ordinal()]); }
+            for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", gl.log10Likelihoods[g.ordinal()]); }
                System.out.println();
            }

@ -508,11 +390,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
     * likelihoods for a pileup element.  May return 0 to indicate that the observation is bad, and may
     * cap the quality score by the mapping quality of the read itself.
     *
-     * @param p
-     * @param ignoreBadBases
-     * @param capBaseQualsAtMappingQual
-     * @param minBaseQual
-     * @return
+     * @param p                           Pileup element
+     * @param ignoreBadBases              Should we ignore bad bases?
+     * @param capBaseQualsAtMappingQual   Should we cap the base qualities at the mapping quality of the read?
+     * @param minBaseQual                 Minimum allowed base quality
+     * @return the actual base quality to use
     */
    private static byte qualToUse(PileupElement p, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
        if ( ignoreBadBases && !BaseUtils.isRegularBase( p.getBase() ) )
@ -568,16 +450,12 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {

    public boolean validate(boolean throwException) {
        try {
-            priors.validate(throwException);
-
            for ( DiploidGenotype g : DiploidGenotype.values() ) {
                String bad = null;

                int i = g.ordinal();
                if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) {
                    bad = String.format("Likelihood %f is badly formed", log10Likelihoods[i]);
-                } else if ( ! MathUtils.wellFormedDouble(log10Posteriors[i]) || ! MathUtils.isNegativeOrZero(log10Posteriors[i]) ) {
-                    bad = String.format("Posterior %f is badly formed", log10Posteriors[i]);
                }

                if ( bad != null ) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering.java
@ -1,489 +0,0 @@
-/*
- * Copyright (c) 2010.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.genotyper;
-
-import net.sf.samtools.SAMUtils;
-import org.broadinstitute.sting.utils.BaseUtils;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.QualityUtils;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.fragments.FragmentCollection;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
-import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-
-import java.util.List;
-
-import static java.lang.Math.log10;
-import static java.lang.Math.pow;
-
-/**
- * Stable, error checking version of the Bayesian genotyper.  Useful for calculating the likelihoods, priors,
- * and posteriors given a pile of bases and quality scores
- *
- * Suppose we have bases b1, b2, ..., bN with qualities scores q1, q2, ..., qN.  This object
- * calculates:
- *
- * P(G | D) = P(G) * P(D | G)
- *
- * where
- *
- * P(D | G) = sum_i log10 P(bi | G)
- *
- * and
- *
- * P(bi | G) = 1 - P(error | q1) if bi is in G
- *           = P(error | q1) / 3 if bi is not in G
- *
- * for homozygous genotypes and for heterozygous genotypes:
- *
- * P(bi | G) = 1 - P(error | q1) / 2 + P(error | q1) / 6 if bi is in G
- *           = P(error | q1) / 3 if bi is not in G
- *
- * for each of the 10 unique diploid genotypes AA, AC, AG, .., TT
- *
- * Everything is stored as arrays indexed by DiploidGenotype.ordinal() values in log10 space.
- *
- * The priors contain the relative probabilities of each genotype, and must be provided at object creation.
- * From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above
- * model.
- */
-public class DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering implements Cloneable {
-
-    public final static double DEFAULT_PCR_ERROR_RATE = 1e-4;
-
-    protected final static int FIXED_PLOIDY = 2;
-    protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
-    protected final static double ploidyAdjustment = log10(FIXED_PLOIDY);
-    protected final static double log10_3 = log10(3.0);
-
-    protected boolean VERBOSE = false;
-
-    //
-    // The fundamental data arrays associated with a Genotype Likelihoods object
-    //
-    protected double[] log10Likelihoods = null;
-
-    // TODO: don't calculate this each time through
-    protected double log10_PCR_error_3;
-    protected double log10_1_minus_PCR_error;
-
-    /**
-     * Create a new GenotypeLikelhoods object with given PCR error rate for each diploid genotype
-     *
-     * @param PCR_error_rate  the PCR error rate
-     */
-    public DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering(double PCR_error_rate) {
-        log10_PCR_error_3 = log10(PCR_error_rate) - log10_3;
-        log10_1_minus_PCR_error = log10(1.0 - PCR_error_rate);
-        setToZero();
-    }
-
-    /**
-     * Cloning of the object
-     * @return clone
-     * @throws CloneNotSupportedException
-     */
-    protected Object clone() throws CloneNotSupportedException {
-        DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering c = (DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering)super.clone();
-        c.log10Likelihoods = log10Likelihoods.clone();
-        return c;
-    }
-
-    protected void setToZero() {
-        log10Likelihoods = genotypeZeros.clone();                 // likelihoods are all zeros
-    }
-
-    /**
-     * Returns an array of log10 likelihoods for each genotype, indexed by DiploidGenotype.ordinal values()
-     * @return likelihoods array
-     */
-    public double[] getLikelihoods() {
-        return log10Likelihoods;
-    }
-
-    // -------------------------------------------------------------------------------------
-    //
-    // add() routines.  These are the workhorse routines for calculating the overall genotype
-    // likelihoods given observed bases and reads.  Includes high-level operators all the
-    // way down to single base and qual functions.
-    //
-    // -------------------------------------------------------------------------------------
-
-    /**
-     * Updates likelihoods and posteriors to reflect the additional observations contained within the
-     * read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the
-     * pileup
-     *
-     * @param pileup                    read pileup
-     * @param ignoreBadBases            should we ignore bad bases?
-     * @param capBaseQualsAtMappingQual should we cap a base's quality by its read's mapping quality?
-     * @param minBaseQual               the minimum base quality at which to consider a base valid
-     * @return the number of good bases found in the pileup
-     */
-    public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
-        int n = 0;
-
-        // for each fragment, add to the likelihoods
-        FragmentCollection<PileupElement> fpile = pileup.toFragments();
-
-        for ( PileupElement p : fpile.getSingletonReads() )
-            n += add(p, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
-
-        for ( List<PileupElement> overlappingPair : fpile.getOverlappingPairs() )
-            n += add(overlappingPair, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
-
-        return n;
-    }
-
-    public int add(PileupElement elt, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
-        byte obsBase = elt.getBase();
-        byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
-        if ( qual == 0 )
-            return 0;
-
-        if ( elt.getRead().isReducedRead() ) {
-            // reduced read representation
-            if ( BaseUtils.isRegularBase( obsBase )) {
-                int representativeCount = elt.getRepresentativeCount();
-                add(obsBase, qual, (byte)0, (byte)0, representativeCount); // fast calculation of n identical likelihoods
-                return representativeCount; // we added nObs bases here
-            }
-
-            // odd bases or deletions => don't use them
-            return 0;
-        }
-
-        return add(obsBase, qual, (byte)0, (byte)0, 1);
-    }
-
-    public int add(List<PileupElement> overlappingPair, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
-        final PileupElement p1 = overlappingPair.get(0);
-        final PileupElement p2 = overlappingPair.get(1);
-
-        final byte observedBase1 = p1.getBase();
-        final byte qualityScore1 = qualToUse(p1, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
-        final byte observedBase2 = p2.getBase();
-        final byte qualityScore2 = qualToUse(p2, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
-
-        if ( qualityScore1 == 0 ) {
-            if ( qualityScore2 == 0 ) // abort early if we didn't see any good bases
-                return 0;
-            else {
-                return add(observedBase2, qualityScore2, (byte)0, (byte)0);
-            }
-        } else {
-            return add(observedBase1, qualityScore1, observedBase2, qualityScore2);
-        }
-    }
-
-    /**
-     *
-     * @param obsBase1    first observed base
-     * @param qual1       base qual of first observed base
-     * @param obsBase2    second observed base
-     * @param qual2       base qual of second observed base; can be 0, indicating no second base was observed for this fragment
-     * @param nObs        the number of times this quad of values was seen.  Generally 1, but reduced reads can have nObs > 1 for synthetic reads
-     * @return 0 if the base is bad, 1 otherwise
-     */
-    private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2, int nObs) {
-        // TODO-- Right now we assume that there are at most 2 reads per fragment.  This assumption is fine
-        // TODO--   given the current state of next-gen sequencing, but may need to be fixed in the future.
-        // TODO--   However, when that happens, we'll need to be a lot smarter about the caching we do here.
-
-        // Just look up the cached result if it's available, or compute and store it
-        DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering gl;
-        if ( ! inCache(obsBase1, qual1, obsBase2, qual2, FIXED_PLOIDY) ) {
-            gl = calculateCachedGenotypeLikelihoods(obsBase1, qual1, obsBase2, qual2, FIXED_PLOIDY);
-        } else {
-            gl = getCachedGenotypeLikelihoods(obsBase1, qual1, obsBase2, qual2, FIXED_PLOIDY);
-        }
-
-        // for bad bases, there are no likelihoods
-        if ( gl == null )
-            return 0;
-
-        double[] likelihoods = gl.getLikelihoods();
-
-        for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) {
-            double likelihood = likelihoods[g.ordinal()];
-            log10Likelihoods[g.ordinal()] += likelihood * nObs;
-        }
-
-        return 1;
-    }
-
-    private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2) {
-        return add(obsBase1, qual1, obsBase2, qual2, 1);
-    }
-
-    // -------------------------------------------------------------------------------------
-    //
-    // Dealing with the cache routines
-    //
-    // -------------------------------------------------------------------------------------
-
-    static DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering[][][][][] CACHE = new DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][BaseUtils.BASES.length+1][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY];
-
-    protected boolean inCache(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
-        return getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy) != null;
-    }
-
-    protected DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering getCachedGenotypeLikelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
-        DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering gl = getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy);
-        if ( gl == null )
-            throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base1=%c, qual1=%d, base2=%c, qual2=%d, ploidy=%d",
-                    observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy));
-        return gl;
-    }
-
-    protected DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering calculateCachedGenotypeLikelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
-        DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering gl = calculateGenotypeLikelihoods(observedBase1, qualityScore1, observedBase2, qualityScore2);
-        setCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy, gl);
-        return gl;
-    }
-
-    protected void setCache( DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering[][][][][] cache,
-                             byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy,
-                             DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering val ) {
-        int i = BaseUtils.simpleBaseToBaseIndex(observedBase1);
-        int j = qualityScore1;
-        int k = qualityScore2 != 0 ? BaseUtils.simpleBaseToBaseIndex(observedBase2) : BaseUtils.BASES.length;
-        int l = qualityScore2;
-        int m = ploidy;
-
-        cache[i][j][k][l][m] = val;
-    }
-
-    protected DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering getCache(DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering[][][][][] cache,
-                                            byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
-        int i = BaseUtils.simpleBaseToBaseIndex(observedBase1);
-        int j = qualityScore1;
-        int k = qualityScore2 != 0 ? BaseUtils.simpleBaseToBaseIndex(observedBase2) : BaseUtils.BASES.length;
-        int l = qualityScore2;
-        int m = ploidy;
-        return cache[i][j][k][l][m];
-    }
-
-    protected DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering calculateGenotypeLikelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2) {
-        double[] log10FourBaseLikelihoods = computeLog10Likelihoods(observedBase1, qualityScore1, observedBase2, qualityScore2);
-
-        try {
-
-            DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering gl = (DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering)this.clone();
-            gl.setToZero();
-
-            // we need to adjust for ploidy.  We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space
-            for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) {
-
-                // todo assumes ploidy is 2 -- should be generalized.  Obviously the below code can be turned into a loop
-                double p_base = 0.0;
-                p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment);
-                p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment);
-
-                final double likelihood = log10(p_base);
-                gl.log10Likelihoods[g.ordinal()] += likelihood;
-            }
-
-            if ( VERBOSE ) {
-                for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) { System.out.printf("%s\t", g); }
-                System.out.println();
-            for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) { System.out.printf("%.2f\t", gl.log10Likelihoods[g.ordinal()]); }
-                System.out.println();
-            }
-
-            return gl;
-
-         } catch ( CloneNotSupportedException e ) {
-             throw new RuntimeException(e);
-         }
-    }
-
-    /**
-     * Updates likelihoods and posteriors to reflect an additional observation of observedBase with
-     * qualityScore.
-     *
-     * @param observedBase1  the base observed on the 1st read of the fragment
-     * @param qualityScore1  the qual of the base on the 1st read of the fragment, or zero if NA
-     * @param observedBase2  the base observed on the 2nd read of the fragment
-     * @param qualityScore2  the qual of the base on the 2nd read of the fragment, or zero if NA
-     * @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example)
-     */
-    protected double[] computeLog10Likelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2) {
-        double[] log10FourBaseLikelihoods = baseZeros.clone();
-
-        for ( byte trueBase : BaseUtils.BASES ) {
-            double likelihood = 0.0;
-
-            for ( byte fragmentBase : BaseUtils.BASES ) {
-                double log10FragmentLikelihood = (trueBase == fragmentBase ? log10_1_minus_PCR_error : log10_PCR_error_3);
-                if ( qualityScore1 != 0 ) {
-                    log10FragmentLikelihood += log10PofObservingBaseGivenChromosome(observedBase1, fragmentBase, qualityScore1);
-                }
-                if ( qualityScore2 != 0 ) {
-                    log10FragmentLikelihood += log10PofObservingBaseGivenChromosome(observedBase2, fragmentBase, qualityScore2);
-                }
-
-                //if ( VERBOSE ) {
-                //    System.out.printf("  L(%c | b=%s, Q=%d) = %f / %f%n",
-                //            observedBase, trueBase, qualityScore, pow(10,likelihood) * 100, likelihood);
-                //}
-
-                likelihood += pow(10, log10FragmentLikelihood);
-            }
-
-            log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(trueBase)] = log10(likelihood);
-        }
-
-        return log10FourBaseLikelihoods;
-    }
-
-    /**
-     *
-     * @param observedBase observed base
-     * @param chromBase    target base
-     * @param qual         base quality
-     * @return log10 likelihood
-     */
-    protected double log10PofObservingBaseGivenChromosome(byte observedBase, byte chromBase, byte qual) {
-
-        double logP;
-
-        if ( observedBase == chromBase ) {
-            // the base is consistent with the chromosome -- it's 1 - e
-            //logP = oneMinusData[qual];
-            double e = pow(10, (qual / -10.0));
-            logP = log10(1.0 - e);
-        } else {
-            // the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error)
-            logP = qual / -10.0 + (-log10_3);
-        }
-
-        //System.out.printf("%c %c %d => %f%n", observedBase, chromBase, qual, logP);
-        return logP;
-    }
-
-    /**
-     * Helper function that returns the phred-scaled base quality score we should use for calculating
-     * likelihoods for a pileup element.  May return 0 to indicate that the observation is bad, and may
-     * cap the quality score by the mapping quality of the read itself.
-     *
-     * @param p                           Pileup element
-     * @param ignoreBadBases              Should we ignore bad bases?
-     * @param capBaseQualsAtMappingQual   Should we cap the base qualities at the mapping quality of the read?
-     * @param minBaseQual                 Minimum allowed base quality
-     * @return the actual base quality to use
-     */
-    private static byte qualToUse(PileupElement p, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
-        if ( ignoreBadBases && !BaseUtils.isRegularBase( p.getBase() ) )
-            return 0;
-
-        byte qual = p.getQual();
-
-        if ( qual > SAMUtils.MAX_PHRED_SCORE )
-            throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s.  Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
-        if ( capBaseQualsAtMappingQual )
-            qual = (byte)Math.min((int)qual, p.getMappingQual());
-        if ( (int)qual < minBaseQual )
-            qual = (byte)0;
-
-        return qual;
-    }
-
-    // -----------------------------------------------------------------------------------------------------------------
-    //
-    //
-    // helper routines
-    //
-    //
-    // -----------------------------------------------------------------------------------------------------------------
-
-    /**
-     * Return a string representation of this object in a moderately usable form
-     *
-     * @return string representation
-     */
-    public String toString() {
-        double sum = 0;
-        StringBuilder s = new StringBuilder();
-        for (DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values()) {
-            s.append(String.format("%s %.10f ", g, log10Likelihoods[g.ordinal()]));
-			sum += Math.pow(10,log10Likelihoods[g.ordinal()]);
-        }
-		s.append(String.format(" %f", sum));
-        return s.toString();
-    }
-
-    // -----------------------------------------------------------------------------------------------------------------
-    //
-    //
-    // Validation routines
-    //
-    //
-    // -----------------------------------------------------------------------------------------------------------------
-
-    public boolean validate() {
-        return validate(true);
-    }
-
-    public boolean validate(boolean throwException) {
-        try {
-            for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) {
-                String bad = null;
-
-                int i = g.ordinal();
-                if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) {
-                    bad = String.format("Likelihood %f is badly formed", log10Likelihoods[i]);
-                }
-
-                if ( bad != null ) {
-                    throw new IllegalStateException(String.format("At %s: %s", g.toString(), bad));
-                }
-            }
-        } catch ( IllegalStateException e ) {
-            if ( throwException )
-                throw new RuntimeException(e);
-            else
-                return false;
-        }
-
-        return true;
-    }
-
-    //
-    // Constant static data
-    //
-    private final static double[] genotypeZeros = new double[DiploidGenotypeWithCorrectAlleleOrdering.values().length];
-    private final static double[] baseZeros = new double[BaseUtils.BASES.length];
-
-    static {
-        for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) {
-            genotypeZeros[g.ordinal()] = 0.0;
-        }
-        for ( byte base : BaseUtils.BASES ) {
-            baseZeros[BaseUtils.simpleBaseToBaseIndex(base)] = 0.0;
-        }
-    }
-}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypePriors.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypePriors.java
@ -1,257 +0,0 @@
-/*
- * Copyright (c) 2010.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.genotyper;
-
-import org.broadinstitute.sting.utils.MathUtils;
-
-import java.util.Arrays;
-
-@Deprecated
-public class DiploidSNPGenotypePriors implements GenotypePriors {
-    // --------------------------------------------------------------------------------------------------------------
-    //
-    // Constants and static information
-    //
-    // --------------------------------------------------------------------------------------------------------------
-    public static final double HUMAN_HETEROZYGOSITY = 1e-3;
-    public static final double CEU_HETEROZYGOSITY = 1e-3;
-    public static final double YRI_HETEROZYGOSITY = 1.0 / 850;
-
-    /**
-     * Default value of the prob of seeing a reference error.  Used to calculation the
-     * chance of seeing a true B/C het when the reference is A, which we assume is the product
-     * of the ref error rate and the het. Default value is Q60
-     */
-    public static final double PROB_OF_REFERENCE_ERROR = 1e-6;  // the reference is
-
-    private final static double[] flatPriors = new double[DiploidGenotype.values().length];
-
-    // --------------------------------------------------------------------------------------------------------------
-    //
-    // Diploid priors
-    //
-    // --------------------------------------------------------------------------------------------------------------
-    private double[] priors = null;
-
-    // todo -- fix me when this issue is resolved
-    public static final boolean requirePriorSumToOne = false;
-
-    /**
-     * Create a new DiploidGenotypePriors object with flat priors for each diploid genotype
-     */
-    public DiploidSNPGenotypePriors() {
-        priors = flatPriors.clone();
-    }
-
-    /**
-     * Create a new GenotypeLikelihoods object with priors for a diploid with heterozygosity and reference
-     * base ref
-     *
-     * @param ref
-     * @param heterozygosity
-     * @param probOfTriStateGenotype The prob of seeing a true B/C het when the reference is A
-     */
-    public DiploidSNPGenotypePriors(byte ref, double heterozygosity, double probOfTriStateGenotype) {
-        priors = getReferencePolarizedPriors(ref, heterozygosity, probOfTriStateGenotype);
-    }
-
-    /**
-     * Create a new Genotypelike Likelhoods's object with priors (in log10 space) for each of the DiploteGenotypes
-     *
-     * @param log10Priors
-     */
-    public DiploidSNPGenotypePriors(double[] log10Priors) {
-        priors = log10Priors.clone();
-    }
-
-    /**
-     * Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values().
-     *
-     * @return log10 prior as a double array
-     */
-    public double[] getPriors() {
-        return priors;
-    }
-
-    /**
-     * Returns the prior associated with DiploidGenotype g
-     * @param g
-     * @return log10 prior as a double
-     */
-    public double getPrior(DiploidGenotype g) {
-        return getPriors()[g.ordinal()];
-    }
-
-    public double getHeterozygosity() { return HUMAN_HETEROZYGOSITY; }
-
-    public boolean validate(boolean throwException) {
-        try {
-            if ( requirePriorSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(priors), 1.0) != 0 ) {
-                throw new IllegalStateException(String.format("Priors don't sum to 1: sum=%f %s", MathUtils.sumLog10(priors), Arrays.toString(priors)));
-            }
-
-            for ( DiploidGenotype g : DiploidGenotype.values() ) {
-                int i = g.ordinal();
-                if ( ! MathUtils.wellFormedDouble(priors[i]) || ! MathUtils.isNegativeOrZero(priors[i]) ) {
-                    String bad = String.format("Prior %f is badly formed %b", priors[i], MathUtils.isNegativeOrZero(priors[i]));
-                    throw new IllegalStateException(String.format("At %s: %s", g.toString(), bad));
-                }
-            }
-        } catch ( IllegalStateException e ) {
-            if ( throwException )
-                throw new RuntimeException(e);
-            else
-                return false;
-        }
-
-        return true;
-    }
-
-    // --------------------------------------------------------------------------------------------------------------
-    //
-    // Static functionality
-    //
-    // --------------------------------------------------------------------------------------------------------------
-
-    /**
-     * Returns homozygous-reference, heterozygous, and homozygous-non-ref probabilities given a heterozygosity
-     * value, as elements 0, 1, and 2 of a double[], respectively
-     *
-     * @param h the heterozygosity [probability of a base being heterozygous]
-     */
-    @Deprecated
-    public static double[] heterozygosity2DiploidProbabilities(double h) {
-        double[] pdbls = new double[3];
-
-        pdbls[0] = heterozygosity2HomRefProbability(h);
-        pdbls[1] = heterozygosity2HetProbability(h);
-        pdbls[2] = heterozygosity2HomVarProbability(h);
-        return pdbls;
-    }
-
-    /**
-     *
-     * @param h
-     * @return
-     */
-    public static double heterozygosity2HomRefProbability(double h) {
-        if (MathUtils.isNegative(h)) {
-            throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
-        }
-
-        double v = 1.0 - (3.0 * h / 2.0);
-        if (MathUtils.isNegative(v)) {
-            throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
-        }
-
-        return v;
-    }
-
-    public static double heterozygosity2HetProbability(double h) {
-        if (MathUtils.isNegative(h)) {
-            throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
-        }
-
-        return h;
-    }
-
-    public static double heterozygosity2HomVarProbability(double h) {
-        if (MathUtils.isNegative(h)) {
-            throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
-        }
-
-        return h / 2.0;
-    }
-
-
-    /**
-     * Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector
-     * appropriately.
-     *
-     * Suppose A is the reference base, and we are given the probability of being hom-ref, het, and hom-var,
-     * and that pTriSateGenotype is the true probability of observing reference A and a true genotype of B/C
-     * then this sets the priors to:
-     *
-     * AA = hom-ref
-     * AC = AG = AT = (het - pTriStateGenotype) / 3
-     * CC = GG = TT = hom-var / 3
-     * CG = CT = GT = pTriStateGenotype / 3
-     *
-     * So that we get:
-     *
-     * hom-ref + 3 * (het - pTriStateGenotype) / 3 + 3 * hom-var / 3 + 3 * pTriStateGenotype
-     * hom-ref + het - pTriStateGenotype + hom-var + pTriStateGenotype
-     * hom-ref + het + hom-var
-     * = 1
-     *
-     * @param ref
-     * @param heterozyosity
-     * @param pRefError
-     */
-    public static double[] getReferencePolarizedPriors(byte ref, double heterozyosity, double pRefError ) {
-        if ( ! MathUtils.isBounded(pRefError, 0.0, 0.01) ) {
-            throw new RuntimeException(String.format("BUG: p Reference error is out of bounds (0.0 - 0.01) is allow range %f", pRefError));
-        }
-
-        double pTriStateGenotype = heterozyosity * pRefError;
-//        if ( pTriStateGenotype >= heterozyosity ) {
-//            throw new RuntimeException(String.format("p Tristate genotype %f is greater than the heterozygosity %f", pTriStateGenotype, heterozyosity));
-//        }
-
-        double pHomRef = heterozygosity2HomRefProbability(heterozyosity);
-        double pHet    = heterozygosity2HetProbability(heterozyosity);
-        double pHomVar = heterozygosity2HomVarProbability(heterozyosity);
-
-        if (MathUtils.compareDoubles(pHomRef + pHet + pHomVar, 1.0) != 0) {
-            throw new RuntimeException(String.format("BUG: Prior probabilities don't sum to one => %f, %f, %f", pHomRef, pHet, pHomVar));
-        }
-
-        double[] priors = new double[DiploidGenotype.values().length];
-
-        for ( DiploidGenotype g : DiploidGenotype.values() ) {
-            double POfG;
-
-            final double nOnRefHets = 3;
-            final double nOffRefHets = 3;
-            final double nHomVars = 3;
-
-            if ( g.isHomRef(ref) )      { POfG = pHomRef; }
-            else if ( g.isHomVar(ref) ) { POfG = pHomVar / nHomVars; }
-            else if ( g.isHetRef(ref) ) { POfG = (pHet - pTriStateGenotype ) / nOnRefHets; }
-            else                        { POfG = pTriStateGenotype / nOffRefHets; }
-
-            priors[g.ordinal()] = Math.log10(POfG);
-        }
-
-        return priors;
-    }
-
-    static {
-        for ( DiploidGenotype g : DiploidGenotype.values() ) {
-            flatPriors[g.ordinal()] = Math.log10(1.0 / DiploidGenotype.values().length);
-        }
-    }
-}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@ -84,7 +84,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
                pileup = createBAQedPileup( pileup );

            // create the GenotypeLikelihoods object
-            final DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering GL = new DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering(UAC.PCR_error);
+            final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods(UAC.PCR_error);
            final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
            if ( nGoodBases > 0 )
                GLs.add(new SampleGenotypeData(sample.getKey(), GL, getFilteredDepth(pileup)));
@ -139,7 +139,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
            for ( int j = i; j <= numAltAlleles; j++ ) {
                // As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
                // In other words, for biallelic sites the ordering is: AA,AB,BB; for triallelic sites the ordering is: AA,AB,BB,AC,BC,CC, etc."
-                PLordering[(j * (j+1) / 2) + i] = DiploidGenotypeWithCorrectAlleleOrdering.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal();
+                PLordering[(j * (j+1) / 2) + i] = DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal();
            }
        }

@ -171,7 +171,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
    protected List<Allele> determineAlternateAlleles(final byte ref, final List<SampleGenotypeData> sampleDataList) {

        final int baseIndexOfRef = BaseUtils.simpleBaseToBaseIndex(ref);
-        final int PLindexOfRef = DiploidGenotypeWithCorrectAlleleOrdering.createDiploidGenotype(ref, ref).ordinal();
+        final int PLindexOfRef = DiploidGenotype.createDiploidGenotype(ref, ref).ordinal();
        for ( int i = 0; i < 4; i++ )
            likelihoodSums[i] = 0.0;
        
@ -219,10 +219,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
    private static class SampleGenotypeData {

        public final String name;
-        public final DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering GL;
+        public final DiploidSNPGenotypeLikelihoods GL;
        public final int depth;

-        public SampleGenotypeData(final String name, final DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering GL, final int depth) {
+        public SampleGenotypeData(final String name, final DiploidSNPGenotypeLikelihoods GL, final int depth) {
            this.name = name;
            this.GL = GL;
            this.depth = depth;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@ -53,7 +53,7 @@ public class UnifiedArgumentCollection {
     * effectively acts as a cap on the base qualities.
     */
    @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
-    public Double PCR_error = DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering.DEFAULT_PCR_ERROR_RATE;
+    public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;

    @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
    public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypePriorsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypePriorsUnitTest.java
@ -1,109 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.genotyper;
-
-import org.testng.Assert;
-import org.broadinstitute.sting.BaseTest;
-import org.testng.annotations.Test;
-
-
-import static java.lang.Math.log10;
-
-public class GenotypePriorsUnitTest extends BaseTest {
-    private final static double DELTA = 1e-8;
-
-    @Test
-    public void testBasic() {
-        logger.warn("Executing testIsBetween");
-        Assert.assertEquals(DELTA, DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY,1e-3);
-    }
-
-
-    // f <- function(h) { print(paste(1-3.0 * h / 2, h, h/2, sep=', '));}
-    @Test
-    public void testPriorsFromHet() {
-        logger.warn("Executing testPriorsFromHet");
-        testPriorsFromHet(0.0, 1, 0, 0);
-        testPriorsFromHet(1e-1, 0.85, 0.1, 0.05);
-        testPriorsFromHet(1e-2, 0.985, 0.01, 0.005);
-        testPriorsFromHet(1e-3, 0.9985, 0.001, 5e-04);
-        testPriorsFromHet(1e-4, 0.99985, 1e-04, 5e-05);
-        testPriorsFromHet(1e-5, 0.999985, 1e-05, 5e-06);
-        testPriorsFromHet(0.5, 0.25, 0.5, 0.25);
-    }
-
-    @Test(expectedExceptions=RuntimeException.class)
-    public void testPriorsFromHetFail1() {
-        logger.warn("Executing testPriorsFromHetFail1");
-        testPriorsFromHet(1.0, 0, 0, 0);
-    }
-
-    @Test(expectedExceptions=RuntimeException.class)
-    public void testPriorsFromHetFail2() {
-        logger.warn("Executing testPriorsFromHetFail2");
-        testPriorsFromHet(-1.0, 0, 0, 0);
-    }
-
-    private void testPriorsFromHet(double h, double homRef, double het, double homVar) {
-        double[] vals = DiploidSNPGenotypePriors.heterozygosity2DiploidProbabilities(h);
-        Assert.assertEquals(vals[0], homRef, DELTA);
-        Assert.assertEquals(vals[1], het, DELTA);
-        Assert.assertEquals(vals[2], homVar, DELTA);
-        Assert.assertEquals(DiploidSNPGenotypePriors.heterozygosity2HomRefProbability(h), homRef, DELTA);
-        Assert.assertEquals(DiploidSNPGenotypePriors.heterozygosity2HetProbability(h), het, DELTA);
-        Assert.assertEquals(DiploidSNPGenotypePriors.heterozygosity2HomVarProbability(h), homVar, DELTA);
-    }
-
-    // 
-    @Test
-    public void testGenotypePriorsReferenceIndependent() {
-        logger.warn("Executing testGenotypePriorsReferenceIndependent");
-        // AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
-        double[] array1 = {-0.0705810742857073, -1, -1, -1, -1.301029995663981, -1, -1, -1.301029995663981, -1, -1.301029995663981};
-        testGenotypePriors('A', 1e-1, array1);
-        double[] array2 = {-1.301029995663981, -1, -1, -1, -0.0705810742857073, -1, -1, -1.301029995663981, -1, -1.301029995663981};
-        testGenotypePriors('C', 1e-1, array2);
-        double[] array3 = {-1.301029995663981, -1, -1, -1, -1.301029995663981, -1, -1, -0.0705810742857073, -1, -1.301029995663981};
-        testGenotypePriors('G', 1e-1, array3);
-        double[] array4 = {-1.301029995663981, -1, -1, -1, -1.301029995663981, -1, -1, -1.301029995663981, -1, -0.0705810742857073};
-        testGenotypePriors('T', 1e-1, array4);
-    }
-
-    private void testGenotypePriors(char ref, double h, double[] array) {
-        for ( DiploidGenotype g : DiploidGenotype.values() ) {
-            double val = 0.0;
-            if ( g.isHomRef((byte)ref) ) val = DiploidSNPGenotypePriors.heterozygosity2HomRefProbability(h);
-            if ( g.isHet() )       val = DiploidSNPGenotypePriors.heterozygosity2HetProbability(h);
-            if ( g.isHomVar((byte)ref) ) val = DiploidSNPGenotypePriors.heterozygosity2HomVarProbability(h);
-
-            val = log10(val);
-            double e = array[g.ordinal()];
-            Assert.assertEquals(val, e, DELTA, String.format("%s should have p=%f but has p=%f", g, val, e));
-        }
-    }
-
-    @Test
-    public void testGenotypePriorsReferencePolarized() {
-        logger.warn("Executing testGenotypePriorsReferencePolarized");
-        // AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
-        double[] array1 = {0.9985, 0.00033333, 0.00033333, 0.00033333, 0.000166666666666667, 3.333333e-09, 3.333333e-09, 0.000166666666666667, 3.33333333333333e-09, 0.000166666666666667};
-        logger.warn("  Array 1");
-        testPolarizedGenotypePriors('A', 1e-3, 1e-5, array1);
-        double[] array2 = {0.9985, 0.00033333, 0.00033333, 0.00033333, 0.000166666666666667, 3.333333e-10, 3.333333e-10, 0.000166666666666667, 3.33333333333333e-10, 0.000166666666666667};
-        logger.warn("  Array 2");
-        testPolarizedGenotypePriors('A', 1e-3, 1e-6, array2);
-        double[] array3 = {0.985, 0.0033333, 0.0033333, 0.0033333, 0.00166666666666667, 3.333333e-08, 3.333333e-08, 0.00166666666666667, 3.33333333333333e-08, 0.00166666666666667};
-        logger.warn("  Array 3");
-        testPolarizedGenotypePriors('A', 1e-2, 1e-5, array3);
-        double[] array4 = {0.99985, 3.33333e-05, 3.33333e-05, 3.33333e-05, 1.66666666666667e-05, 3.33333333333333e-12, 3.33333333333333e-12, 1.66666666666667e-05, 3.33333333333333e-12, 1.66666666666667e-05};
-        logger.warn("  Array 4");
-        testPolarizedGenotypePriors('A', 1e-4, 1e-6, array4);
-    }
-
-    private void testPolarizedGenotypePriors(char ref, double h, double pRefError, double[] array) {
-        DiploidSNPGenotypePriors priors = new DiploidSNPGenotypePriors((byte)ref, h, pRefError);
-        for ( DiploidGenotype g : DiploidGenotype.values() ) {
-            double val = Math.pow(10, priors.getPrior(g));
-            double e = array[g.ordinal()];
-            Assert.assertEquals(val, e, DELTA, String.format("%s should have p=%f but has p=%f", g, val, e));
-        }
-    }
-}