diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypePriors.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypePriors.java new file mode 100755 index 000000000..0fa201970 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypePriors.java @@ -0,0 +1,293 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.ReadBackedPileup; + +import static java.lang.Math.log10; +import static java.lang.Math.pow; +import java.util.Arrays; + +import net.sf.samtools.SAMRecord; + +public class DiploidGenotypePriors { + // -------------------------------------------------------------------------------------------------------------- + // + // Constants and static information + // + // -------------------------------------------------------------------------------------------------------------- + public static final double HUMAN_HETEROZYGOSITY = 1e-3; + public static final double CEU_HETEROZYGOSITY = 1e-3; + public static final double YRI_HETEROZYGOSITY = 1.0 / 850; + + /** + * Default value of the prob of seeing a true B/C het when the reference is A + */ + public static final double PROB_OF_TRISTATE_GENOTYPE = 1e-5; + + private final static double[] flatPriors = new double[DiploidGenotype.values().length]; + + // -------------------------------------------------------------------------------------------------------------- + // + // Diploid priors + // + // -------------------------------------------------------------------------------------------------------------- + private double[] priors = null; + + // todo -- fix me when this issue is resolved + public static boolean RequirePriorSumToOne = false; + + /** + * Create a new DiploidGenotypePriors object with flat priors for each diploid genotype + */ + public DiploidGenotypePriors() { + priors = flatPriors.clone(); + } + + /** + * Create a new GenotypeLikelihoods object with priors for a diploid with heterozygosity and reference + * base ref + * + * @param ref + * @param heterozygosity + */ + public DiploidGenotypePriors(char ref, double heterozygosity, boolean dontPolarize) { + if ( dontPolarize ) { + double[] vals = heterozygosity2DiploidProbabilities(heterozygosity); + priors = getReferenceIndependentPriors(ref, vals[0], vals[1], vals[2]); + } else { + priors = getReferencePolarizedPriors(ref, heterozygosity, PROB_OF_TRISTATE_GENOTYPE); + } + } + + /** + * Create a new GenotypeLikelihoods object with priors for a diploid with heterozygosity and reference + * base ref + * + * @param ref + * @param heterozygosity + * @param probOfTriStateGenotype The prob of seeing a true B/C het when the reference is A + */ + public DiploidGenotypePriors(char ref, double heterozygosity, double probOfTriStateGenotype) { + priors = getReferencePolarizedPriors(ref, heterozygosity, probOfTriStateGenotype); + } + + /** + * Create a new Genotypelike Likelhoods's object with priors (in log10 space) for each of the DiploteGenotypes + * + * @param log10Priors + */ + public DiploidGenotypePriors(double[] log10Priors) { + priors = log10Priors.clone(); + } + + /** + * Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values(). + * + * @return log10 prior as a double array + */ + public double[] getPriors() { + return priors; + } + + /** + * Returns the prior associated with DiploidGenotype g + * @param g + * @return log10 prior as a double + */ + public double getPrior(DiploidGenotype g) { + return getPriors()[g.ordinal()]; + } + + + public boolean validate(boolean throwException) { + try { + if ( RequirePriorSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(priors), 1.0) != 0 ) { + throw new IllegalStateException(String.format("Priors don't sum to 1: sum=%f %s", MathUtils.sumLog10(priors), Arrays.toString(priors))); + } + + for ( DiploidGenotype g : DiploidGenotype.values() ) { + int i = g.ordinal(); + if ( ! MathUtils.wellFormedDouble(priors[i]) || ! MathUtils.isNegativeOrZero(priors[i]) ) { + String bad = String.format("Prior %f is badly formed %b", priors[i], MathUtils.isNegativeOrZero(priors[i])); + throw new IllegalStateException(String.format("At %s: %s", g.toString(), bad)); + } + } + } catch ( IllegalStateException e ) { + if ( throwException ) + throw new RuntimeException(e); + else + return false; + } + + return true; + } + + // -------------------------------------------------------------------------------------------------------------- + // + // Static functionality + // + // -------------------------------------------------------------------------------------------------------------- + + /** + * Returns homozygous-reference, heterozygous, and homozygous-non-ref probabilities given a heterozygosity + * value, as elements 0, 1, and 2 of a double[], respectively + * + * @param h the heterozygosity [probability of a base being heterozygous] + */ + @Deprecated + public static double[] heterozygosity2DiploidProbabilities(double h) { + double[] pdbls = new double[3]; + + pdbls[0] = heterozygosity2HomRefProbability(h); + pdbls[1] = heterozygosity2HetProbability(h); + pdbls[2] = heterozygosity2HomVarProbability(h); + return pdbls; + } + + /** + * + * @param h + * @return + */ + public static double heterozygosity2HomRefProbability(double h) { + if (MathUtils.isNegative(h)) { + throw new RuntimeException(String.format("Heterozygous value is bad %f", h)); + } + + double v = 1.0 - (3.0 * h / 2.0); + if (MathUtils.isNegative(v)) { + throw new RuntimeException(String.format("Heterozygous value is bad %f", h)); + } + + return v; + } + + public static double heterozygosity2HetProbability(double h) { + if (MathUtils.isNegative(h)) { + throw new RuntimeException(String.format("Heterozygous value is bad %f", h)); + } + + return h; + } + + public static double heterozygosity2HomVarProbability(double h) { + if (MathUtils.isNegative(h)) { + throw new RuntimeException(String.format("Heterozygous value is bad %f", h)); + } + + return h / 2.0; + } + + + /** + * Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector + * appropriately. + * + * Suppose A is the reference base, and we are given the probability of being hom-ref, het, and hom-var, + * and that pTriSateGenotype is the true probability of observing reference A and a true genotype of B/C + * then this sets the priors to: + * + * AA = hom-ref + * AC = AG = AT = (het - pTriStateGenotype) / 3 + * CC = GG = TT = hom-var / 3 + * CG = CT = GT = pTriStateGenotype / 3 + * + * So that we get: + * + * hom-ref + 3 * (het - pTriStateGenotype) / 3 + 3 * hom-var / 3 + 3 * pTriStateGenotype + * hom-ref + het - pTriStateGenotype + hom-var + pTriStateGenotype + * hom-ref + het + hom-var + * = 1 + * + * @param ref + * @param heterozyosity + * @param pTriStateGenotype + */ + public static double[] getReferencePolarizedPriors(char ref, double heterozyosity, double pTriStateGenotype ) { + if ( ! MathUtils.isBounded(pTriStateGenotype, 0.0, 1.0) ) { + throw new RuntimeException(String.format("p Tristate genotype is out of bounds %f", pTriStateGenotype)); + } + + if ( pTriStateGenotype >= heterozyosity ) { + throw new RuntimeException(String.format("p Tristate genotype %f is greater than the heterozygosity %f", pTriStateGenotype, heterozyosity)); + } + + double pHomRef = heterozygosity2HomRefProbability(heterozyosity); + double pHet = heterozygosity2HetProbability(heterozyosity); + double pHomVar = heterozygosity2HomVarProbability(heterozyosity); + + if ((pHomRef + pHet + pHomVar) != 1) { + throw new RuntimeException(String.format("Prior probabilities don't sum to one => %f, %f, %f", pHomRef, pHet, pHomVar)); + } + + double[] priors = new double[DiploidGenotype.values().length]; + + for ( DiploidGenotype g : DiploidGenotype.values() ) { + double POfG = 0.0; + + final double nOnRefHets = 3; + final double nOffRefHets = 3; + final double nHomVars = 3; + + if ( g.isHomRef(ref) ) { POfG = pHomRef; } + else if ( g.isHomVar(ref) ) { POfG = pHomVar / nHomVars; } + else if ( g.isHetRef(ref) ) { POfG = (pHet - pTriStateGenotype ) / nOnRefHets; } + else { POfG = pTriStateGenotype / nOffRefHets; } + + priors[g.ordinal()] = Math.log10(POfG); + } + + return priors; + } + + /** + * Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector + * appropriately. + * + * TODO -- delete me + * + * @param ref + * @param priorHomRef + * @param priorHet + * @param priorHomVar + */ + @Deprecated + public static double[] getReferenceIndependentPriors(char ref, double priorHomRef, double priorHet, double priorHomVar ) { + if ((priorHomRef + priorHet + priorHomVar) != 1) { + throw new RuntimeException(String.format("Prior probabilities don't sum to one => %f, %f, %f", priorHomRef, priorHet, priorHomVar)); + } + + double[] priors = new double[DiploidGenotype.values().length]; + + for ( DiploidGenotype g : DiploidGenotype.values() ) { + int nRefBases = Utils.countOccurrences(ref, g.toString()); + double log10POfG = 0.0; + + switch ( nRefBases ) { + case 2: // hom-ref + log10POfG = Math.log10(priorHomRef); + break; + case 0: // hom-nonref + //log10POfG = Math.log10(priorHomVar / 3); + log10POfG = Math.log10(priorHomVar); + break; + default: + //log10POfG = Math.log10(priorHet / 6); + log10POfG = Math.log10(priorHet); + break; + } + + priors[g.ordinal()] = log10POfG; + } + + return priors; + } + + static { + for ( DiploidGenotype g : DiploidGenotype.values() ) { + flatPriors[g.ordinal()] = Math.log10(1.0 / DiploidGenotype.values().length); + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java index 7ca4b94ba..a4c6f0a94 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java @@ -34,96 +34,5 @@ public abstract class GenotypeLikelihoods { } } - public static final double HUMAN_HETEROZYGOSITY = 1e-3; - - /** - * Returns homozygous-reference, heterozygous, and homozygous-non-ref probabilities given a heterozygosity - * value, as elements 0, 1, and 2 of a double[], respectively - * - * @param h the heterozygosity [probability of a base being heterozygous] - */ - public static double[] heterozygosity2DiploidProbabilities(double h) { - double[] pdbls = new double[3]; - - pdbls[0] = heterozygosity2HomRefProbability(h); - pdbls[1] = heterozygosity2HetProbability(h); - pdbls[2] = heterozygosity2HomVarProbability(h); - return pdbls; - } - - /** - * - * @param h - * @return - */ - public static double heterozygosity2HomRefProbability(double h) { - if (MathUtils.isNegative(h)) { - throw new RuntimeException(String.format("Heterozygous value is bad %f", h)); - } - - double v = 1.0 - (3.0 * h / 2.0); - if (MathUtils.isNegative(v)) { - throw new RuntimeException(String.format("Heterozygous value is bad %f", h)); - } - - return v; - } - - public static double heterozygosity2HetProbability(double h) { - if (MathUtils.isNegative(h)) { - throw new RuntimeException(String.format("Heterozygous value is bad %f", h)); - } - - return h; - } - - public static double heterozygosity2HomVarProbability(double h) { - if (MathUtils.isNegative(h)) { - throw new RuntimeException(String.format("Heterozygous value is bad %f", h)); - } - - return h / 2.0; - } - - /** - * Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector - * appropriately. - * - * @param ref - * @param priorHomRef - * @param priorHet - * @param priorHomVar - */ - public static double[] getGenotypePriors(char ref, double priorHomRef, double priorHet, double priorHomVar) { - if ((priorHomRef + priorHet + priorHomVar) != 1) { - throw new RuntimeException(String.format("Prior probabilities don't sum to one => %f, %f, %f", priorHomRef, priorHet, priorHomVar)); - } - - double[] priors = new double[DiploidGenotype.values().length]; - - for ( DiploidGenotype g : DiploidGenotype.values() ) { - int nRefBases = Utils.countOccurrences(ref, g.toString()); - double log10POfG = 0.0; - - switch ( nRefBases ) { - case 2: // hom-ref - log10POfG = Math.log10(priorHomRef); - break; - case 0: // hom-nonref - //log10POfG = Math.log10(priorHomVar / 3); - log10POfG = Math.log10(priorHomVar); - break; - default: - //log10POfG = Math.log10(priorHet / 6); - log10POfG = Math.log10(priorHet); - break; - } - - priors[g.ordinal()] = log10POfG; - } - - return priors; - } - public abstract int add(char ref, char read, byte qual); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/NewHotnessGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/NewHotnessGenotypeLikelihoods.java index 80b854e40..46974f85a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/NewHotnessGenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/NewHotnessGenotypeLikelihoods.java @@ -27,12 +27,12 @@ import java.util.Arrays; * P(bi | G) = 1 - P(error | q1) if bi is in G * = P(error | q1) / 3 if bi is not in G * - * for homozygous genotypes and + * for homozygous genotypes and for heterozygous genotypes: * * P(bi | G) = 1 - P(error | q1) / 2 + P(error | q1) / 6 if bi is in G * = P(error | q1) / 3 if bi is not in G * - * for the 10 unique diploid genotypes AA, AC, AG, .., TT + * for each of the 10 unique diploid genotypes AA, AC, AG, .., TT * * Everything is stored as arrays indexed by DiploidGenotype.ordinal() values in log10 space. * @@ -47,63 +47,29 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods { // The three fundamental data arrays associated with a Genotype Likelhoods object // private double[] likelihoods = null; - private double[] priors = null; private double[] posteriors = null; - // - // - // - - // todo -- fix me when this issue is resolved - public static boolean RequirePriorSumToOne = false; + private DiploidGenotypePriors priors = null; /** * Create a new GenotypeLikelhoods object with flat priors for each diploid genotype */ public NewHotnessGenotypeLikelihoods() { - priors = flatPriors.clone(); - initialize(); - } - - - /** - * Create a new GenotypeLikelihoods object with priors for a diploid with heterozygosity and reference - * base ref - * - * @param ref - * @param heterozygosity - */ - public NewHotnessGenotypeLikelihoods(char ref, double heterozygosity) { - double[] vals = heterozygosity2DiploidProbabilities(heterozygosity); - priors = getGenotypePriors(ref, vals[0], vals[1], vals[2]); + this.priors = new DiploidGenotypePriors(); initialize(); } /** - * - * Create a new GenotypeLikelihoods object with priors for a diploid with reference - * base ref and priors for hom-ref (ref/ref), het (ref/X), and hom-var (X/X) states - * - * @param ref + * Create a new GenotypeLikelhoods object with flat priors for each diploid genotype */ - public NewHotnessGenotypeLikelihoods(char ref, double priorHomRef, double priorHet, double priorHomVar) { - priors = getGenotypePriors(ref, priorHomRef, priorHet, priorHomVar); - initialize(); - } - - /** - * Create a new Genotypelike Likelhoods's object with priors (in log10 space) for each of the DiploteGenotypes - * - * @param log10Priors - */ - public NewHotnessGenotypeLikelihoods(double[] log10Priors) { - priors = log10Priors.clone(); + public NewHotnessGenotypeLikelihoods(DiploidGenotypePriors priors) { + this.priors = priors; initialize(); } private void initialize() { likelihoods = zeros.clone(); // likelihoods are all zeros - posteriors = priors.clone(); // posteriors are all the priors + posteriors = priors.getPriors().clone(); // posteriors are all the priors } /** @@ -147,7 +113,7 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods { * @return log10 prior as a double array */ public double[] getPriors() { - return priors; + return priors.getPriors(); } /** @@ -306,17 +272,13 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods { public boolean validate(boolean throwException) { try { - if ( RequirePriorSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(priors), 1.0) != 0 ) { - throw new IllegalStateException(String.format("Priors don't sum to 1: sum=%f %s", MathUtils.sumLog10(priors), Arrays.toString(priors))); - } + priors.validate(throwException); for ( DiploidGenotype g : DiploidGenotype.values() ) { String bad = null; int i = g.ordinal(); - if ( ! MathUtils.wellFormedDouble(priors[i]) || ! MathUtils.isNegativeOrZero(priors[i]) ) { - bad = String.format("Prior %f is badly formed %b", priors[i], MathUtils.isNegativeOrZero(priors[i])); - } else if ( ! MathUtils.wellFormedDouble(likelihoods[i]) || ! MathUtils.isNegativeOrZero(likelihoods[i]) ) { + if ( ! MathUtils.wellFormedDouble(likelihoods[i]) || ! MathUtils.isNegativeOrZero(likelihoods[i]) ) { bad = String.format("Likelihood %f is badly formed", likelihoods[i]); } else if ( ! MathUtils.wellFormedDouble(posteriors[i]) || ! MathUtils.isNegativeOrZero(posteriors[i]) ) { bad = String.format("Posterior %f is badly formed", posteriors[i]); @@ -348,12 +310,10 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods { // Constant static data // private final static double[] zeros = new double[DiploidGenotype.values().length]; - private final static double[] flatPriors = new double[DiploidGenotype.values().length]; static { for ( DiploidGenotype g : DiploidGenotype.values() ) { zeros[g.ordinal()] = 0.0; - flatPriors[g.ordinal()] = Math.log10(1.0 / DiploidGenotype.values().length); } } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java index 4e17d48bf..0e40936e5 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java @@ -38,7 +38,9 @@ public class SingleSampleGenotyper extends LocusWalker