Updated polarized reference priors, need DiploidGenotypePriors class that is directly used by the NewHotness genotypelikelihoods, more bug fixes and refactoring, etc.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1391 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-08-07 19:01:04 +00:00
parent a864c2f025
commit 20baa80751
4 changed files with 309 additions and 145 deletions

View File

@ -0,0 +1,293 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.Arrays;
import net.sf.samtools.SAMRecord;
public class DiploidGenotypePriors {
// --------------------------------------------------------------------------------------------------------------
//
// Constants and static information
//
// --------------------------------------------------------------------------------------------------------------
public static final double HUMAN_HETEROZYGOSITY = 1e-3;
public static final double CEU_HETEROZYGOSITY = 1e-3;
public static final double YRI_HETEROZYGOSITY = 1.0 / 850;
/**
* Default value of the prob of seeing a true B/C het when the reference is A
*/
public static final double PROB_OF_TRISTATE_GENOTYPE = 1e-5;
private final static double[] flatPriors = new double[DiploidGenotype.values().length];
// --------------------------------------------------------------------------------------------------------------
//
// Diploid priors
//
// --------------------------------------------------------------------------------------------------------------
private double[] priors = null;
// todo -- fix me when this issue is resolved
public static boolean RequirePriorSumToOne = false;
/**
* Create a new DiploidGenotypePriors object with flat priors for each diploid genotype
*/
public DiploidGenotypePriors() {
priors = flatPriors.clone();
}
/**
* Create a new GenotypeLikelihoods object with priors for a diploid with heterozygosity and reference
* base ref
*
* @param ref
* @param heterozygosity
*/
public DiploidGenotypePriors(char ref, double heterozygosity, boolean dontPolarize) {
if ( dontPolarize ) {
double[] vals = heterozygosity2DiploidProbabilities(heterozygosity);
priors = getReferenceIndependentPriors(ref, vals[0], vals[1], vals[2]);
} else {
priors = getReferencePolarizedPriors(ref, heterozygosity, PROB_OF_TRISTATE_GENOTYPE);
}
}
/**
* Create a new GenotypeLikelihoods object with priors for a diploid with heterozygosity and reference
* base ref
*
* @param ref
* @param heterozygosity
* @param probOfTriStateGenotype The prob of seeing a true B/C het when the reference is A
*/
public DiploidGenotypePriors(char ref, double heterozygosity, double probOfTriStateGenotype) {
priors = getReferencePolarizedPriors(ref, heterozygosity, probOfTriStateGenotype);
}
/**
* Create a new Genotypelike Likelhoods's object with priors (in log10 space) for each of the DiploteGenotypes
*
* @param log10Priors
*/
public DiploidGenotypePriors(double[] log10Priors) {
priors = log10Priors.clone();
}
/**
* Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values().
*
* @return log10 prior as a double array
*/
public double[] getPriors() {
return priors;
}
/**
* Returns the prior associated with DiploidGenotype g
* @param g
* @return log10 prior as a double
*/
public double getPrior(DiploidGenotype g) {
return getPriors()[g.ordinal()];
}
public boolean validate(boolean throwException) {
try {
if ( RequirePriorSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(priors), 1.0) != 0 ) {
throw new IllegalStateException(String.format("Priors don't sum to 1: sum=%f %s", MathUtils.sumLog10(priors), Arrays.toString(priors)));
}
for ( DiploidGenotype g : DiploidGenotype.values() ) {
int i = g.ordinal();
if ( ! MathUtils.wellFormedDouble(priors[i]) || ! MathUtils.isNegativeOrZero(priors[i]) ) {
String bad = String.format("Prior %f is badly formed %b", priors[i], MathUtils.isNegativeOrZero(priors[i]));
throw new IllegalStateException(String.format("At %s: %s", g.toString(), bad));
}
}
} catch ( IllegalStateException e ) {
if ( throwException )
throw new RuntimeException(e);
else
return false;
}
return true;
}
// --------------------------------------------------------------------------------------------------------------
//
// Static functionality
//
// --------------------------------------------------------------------------------------------------------------
/**
* Returns homozygous-reference, heterozygous, and homozygous-non-ref probabilities given a heterozygosity
* value, as elements 0, 1, and 2 of a double[], respectively
*
* @param h the heterozygosity [probability of a base being heterozygous]
*/
@Deprecated
public static double[] heterozygosity2DiploidProbabilities(double h) {
double[] pdbls = new double[3];
pdbls[0] = heterozygosity2HomRefProbability(h);
pdbls[1] = heterozygosity2HetProbability(h);
pdbls[2] = heterozygosity2HomVarProbability(h);
return pdbls;
}
/**
*
* @param h
* @return
*/
public static double heterozygosity2HomRefProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
double v = 1.0 - (3.0 * h / 2.0);
if (MathUtils.isNegative(v)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return v;
}
public static double heterozygosity2HetProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return h;
}
public static double heterozygosity2HomVarProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return h / 2.0;
}
/**
* Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector
* appropriately.
*
* Suppose A is the reference base, and we are given the probability of being hom-ref, het, and hom-var,
* and that pTriSateGenotype is the true probability of observing reference A and a true genotype of B/C
* then this sets the priors to:
*
* AA = hom-ref
* AC = AG = AT = (het - pTriStateGenotype) / 3
* CC = GG = TT = hom-var / 3
* CG = CT = GT = pTriStateGenotype / 3
*
* So that we get:
*
* hom-ref + 3 * (het - pTriStateGenotype) / 3 + 3 * hom-var / 3 + 3 * pTriStateGenotype
* hom-ref + het - pTriStateGenotype + hom-var + pTriStateGenotype
* hom-ref + het + hom-var
* = 1
*
* @param ref
* @param heterozyosity
* @param pTriStateGenotype
*/
public static double[] getReferencePolarizedPriors(char ref, double heterozyosity, double pTriStateGenotype ) {
if ( ! MathUtils.isBounded(pTriStateGenotype, 0.0, 1.0) ) {
throw new RuntimeException(String.format("p Tristate genotype is out of bounds %f", pTriStateGenotype));
}
if ( pTriStateGenotype >= heterozyosity ) {
throw new RuntimeException(String.format("p Tristate genotype %f is greater than the heterozygosity %f", pTriStateGenotype, heterozyosity));
}
double pHomRef = heterozygosity2HomRefProbability(heterozyosity);
double pHet = heterozygosity2HetProbability(heterozyosity);
double pHomVar = heterozygosity2HomVarProbability(heterozyosity);
if ((pHomRef + pHet + pHomVar) != 1) {
throw new RuntimeException(String.format("Prior probabilities don't sum to one => %f, %f, %f", pHomRef, pHet, pHomVar));
}
double[] priors = new double[DiploidGenotype.values().length];
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double POfG = 0.0;
final double nOnRefHets = 3;
final double nOffRefHets = 3;
final double nHomVars = 3;
if ( g.isHomRef(ref) ) { POfG = pHomRef; }
else if ( g.isHomVar(ref) ) { POfG = pHomVar / nHomVars; }
else if ( g.isHetRef(ref) ) { POfG = (pHet - pTriStateGenotype ) / nOnRefHets; }
else { POfG = pTriStateGenotype / nOffRefHets; }
priors[g.ordinal()] = Math.log10(POfG);
}
return priors;
}
/**
* Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector
* appropriately.
*
* TODO -- delete me
*
* @param ref
* @param priorHomRef
* @param priorHet
* @param priorHomVar
*/
@Deprecated
public static double[] getReferenceIndependentPriors(char ref, double priorHomRef, double priorHet, double priorHomVar ) {
if ((priorHomRef + priorHet + priorHomVar) != 1) {
throw new RuntimeException(String.format("Prior probabilities don't sum to one => %f, %f, %f", priorHomRef, priorHet, priorHomVar));
}
double[] priors = new double[DiploidGenotype.values().length];
for ( DiploidGenotype g : DiploidGenotype.values() ) {
int nRefBases = Utils.countOccurrences(ref, g.toString());
double log10POfG = 0.0;
switch ( nRefBases ) {
case 2: // hom-ref
log10POfG = Math.log10(priorHomRef);
break;
case 0: // hom-nonref
//log10POfG = Math.log10(priorHomVar / 3);
log10POfG = Math.log10(priorHomVar);
break;
default:
//log10POfG = Math.log10(priorHet / 6);
log10POfG = Math.log10(priorHet);
break;
}
priors[g.ordinal()] = log10POfG;
}
return priors;
}
static {
for ( DiploidGenotype g : DiploidGenotype.values() ) {
flatPriors[g.ordinal()] = Math.log10(1.0 / DiploidGenotype.values().length);
}
}
}

View File

@ -34,96 +34,5 @@ public abstract class GenotypeLikelihoods {
}
}
public static final double HUMAN_HETEROZYGOSITY = 1e-3;
/**
* Returns homozygous-reference, heterozygous, and homozygous-non-ref probabilities given a heterozygosity
* value, as elements 0, 1, and 2 of a double[], respectively
*
* @param h the heterozygosity [probability of a base being heterozygous]
*/
public static double[] heterozygosity2DiploidProbabilities(double h) {
double[] pdbls = new double[3];
pdbls[0] = heterozygosity2HomRefProbability(h);
pdbls[1] = heterozygosity2HetProbability(h);
pdbls[2] = heterozygosity2HomVarProbability(h);
return pdbls;
}
/**
*
* @param h
* @return
*/
public static double heterozygosity2HomRefProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
double v = 1.0 - (3.0 * h / 2.0);
if (MathUtils.isNegative(v)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return v;
}
public static double heterozygosity2HetProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return h;
}
public static double heterozygosity2HomVarProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return h / 2.0;
}
/**
* Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector
* appropriately.
*
* @param ref
* @param priorHomRef
* @param priorHet
* @param priorHomVar
*/
public static double[] getGenotypePriors(char ref, double priorHomRef, double priorHet, double priorHomVar) {
if ((priorHomRef + priorHet + priorHomVar) != 1) {
throw new RuntimeException(String.format("Prior probabilities don't sum to one => %f, %f, %f", priorHomRef, priorHet, priorHomVar));
}
double[] priors = new double[DiploidGenotype.values().length];
for ( DiploidGenotype g : DiploidGenotype.values() ) {
int nRefBases = Utils.countOccurrences(ref, g.toString());
double log10POfG = 0.0;
switch ( nRefBases ) {
case 2: // hom-ref
log10POfG = Math.log10(priorHomRef);
break;
case 0: // hom-nonref
//log10POfG = Math.log10(priorHomVar / 3);
log10POfG = Math.log10(priorHomVar);
break;
default:
//log10POfG = Math.log10(priorHet / 6);
log10POfG = Math.log10(priorHet);
break;
}
priors[g.ordinal()] = log10POfG;
}
return priors;
}
public abstract int add(char ref, char read, byte qual);
}

View File

@ -27,12 +27,12 @@ import java.util.Arrays;
* P(bi | G) = 1 - P(error | q1) if bi is in G
* = P(error | q1) / 3 if bi is not in G
*
* for homozygous genotypes and
* for homozygous genotypes and for heterozygous genotypes:
*
* P(bi | G) = 1 - P(error | q1) / 2 + P(error | q1) / 6 if bi is in G
* = P(error | q1) / 3 if bi is not in G
*
* for the 10 unique diploid genotypes AA, AC, AG, .., TT
* for each of the 10 unique diploid genotypes AA, AC, AG, .., TT
*
* Everything is stored as arrays indexed by DiploidGenotype.ordinal() values in log10 space.
*
@ -47,63 +47,29 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
// The three fundamental data arrays associated with a Genotype Likelhoods object
//
private double[] likelihoods = null;
private double[] priors = null;
private double[] posteriors = null;
//
//
//
// todo -- fix me when this issue is resolved
public static boolean RequirePriorSumToOne = false;
private DiploidGenotypePriors priors = null;
/**
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
*/
public NewHotnessGenotypeLikelihoods() {
priors = flatPriors.clone();
initialize();
}
/**
* Create a new GenotypeLikelihoods object with priors for a diploid with heterozygosity and reference
* base ref
*
* @param ref
* @param heterozygosity
*/
public NewHotnessGenotypeLikelihoods(char ref, double heterozygosity) {
double[] vals = heterozygosity2DiploidProbabilities(heterozygosity);
priors = getGenotypePriors(ref, vals[0], vals[1], vals[2]);
this.priors = new DiploidGenotypePriors();
initialize();
}
/**
*
* Create a new GenotypeLikelihoods object with priors for a diploid with reference
* base ref and priors for hom-ref (ref/ref), het (ref/X), and hom-var (X/X) states
*
* @param ref
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
*/
public NewHotnessGenotypeLikelihoods(char ref, double priorHomRef, double priorHet, double priorHomVar) {
priors = getGenotypePriors(ref, priorHomRef, priorHet, priorHomVar);
initialize();
}
/**
* Create a new Genotypelike Likelhoods's object with priors (in log10 space) for each of the DiploteGenotypes
*
* @param log10Priors
*/
public NewHotnessGenotypeLikelihoods(double[] log10Priors) {
priors = log10Priors.clone();
public NewHotnessGenotypeLikelihoods(DiploidGenotypePriors priors) {
this.priors = priors;
initialize();
}
private void initialize() {
likelihoods = zeros.clone(); // likelihoods are all zeros
posteriors = priors.clone(); // posteriors are all the priors
posteriors = priors.getPriors().clone(); // posteriors are all the priors
}
/**
@ -147,7 +113,7 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
* @return log10 prior as a double array
*/
public double[] getPriors() {
return priors;
return priors.getPriors();
}
/**
@ -306,17 +272,13 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
public boolean validate(boolean throwException) {
try {
if ( RequirePriorSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(priors), 1.0) != 0 ) {
throw new IllegalStateException(String.format("Priors don't sum to 1: sum=%f %s", MathUtils.sumLog10(priors), Arrays.toString(priors)));
}
priors.validate(throwException);
for ( DiploidGenotype g : DiploidGenotype.values() ) {
String bad = null;
int i = g.ordinal();
if ( ! MathUtils.wellFormedDouble(priors[i]) || ! MathUtils.isNegativeOrZero(priors[i]) ) {
bad = String.format("Prior %f is badly formed %b", priors[i], MathUtils.isNegativeOrZero(priors[i]));
} else if ( ! MathUtils.wellFormedDouble(likelihoods[i]) || ! MathUtils.isNegativeOrZero(likelihoods[i]) ) {
if ( ! MathUtils.wellFormedDouble(likelihoods[i]) || ! MathUtils.isNegativeOrZero(likelihoods[i]) ) {
bad = String.format("Likelihood %f is badly formed", likelihoods[i]);
} else if ( ! MathUtils.wellFormedDouble(posteriors[i]) || ! MathUtils.isNegativeOrZero(posteriors[i]) ) {
bad = String.format("Posterior %f is badly formed", posteriors[i]);
@ -348,12 +310,10 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
// Constant static data
//
private final static double[] zeros = new double[DiploidGenotype.values().length];
private final static double[] flatPriors = new double[DiploidGenotype.values().length];
static {
for ( DiploidGenotype g : DiploidGenotype.values() ) {
zeros[g.ordinal()] = 0.0;
flatPriors[g.ordinal()] = Math.log10(1.0 / DiploidGenotype.values().length);
}
}
}

View File

@ -38,7 +38,9 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
// Control how the genotype hypotheses are weighed
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = GenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
public Double heterozygosity = DiploidGenotypePriors.HUMAN_HETEROZYGOSITY;
// todo - print out priors at the start of SSG with .INFO
// todo -- remove dbSNP awareness until we understand how it should be used
//@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false)
@ -47,7 +49,6 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
@Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false)
public boolean keepQ0Bases = false;
/**
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
*
@ -75,7 +76,8 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
*/
public SSGGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
char ref = refContext.getBase();
NewHotnessGenotypeLikelihoods G = new NewHotnessGenotypeLikelihoods(ref, GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
NewHotnessGenotypeLikelihoods G = new NewHotnessGenotypeLikelihoods(priors);
G.filterQ0Bases(! keepQ0Bases);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
G.add(pileup, true);