As promised in the release notes for 1.6, I am removing the old deprecated genotyping framework revolving around the misordering of alleles and have moved the fixed version in its place in preparation for release 1.7 (or 2.0?).

This commit is contained in:
Eric Banks 2012-05-01 16:18:24 -04:00
parent c255dd5917
commit 619a69a5f1
9 changed files with 129 additions and 1135 deletions

View File

@ -32,7 +32,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidSNPGenotypePriors;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -68,8 +69,8 @@ public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements Tre
if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case
ReadBackedPileup pileup = context.getBasePileup().getPileupWithoutMappingQualityZeroReads();
double likelihoods[] = DiploidSNPGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY,
double likelihoods[] = getReferencePolarizedPriors(ref.getBase(),
UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY,
0.01);
// get the bases and qualities from the pileup
byte bases[] = pileup.getBases();
@ -104,10 +105,106 @@ public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements Tre
}
/**
* Provide an initial value for reduce computations. In this case we simply return an empty list
* Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector
* appropriately.
*
* @return Initial value of reduce.
* Suppose A is the reference base, and we are given the probability of being hom-ref, het, and hom-var,
* and that pTriSateGenotype is the true probability of observing reference A and a true genotype of B/C
* then this sets the priors to:
*
* AA = hom-ref
* AC = AG = AT = (het - pTriStateGenotype) / 3
* CC = GG = TT = hom-var / 3
* CG = CT = GT = pTriStateGenotype / 3
*
* So that we get:
*
* hom-ref + 3 * (het - pTriStateGenotype) / 3 + 3 * hom-var / 3 + 3 * pTriStateGenotype
* hom-ref + het - pTriStateGenotype + hom-var + pTriStateGenotype
* hom-ref + het + hom-var
* = 1
*
* @param ref
* @param heterozyosity
* @param pRefError
*/
public static double[] getReferencePolarizedPriors(byte ref, double heterozyosity, double pRefError ) {
if ( ! MathUtils.isBounded(pRefError, 0.0, 0.01) ) {
throw new RuntimeException(String.format("BUG: p Reference error is out of bounds (0.0 - 0.01) is allow range %f", pRefError));
}
double pTriStateGenotype = heterozyosity * pRefError;
// if ( pTriStateGenotype >= heterozyosity ) {
// throw new RuntimeException(String.format("p Tristate genotype %f is greater than the heterozygosity %f", pTriStateGenotype, heterozyosity));
// }
double pHomRef = heterozygosity2HomRefProbability(heterozyosity);
double pHet = heterozygosity2HetProbability(heterozyosity);
double pHomVar = heterozygosity2HomVarProbability(heterozyosity);
if (MathUtils.compareDoubles(pHomRef + pHet + pHomVar, 1.0) != 0) {
throw new RuntimeException(String.format("BUG: Prior probabilities don't sum to one => %f, %f, %f", pHomRef, pHet, pHomVar));
}
double[] priors = new double[DiploidGenotype.values().length];
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double POfG;
final double nOnRefHets = 3;
final double nOffRefHets = 3;
final double nHomVars = 3;
if ( g.isHomRef(ref) ) { POfG = pHomRef; }
else if ( g.isHomVar(ref) ) { POfG = pHomVar / nHomVars; }
else if ( g.isHetRef(ref) ) { POfG = (pHet - pTriStateGenotype ) / nOnRefHets; }
else { POfG = pTriStateGenotype / nOffRefHets; }
priors[g.ordinal()] = Math.log10(POfG);
}
return priors;
}
/**
*
* @param h
* @return
*/
public static double heterozygosity2HomRefProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
double v = 1.0 - (3.0 * h / 2.0);
if (MathUtils.isNegative(v)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return v;
}
public static double heterozygosity2HetProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return h;
}
public static double heterozygosity2HomVarProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return h / 2.0;
}
/**
* Provide an initial value for reduce computations. In this case we simply return an empty list
*
* @return Initial value of reduce.
*/
public Long reduceInit() {
return 0L;
}

View File

@ -27,16 +27,15 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
@Deprecated
public enum DiploidGenotype {
AA ('A', 'A'),
AC ('A', 'C'),
AG ('A', 'G'),
AT ('A', 'T'),
CC ('C', 'C'),
AG ('A', 'G'),
CG ('C', 'G'),
CT ('C', 'T'),
GG ('G', 'G'),
AT ('A', 'T'),
CT ('C', 'T'),
GT ('G', 'T'),
TT ('T', 'T');

View File

@ -1,125 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
public enum DiploidGenotypeWithCorrectAlleleOrdering {
AA ('A', 'A'),
AC ('A', 'C'),
CC ('C', 'C'),
AG ('A', 'G'),
CG ('C', 'G'),
GG ('G', 'G'),
AT ('A', 'T'),
CT ('C', 'T'),
GT ('G', 'T'),
TT ('T', 'T');
public byte base1, base2;
@Deprecated
private DiploidGenotypeWithCorrectAlleleOrdering(char base1, char base2) {
this((byte)base1, (byte)base2);
}
private DiploidGenotypeWithCorrectAlleleOrdering(byte base1, byte base2) {
this.base1 = base1;
this.base2 = base2;
}
public boolean isHomRef(byte r) {
return isHom() && r == base1;
}
public boolean isHomVar(byte r) {
return isHom() && r != base1;
}
public boolean isHetRef(byte r) {
if ( base1 == r )
return r != base2;
else
return base2 == r;
}
public boolean isHom() {
return ! isHet();
}
public boolean isHet() {
return base1 != base2;
}
/**
* create a diploid genotype, given a character to make into a hom genotype
* @param hom the character to turn into a hom genotype, i.e. if it is A, then returned will be AA
* @return the diploid genotype
*/
public static DiploidGenotypeWithCorrectAlleleOrdering createHomGenotype(byte hom) {
int index = BaseUtils.simpleBaseToBaseIndex(hom);
if ( index == -1 )
throw new IllegalArgumentException(hom + " is not a valid base character");
return conversionMatrix[index][index];
}
/**
* create a diploid genotype, given 2 chars which may not necessarily be ordered correctly
* @param base1 base1
* @param base2 base2
* @return the diploid genotype
*/
public static DiploidGenotypeWithCorrectAlleleOrdering createDiploidGenotype(byte base1, byte base2) {
int index1 = BaseUtils.simpleBaseToBaseIndex(base1);
if ( index1 == -1 )
throw new IllegalArgumentException(base1 + " is not a valid base character");
int index2 = BaseUtils.simpleBaseToBaseIndex(base2);
if ( index2 == -1 )
throw new IllegalArgumentException(base2 + " is not a valid base character");
return conversionMatrix[index1][index2];
}
/**
* create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly
* @param baseIndex1 base1
* @param baseIndex2 base2
* @return the diploid genotype
*/
public static DiploidGenotypeWithCorrectAlleleOrdering createDiploidGenotype(int baseIndex1, int baseIndex2) {
if ( baseIndex1 == -1 )
throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character");
if ( baseIndex2 == -1 )
throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character");
return conversionMatrix[baseIndex1][baseIndex2];
}
private static final DiploidGenotypeWithCorrectAlleleOrdering[][] conversionMatrix = {
{ DiploidGenotypeWithCorrectAlleleOrdering.AA, DiploidGenotypeWithCorrectAlleleOrdering.AC, DiploidGenotypeWithCorrectAlleleOrdering.AG, DiploidGenotypeWithCorrectAlleleOrdering.AT },
{ DiploidGenotypeWithCorrectAlleleOrdering.AC, DiploidGenotypeWithCorrectAlleleOrdering.CC, DiploidGenotypeWithCorrectAlleleOrdering.CG, DiploidGenotypeWithCorrectAlleleOrdering.CT },
{ DiploidGenotypeWithCorrectAlleleOrdering.AG, DiploidGenotypeWithCorrectAlleleOrdering.CG, DiploidGenotypeWithCorrectAlleleOrdering.GG, DiploidGenotypeWithCorrectAlleleOrdering.GT },
{ DiploidGenotypeWithCorrectAlleleOrdering.AT, DiploidGenotypeWithCorrectAlleleOrdering.CT, DiploidGenotypeWithCorrectAlleleOrdering.GT, DiploidGenotypeWithCorrectAlleleOrdering.TT }
};
}

View File

@ -27,10 +27,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -70,8 +70,8 @@ import static java.lang.Math.pow;
* From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above
* model.
*/
@Deprecated
public class DiploidSNPGenotypeLikelihoods implements Cloneable {
public final static double DEFAULT_PCR_ERROR_RATE = 1e-4;
protected final static int FIXED_PLOIDY = 2;
@ -82,36 +82,20 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
protected boolean VERBOSE = false;
//
// The fundamental data arrays associated with a Genotype Likelhoods object
// The fundamental data arrays associated with a Genotype Likelihoods object
//
protected double[] log10Likelihoods = null;
protected double[] log10Posteriors = null;
protected DiploidSNPGenotypePriors priors = null;
// TODO: don't calculate this each time through
protected double log10_PCR_error_3;
protected double log10_1_minus_PCR_error;
/**
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
* Create a new GenotypeLikelhoods object with given PCR error rate for each diploid genotype
*
*/
public DiploidSNPGenotypeLikelihoods() {
this.priors = new DiploidSNPGenotypePriors();
log10_PCR_error_3 = log10(DEFAULT_PCR_ERROR_RATE) - log10_3;
log10_1_minus_PCR_error = log10(1.0 - DEFAULT_PCR_ERROR_RATE);
setToZero();
}
/**
* Create a new GenotypeLikelhoods object with given priors and PCR error rate for each diploid genotype
*
* @param priors priors
* @param PCR_error_rate the PCR error rate
*/
public DiploidSNPGenotypeLikelihoods(DiploidSNPGenotypePriors priors, double PCR_error_rate) {
this.priors = priors;
public DiploidSNPGenotypeLikelihoods(double PCR_error_rate) {
log10_PCR_error_3 = log10(PCR_error_rate) - log10_3;
log10_1_minus_PCR_error = log10(1.0 - PCR_error_rate);
setToZero();
@ -124,15 +108,12 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
*/
protected Object clone() throws CloneNotSupportedException {
DiploidSNPGenotypeLikelihoods c = (DiploidSNPGenotypeLikelihoods)super.clone();
c.priors = priors;
c.log10Likelihoods = log10Likelihoods.clone();
c.log10Posteriors = log10Posteriors.clone();
return c;
}
protected void setToZero() {
log10Likelihoods = genotypeZeros.clone(); // likelihoods are all zeros
log10Posteriors = priors.getPriors().clone(); // posteriors are all the priors
}
/**
@ -143,102 +124,6 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
return log10Likelihoods;
}
/**
* Returns the likelihood associated with DiploidGenotype g
* @param g genotype
* @return log10 likelihood as a double
*/
public double getLikelihood(DiploidGenotype g) {
return getLikelihoods()[g.ordinal()];
}
/**
* Returns an array of posteriors for each genotype, indexed by DiploidGenotype.ordinal values().
*
* @return raw log10 (not-normalized posteriors) as a double array
*/
public double[] getPosteriors() {
return log10Posteriors;
}
/**
* Returns the posterior associated with DiploidGenotype g
* @param g genotpe
* @return raw log10 (not-normalized posteror) as a double
*/
public double getPosterior(DiploidGenotype g) {
return getPosteriors()[g.ordinal()];
}
/**
* Returns an array of posteriors for each genotype, indexed by DiploidGenotype.ordinal values().
*
* @return normalized posterors as a double array
*/
public double[] getNormalizedPosteriors() {
double[] normalized = new double[log10Posteriors.length];
double sum = 0.0;
// for precision purposes, we need to add (or really subtract, since everything is negative)
// the largest posterior value from all entries so that numbers don't get too small
double maxValue = log10Posteriors[0];
for (int i = 1; i < log10Posteriors.length; i++) {
if ( maxValue < log10Posteriors[i] )
maxValue = log10Posteriors[i];
}
// collect the posteriors
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double posterior = Math.pow(10, getPosterior(g) - maxValue);
normalized[g.ordinal()] = posterior;
sum += posterior;
}
// normalize
for (int i = 0; i < normalized.length; i++)
normalized[i] /= sum;
return normalized;
}
public DiploidSNPGenotypePriors getPriorObject() {
return priors;
}
/**
* Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values().
*
* @return log10 prior as a double array
*/
public double[] getPriors() {
return priors.getPriors();
}
/**
* Sets the priors
* @param priors priors
*/
public void setPriors(DiploidSNPGenotypePriors priors) {
this.priors = priors;
log10Posteriors = genotypeZeros.clone();
for ( DiploidGenotype g : DiploidGenotype.values() ) {
int i = g.ordinal();
log10Posteriors[i] = priors.getPriors()[i] + log10Likelihoods[i];
}
}
/**
* Returns the prior associated with DiploidGenotype g
* @param g genotype
* @return log10 prior as a double
*/
public double getPrior(DiploidGenotype g) {
return getPriors()[g.ordinal()];
}
// -------------------------------------------------------------------------------------
//
// add() routines. These are the workhorse routines for calculating the overall genotype
@ -316,13 +201,12 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
/**
*
* @param obsBase1
* @param qual1
* @param obsBase2
* @param qual2 can be 0, indicating no second base was observed for this fragment
* @param nObs The number of times this quad of values was seen. Generally 1, but reduced reads
* can have nObs > 1 for synthetic reads
* @return
* @param obsBase1 first observed base
* @param qual1 base qual of first observed base
* @param obsBase2 second observed base
* @param qual2 base qual of second observed base; can be 0, indicating no second base was observed for this fragment
* @param nObs the number of times this quad of values was seen. Generally 1, but reduced reads can have nObs > 1 for synthetic reads
* @return 0 if the base is bad, 1 otherwise
*/
private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2, int nObs) {
// TODO-- Right now we assume that there are at most 2 reads per fragment. This assumption is fine
@ -346,7 +230,6 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double likelihood = likelihoods[g.ordinal()];
log10Likelihoods[g.ordinal()] += likelihood * nObs;
log10Posteriors[g.ordinal()] += likelihood * nObs;
}
return 1;
@ -419,16 +302,15 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
double p_base = 0.0;
p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment);
p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment);
double likelihood = log10(p_base);
final double likelihood = log10(p_base);
gl.log10Likelihoods[g.ordinal()] += likelihood;
gl.log10Posteriors[g.ordinal()] += likelihood;
}
if ( VERBOSE ) {
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); }
System.out.println();
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", gl.log10Likelihoods[g.ordinal()]); }
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", gl.log10Likelihoods[g.ordinal()]); }
System.out.println();
}
@ -508,11 +390,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
* likelihoods for a pileup element. May return 0 to indicate that the observation is bad, and may
* cap the quality score by the mapping quality of the read itself.
*
* @param p
* @param ignoreBadBases
* @param capBaseQualsAtMappingQual
* @param minBaseQual
* @return
* @param p Pileup element
* @param ignoreBadBases Should we ignore bad bases?
* @param capBaseQualsAtMappingQual Should we cap the base qualities at the mapping quality of the read?
* @param minBaseQual Minimum allowed base quality
* @return the actual base quality to use
*/
private static byte qualToUse(PileupElement p, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
if ( ignoreBadBases && !BaseUtils.isRegularBase( p.getBase() ) )
@ -568,16 +450,12 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
public boolean validate(boolean throwException) {
try {
priors.validate(throwException);
for ( DiploidGenotype g : DiploidGenotype.values() ) {
String bad = null;
int i = g.ordinal();
if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) {
bad = String.format("Likelihood %f is badly formed", log10Likelihoods[i]);
} else if ( ! MathUtils.wellFormedDouble(log10Posteriors[i]) || ! MathUtils.isNegativeOrZero(log10Posteriors[i]) ) {
bad = String.format("Posterior %f is badly formed", log10Posteriors[i]);
}
if ( bad != null ) {

View File

@ -1,489 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.List;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
/**
* Stable, error checking version of the Bayesian genotyper. Useful for calculating the likelihoods, priors,
* and posteriors given a pile of bases and quality scores
*
* Suppose we have bases b1, b2, ..., bN with qualities scores q1, q2, ..., qN. This object
* calculates:
*
* P(G | D) = P(G) * P(D | G)
*
* where
*
* P(D | G) = sum_i log10 P(bi | G)
*
* and
*
* P(bi | G) = 1 - P(error | q1) if bi is in G
* = P(error | q1) / 3 if bi is not in G
*
* for homozygous genotypes and for heterozygous genotypes:
*
* P(bi | G) = 1 - P(error | q1) / 2 + P(error | q1) / 6 if bi is in G
* = P(error | q1) / 3 if bi is not in G
*
* for each of the 10 unique diploid genotypes AA, AC, AG, .., TT
*
* Everything is stored as arrays indexed by DiploidGenotype.ordinal() values in log10 space.
*
* The priors contain the relative probabilities of each genotype, and must be provided at object creation.
* From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above
* model.
*/
public class DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering implements Cloneable {
public final static double DEFAULT_PCR_ERROR_RATE = 1e-4;
protected final static int FIXED_PLOIDY = 2;
protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
protected final static double ploidyAdjustment = log10(FIXED_PLOIDY);
protected final static double log10_3 = log10(3.0);
protected boolean VERBOSE = false;
//
// The fundamental data arrays associated with a Genotype Likelihoods object
//
protected double[] log10Likelihoods = null;
// TODO: don't calculate this each time through
protected double log10_PCR_error_3;
protected double log10_1_minus_PCR_error;
/**
* Create a new GenotypeLikelhoods object with given PCR error rate for each diploid genotype
*
* @param PCR_error_rate the PCR error rate
*/
public DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering(double PCR_error_rate) {
log10_PCR_error_3 = log10(PCR_error_rate) - log10_3;
log10_1_minus_PCR_error = log10(1.0 - PCR_error_rate);
setToZero();
}
/**
* Cloning of the object
* @return clone
* @throws CloneNotSupportedException
*/
protected Object clone() throws CloneNotSupportedException {
DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering c = (DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering)super.clone();
c.log10Likelihoods = log10Likelihoods.clone();
return c;
}
protected void setToZero() {
log10Likelihoods = genotypeZeros.clone(); // likelihoods are all zeros
}
/**
* Returns an array of log10 likelihoods for each genotype, indexed by DiploidGenotype.ordinal values()
* @return likelihoods array
*/
public double[] getLikelihoods() {
return log10Likelihoods;
}
// -------------------------------------------------------------------------------------
//
// add() routines. These are the workhorse routines for calculating the overall genotype
// likelihoods given observed bases and reads. Includes high-level operators all the
// way down to single base and qual functions.
//
// -------------------------------------------------------------------------------------
/**
* Updates likelihoods and posteriors to reflect the additional observations contained within the
* read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the
* pileup
*
* @param pileup read pileup
* @param ignoreBadBases should we ignore bad bases?
* @param capBaseQualsAtMappingQual should we cap a base's quality by its read's mapping quality?
* @param minBaseQual the minimum base quality at which to consider a base valid
* @return the number of good bases found in the pileup
*/
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
int n = 0;
// for each fragment, add to the likelihoods
FragmentCollection<PileupElement> fpile = pileup.toFragments();
for ( PileupElement p : fpile.getSingletonReads() )
n += add(p, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
for ( List<PileupElement> overlappingPair : fpile.getOverlappingPairs() )
n += add(overlappingPair, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
return n;
}
public int add(PileupElement elt, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
byte obsBase = elt.getBase();
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
if ( qual == 0 )
return 0;
if ( elt.getRead().isReducedRead() ) {
// reduced read representation
if ( BaseUtils.isRegularBase( obsBase )) {
int representativeCount = elt.getRepresentativeCount();
add(obsBase, qual, (byte)0, (byte)0, representativeCount); // fast calculation of n identical likelihoods
return representativeCount; // we added nObs bases here
}
// odd bases or deletions => don't use them
return 0;
}
return add(obsBase, qual, (byte)0, (byte)0, 1);
}
public int add(List<PileupElement> overlappingPair, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
final PileupElement p1 = overlappingPair.get(0);
final PileupElement p2 = overlappingPair.get(1);
final byte observedBase1 = p1.getBase();
final byte qualityScore1 = qualToUse(p1, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
final byte observedBase2 = p2.getBase();
final byte qualityScore2 = qualToUse(p2, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
if ( qualityScore1 == 0 ) {
if ( qualityScore2 == 0 ) // abort early if we didn't see any good bases
return 0;
else {
return add(observedBase2, qualityScore2, (byte)0, (byte)0);
}
} else {
return add(observedBase1, qualityScore1, observedBase2, qualityScore2);
}
}
/**
*
* @param obsBase1 first observed base
* @param qual1 base qual of first observed base
* @param obsBase2 second observed base
* @param qual2 base qual of second observed base; can be 0, indicating no second base was observed for this fragment
* @param nObs the number of times this quad of values was seen. Generally 1, but reduced reads can have nObs > 1 for synthetic reads
* @return 0 if the base is bad, 1 otherwise
*/
private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2, int nObs) {
// TODO-- Right now we assume that there are at most 2 reads per fragment. This assumption is fine
// TODO-- given the current state of next-gen sequencing, but may need to be fixed in the future.
// TODO-- However, when that happens, we'll need to be a lot smarter about the caching we do here.
// Just look up the cached result if it's available, or compute and store it
DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering gl;
if ( ! inCache(obsBase1, qual1, obsBase2, qual2, FIXED_PLOIDY) ) {
gl = calculateCachedGenotypeLikelihoods(obsBase1, qual1, obsBase2, qual2, FIXED_PLOIDY);
} else {
gl = getCachedGenotypeLikelihoods(obsBase1, qual1, obsBase2, qual2, FIXED_PLOIDY);
}
// for bad bases, there are no likelihoods
if ( gl == null )
return 0;
double[] likelihoods = gl.getLikelihoods();
for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) {
double likelihood = likelihoods[g.ordinal()];
log10Likelihoods[g.ordinal()] += likelihood * nObs;
}
return 1;
}
private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2) {
return add(obsBase1, qual1, obsBase2, qual2, 1);
}
// -------------------------------------------------------------------------------------
//
// Dealing with the cache routines
//
// -------------------------------------------------------------------------------------
static DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering[][][][][] CACHE = new DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][BaseUtils.BASES.length+1][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY];
protected boolean inCache(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
return getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy) != null;
}
protected DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering getCachedGenotypeLikelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering gl = getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy);
if ( gl == null )
throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base1=%c, qual1=%d, base2=%c, qual2=%d, ploidy=%d",
observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy));
return gl;
}
protected DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering calculateCachedGenotypeLikelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering gl = calculateGenotypeLikelihoods(observedBase1, qualityScore1, observedBase2, qualityScore2);
setCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy, gl);
return gl;
}
protected void setCache( DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering[][][][][] cache,
byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy,
DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering val ) {
int i = BaseUtils.simpleBaseToBaseIndex(observedBase1);
int j = qualityScore1;
int k = qualityScore2 != 0 ? BaseUtils.simpleBaseToBaseIndex(observedBase2) : BaseUtils.BASES.length;
int l = qualityScore2;
int m = ploidy;
cache[i][j][k][l][m] = val;
}
protected DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering getCache(DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering[][][][][] cache,
byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
int i = BaseUtils.simpleBaseToBaseIndex(observedBase1);
int j = qualityScore1;
int k = qualityScore2 != 0 ? BaseUtils.simpleBaseToBaseIndex(observedBase2) : BaseUtils.BASES.length;
int l = qualityScore2;
int m = ploidy;
return cache[i][j][k][l][m];
}
protected DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering calculateGenotypeLikelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2) {
double[] log10FourBaseLikelihoods = computeLog10Likelihoods(observedBase1, qualityScore1, observedBase2, qualityScore2);
try {
DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering gl = (DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering)this.clone();
gl.setToZero();
// we need to adjust for ploidy. We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space
for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) {
// todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop
double p_base = 0.0;
p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment);
p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment);
final double likelihood = log10(p_base);
gl.log10Likelihoods[g.ordinal()] += likelihood;
}
if ( VERBOSE ) {
for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) { System.out.printf("%s\t", g); }
System.out.println();
for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) { System.out.printf("%.2f\t", gl.log10Likelihoods[g.ordinal()]); }
System.out.println();
}
return gl;
} catch ( CloneNotSupportedException e ) {
throw new RuntimeException(e);
}
}
/**
* Updates likelihoods and posteriors to reflect an additional observation of observedBase with
* qualityScore.
*
* @param observedBase1 the base observed on the 1st read of the fragment
* @param qualityScore1 the qual of the base on the 1st read of the fragment, or zero if NA
* @param observedBase2 the base observed on the 2nd read of the fragment
* @param qualityScore2 the qual of the base on the 2nd read of the fragment, or zero if NA
* @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example)
*/
protected double[] computeLog10Likelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2) {
double[] log10FourBaseLikelihoods = baseZeros.clone();
for ( byte trueBase : BaseUtils.BASES ) {
double likelihood = 0.0;
for ( byte fragmentBase : BaseUtils.BASES ) {
double log10FragmentLikelihood = (trueBase == fragmentBase ? log10_1_minus_PCR_error : log10_PCR_error_3);
if ( qualityScore1 != 0 ) {
log10FragmentLikelihood += log10PofObservingBaseGivenChromosome(observedBase1, fragmentBase, qualityScore1);
}
if ( qualityScore2 != 0 ) {
log10FragmentLikelihood += log10PofObservingBaseGivenChromosome(observedBase2, fragmentBase, qualityScore2);
}
//if ( VERBOSE ) {
// System.out.printf(" L(%c | b=%s, Q=%d) = %f / %f%n",
// observedBase, trueBase, qualityScore, pow(10,likelihood) * 100, likelihood);
//}
likelihood += pow(10, log10FragmentLikelihood);
}
log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(trueBase)] = log10(likelihood);
}
return log10FourBaseLikelihoods;
}
/**
*
* @param observedBase observed base
* @param chromBase target base
* @param qual base quality
* @return log10 likelihood
*/
protected double log10PofObservingBaseGivenChromosome(byte observedBase, byte chromBase, byte qual) {
double logP;
if ( observedBase == chromBase ) {
// the base is consistent with the chromosome -- it's 1 - e
//logP = oneMinusData[qual];
double e = pow(10, (qual / -10.0));
logP = log10(1.0 - e);
} else {
// the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error)
logP = qual / -10.0 + (-log10_3);
}
//System.out.printf("%c %c %d => %f%n", observedBase, chromBase, qual, logP);
return logP;
}
/**
* Helper function that returns the phred-scaled base quality score we should use for calculating
* likelihoods for a pileup element. May return 0 to indicate that the observation is bad, and may
* cap the quality score by the mapping quality of the read itself.
*
* @param p Pileup element
* @param ignoreBadBases Should we ignore bad bases?
* @param capBaseQualsAtMappingQual Should we cap the base qualities at the mapping quality of the read?
* @param minBaseQual Minimum allowed base quality
* @return the actual base quality to use
*/
private static byte qualToUse(PileupElement p, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
if ( ignoreBadBases && !BaseUtils.isRegularBase( p.getBase() ) )
return 0;
byte qual = p.getQual();
if ( qual > SAMUtils.MAX_PHRED_SCORE )
throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
if ( capBaseQualsAtMappingQual )
qual = (byte)Math.min((int)qual, p.getMappingQual());
if ( (int)qual < minBaseQual )
qual = (byte)0;
return qual;
}
// -----------------------------------------------------------------------------------------------------------------
//
//
// helper routines
//
//
// -----------------------------------------------------------------------------------------------------------------
/**
* Return a string representation of this object in a moderately usable form
*
* @return string representation
*/
public String toString() {
double sum = 0;
StringBuilder s = new StringBuilder();
for (DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values()) {
s.append(String.format("%s %.10f ", g, log10Likelihoods[g.ordinal()]));
sum += Math.pow(10,log10Likelihoods[g.ordinal()]);
}
s.append(String.format(" %f", sum));
return s.toString();
}
// -----------------------------------------------------------------------------------------------------------------
//
//
// Validation routines
//
//
// -----------------------------------------------------------------------------------------------------------------
public boolean validate() {
return validate(true);
}
public boolean validate(boolean throwException) {
try {
for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) {
String bad = null;
int i = g.ordinal();
if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) {
bad = String.format("Likelihood %f is badly formed", log10Likelihoods[i]);
}
if ( bad != null ) {
throw new IllegalStateException(String.format("At %s: %s", g.toString(), bad));
}
}
} catch ( IllegalStateException e ) {
if ( throwException )
throw new RuntimeException(e);
else
return false;
}
return true;
}
//
// Constant static data
//
private final static double[] genotypeZeros = new double[DiploidGenotypeWithCorrectAlleleOrdering.values().length];
private final static double[] baseZeros = new double[BaseUtils.BASES.length];
static {
for ( DiploidGenotypeWithCorrectAlleleOrdering g : DiploidGenotypeWithCorrectAlleleOrdering.values() ) {
genotypeZeros[g.ordinal()] = 0.0;
}
for ( byte base : BaseUtils.BASES ) {
baseZeros[BaseUtils.simpleBaseToBaseIndex(base)] = 0.0;
}
}
}

View File

@ -1,257 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.Arrays;
@Deprecated
public class DiploidSNPGenotypePriors implements GenotypePriors {
// --------------------------------------------------------------------------------------------------------------
//
// Constants and static information
//
// --------------------------------------------------------------------------------------------------------------
public static final double HUMAN_HETEROZYGOSITY = 1e-3;
public static final double CEU_HETEROZYGOSITY = 1e-3;
public static final double YRI_HETEROZYGOSITY = 1.0 / 850;
/**
* Default value of the prob of seeing a reference error. Used to calculation the
* chance of seeing a true B/C het when the reference is A, which we assume is the product
* of the ref error rate and the het. Default value is Q60
*/
public static final double PROB_OF_REFERENCE_ERROR = 1e-6; // the reference is
private final static double[] flatPriors = new double[DiploidGenotype.values().length];
// --------------------------------------------------------------------------------------------------------------
//
// Diploid priors
//
// --------------------------------------------------------------------------------------------------------------
private double[] priors = null;
// todo -- fix me when this issue is resolved
public static final boolean requirePriorSumToOne = false;
/**
* Create a new DiploidGenotypePriors object with flat priors for each diploid genotype
*/
public DiploidSNPGenotypePriors() {
priors = flatPriors.clone();
}
/**
* Create a new GenotypeLikelihoods object with priors for a diploid with heterozygosity and reference
* base ref
*
* @param ref
* @param heterozygosity
* @param probOfTriStateGenotype The prob of seeing a true B/C het when the reference is A
*/
public DiploidSNPGenotypePriors(byte ref, double heterozygosity, double probOfTriStateGenotype) {
priors = getReferencePolarizedPriors(ref, heterozygosity, probOfTriStateGenotype);
}
/**
* Create a new Genotypelike Likelhoods's object with priors (in log10 space) for each of the DiploteGenotypes
*
* @param log10Priors
*/
public DiploidSNPGenotypePriors(double[] log10Priors) {
priors = log10Priors.clone();
}
/**
* Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values().
*
* @return log10 prior as a double array
*/
public double[] getPriors() {
return priors;
}
/**
* Returns the prior associated with DiploidGenotype g
* @param g
* @return log10 prior as a double
*/
public double getPrior(DiploidGenotype g) {
return getPriors()[g.ordinal()];
}
public double getHeterozygosity() { return HUMAN_HETEROZYGOSITY; }
public boolean validate(boolean throwException) {
try {
if ( requirePriorSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(priors), 1.0) != 0 ) {
throw new IllegalStateException(String.format("Priors don't sum to 1: sum=%f %s", MathUtils.sumLog10(priors), Arrays.toString(priors)));
}
for ( DiploidGenotype g : DiploidGenotype.values() ) {
int i = g.ordinal();
if ( ! MathUtils.wellFormedDouble(priors[i]) || ! MathUtils.isNegativeOrZero(priors[i]) ) {
String bad = String.format("Prior %f is badly formed %b", priors[i], MathUtils.isNegativeOrZero(priors[i]));
throw new IllegalStateException(String.format("At %s: %s", g.toString(), bad));
}
}
} catch ( IllegalStateException e ) {
if ( throwException )
throw new RuntimeException(e);
else
return false;
}
return true;
}
// --------------------------------------------------------------------------------------------------------------
//
// Static functionality
//
// --------------------------------------------------------------------------------------------------------------
/**
* Returns homozygous-reference, heterozygous, and homozygous-non-ref probabilities given a heterozygosity
* value, as elements 0, 1, and 2 of a double[], respectively
*
* @param h the heterozygosity [probability of a base being heterozygous]
*/
@Deprecated
public static double[] heterozygosity2DiploidProbabilities(double h) {
double[] pdbls = new double[3];
pdbls[0] = heterozygosity2HomRefProbability(h);
pdbls[1] = heterozygosity2HetProbability(h);
pdbls[2] = heterozygosity2HomVarProbability(h);
return pdbls;
}
/**
*
* @param h
* @return
*/
public static double heterozygosity2HomRefProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
double v = 1.0 - (3.0 * h / 2.0);
if (MathUtils.isNegative(v)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return v;
}
public static double heterozygosity2HetProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return h;
}
public static double heterozygosity2HomVarProbability(double h) {
if (MathUtils.isNegative(h)) {
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
}
return h / 2.0;
}
/**
* Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector
* appropriately.
*
* Suppose A is the reference base, and we are given the probability of being hom-ref, het, and hom-var,
* and that pTriSateGenotype is the true probability of observing reference A and a true genotype of B/C
* then this sets the priors to:
*
* AA = hom-ref
* AC = AG = AT = (het - pTriStateGenotype) / 3
* CC = GG = TT = hom-var / 3
* CG = CT = GT = pTriStateGenotype / 3
*
* So that we get:
*
* hom-ref + 3 * (het - pTriStateGenotype) / 3 + 3 * hom-var / 3 + 3 * pTriStateGenotype
* hom-ref + het - pTriStateGenotype + hom-var + pTriStateGenotype
* hom-ref + het + hom-var
* = 1
*
* @param ref
* @param heterozyosity
* @param pRefError
*/
public static double[] getReferencePolarizedPriors(byte ref, double heterozyosity, double pRefError ) {
if ( ! MathUtils.isBounded(pRefError, 0.0, 0.01) ) {
throw new RuntimeException(String.format("BUG: p Reference error is out of bounds (0.0 - 0.01) is allow range %f", pRefError));
}
double pTriStateGenotype = heterozyosity * pRefError;
// if ( pTriStateGenotype >= heterozyosity ) {
// throw new RuntimeException(String.format("p Tristate genotype %f is greater than the heterozygosity %f", pTriStateGenotype, heterozyosity));
// }
double pHomRef = heterozygosity2HomRefProbability(heterozyosity);
double pHet = heterozygosity2HetProbability(heterozyosity);
double pHomVar = heterozygosity2HomVarProbability(heterozyosity);
if (MathUtils.compareDoubles(pHomRef + pHet + pHomVar, 1.0) != 0) {
throw new RuntimeException(String.format("BUG: Prior probabilities don't sum to one => %f, %f, %f", pHomRef, pHet, pHomVar));
}
double[] priors = new double[DiploidGenotype.values().length];
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double POfG;
final double nOnRefHets = 3;
final double nOffRefHets = 3;
final double nHomVars = 3;
if ( g.isHomRef(ref) ) { POfG = pHomRef; }
else if ( g.isHomVar(ref) ) { POfG = pHomVar / nHomVars; }
else if ( g.isHetRef(ref) ) { POfG = (pHet - pTriStateGenotype ) / nOnRefHets; }
else { POfG = pTriStateGenotype / nOffRefHets; }
priors[g.ordinal()] = Math.log10(POfG);
}
return priors;
}
static {
for ( DiploidGenotype g : DiploidGenotype.values() ) {
flatPriors[g.ordinal()] = Math.log10(1.0 / DiploidGenotype.values().length);
}
}
}

View File

@ -84,7 +84,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
pileup = createBAQedPileup( pileup );
// create the GenotypeLikelihoods object
final DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering GL = new DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering(UAC.PCR_error);
final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods(UAC.PCR_error);
final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
if ( nGoodBases > 0 )
GLs.add(new SampleGenotypeData(sample.getKey(), GL, getFilteredDepth(pileup)));
@ -139,7 +139,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
for ( int j = i; j <= numAltAlleles; j++ ) {
// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
// In other words, for biallelic sites the ordering is: AA,AB,BB; for triallelic sites the ordering is: AA,AB,BB,AC,BC,CC, etc."
PLordering[(j * (j+1) / 2) + i] = DiploidGenotypeWithCorrectAlleleOrdering.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal();
PLordering[(j * (j+1) / 2) + i] = DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal();
}
}
@ -171,7 +171,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
protected List<Allele> determineAlternateAlleles(final byte ref, final List<SampleGenotypeData> sampleDataList) {
final int baseIndexOfRef = BaseUtils.simpleBaseToBaseIndex(ref);
final int PLindexOfRef = DiploidGenotypeWithCorrectAlleleOrdering.createDiploidGenotype(ref, ref).ordinal();
final int PLindexOfRef = DiploidGenotype.createDiploidGenotype(ref, ref).ordinal();
for ( int i = 0; i < 4; i++ )
likelihoodSums[i] = 0.0;
@ -219,10 +219,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
private static class SampleGenotypeData {
public final String name;
public final DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering GL;
public final DiploidSNPGenotypeLikelihoods GL;
public final int depth;
public SampleGenotypeData(final String name, final DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering GL, final int depth) {
public SampleGenotypeData(final String name, final DiploidSNPGenotypeLikelihoods GL, final int depth) {
this.name = name;
this.GL = GL;
this.depth = depth;

View File

@ -53,7 +53,7 @@ public class UnifiedArgumentCollection {
* effectively acts as a cap on the base qualities.
*/
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
public Double PCR_error = DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering.DEFAULT_PCR_ERROR_RATE;
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;

View File

@ -1,109 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.Test;
import static java.lang.Math.log10;
public class GenotypePriorsUnitTest extends BaseTest {
private final static double DELTA = 1e-8;
@Test
public void testBasic() {
logger.warn("Executing testIsBetween");
Assert.assertEquals(DELTA, DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY,1e-3);
}
// f <- function(h) { print(paste(1-3.0 * h / 2, h, h/2, sep=', '));}
@Test
public void testPriorsFromHet() {
logger.warn("Executing testPriorsFromHet");
testPriorsFromHet(0.0, 1, 0, 0);
testPriorsFromHet(1e-1, 0.85, 0.1, 0.05);
testPriorsFromHet(1e-2, 0.985, 0.01, 0.005);
testPriorsFromHet(1e-3, 0.9985, 0.001, 5e-04);
testPriorsFromHet(1e-4, 0.99985, 1e-04, 5e-05);
testPriorsFromHet(1e-5, 0.999985, 1e-05, 5e-06);
testPriorsFromHet(0.5, 0.25, 0.5, 0.25);
}
@Test(expectedExceptions=RuntimeException.class)
public void testPriorsFromHetFail1() {
logger.warn("Executing testPriorsFromHetFail1");
testPriorsFromHet(1.0, 0, 0, 0);
}
@Test(expectedExceptions=RuntimeException.class)
public void testPriorsFromHetFail2() {
logger.warn("Executing testPriorsFromHetFail2");
testPriorsFromHet(-1.0, 0, 0, 0);
}
private void testPriorsFromHet(double h, double homRef, double het, double homVar) {
double[] vals = DiploidSNPGenotypePriors.heterozygosity2DiploidProbabilities(h);
Assert.assertEquals(vals[0], homRef, DELTA);
Assert.assertEquals(vals[1], het, DELTA);
Assert.assertEquals(vals[2], homVar, DELTA);
Assert.assertEquals(DiploidSNPGenotypePriors.heterozygosity2HomRefProbability(h), homRef, DELTA);
Assert.assertEquals(DiploidSNPGenotypePriors.heterozygosity2HetProbability(h), het, DELTA);
Assert.assertEquals(DiploidSNPGenotypePriors.heterozygosity2HomVarProbability(h), homVar, DELTA);
}
//
@Test
public void testGenotypePriorsReferenceIndependent() {
logger.warn("Executing testGenotypePriorsReferenceIndependent");
// AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
double[] array1 = {-0.0705810742857073, -1, -1, -1, -1.301029995663981, -1, -1, -1.301029995663981, -1, -1.301029995663981};
testGenotypePriors('A', 1e-1, array1);
double[] array2 = {-1.301029995663981, -1, -1, -1, -0.0705810742857073, -1, -1, -1.301029995663981, -1, -1.301029995663981};
testGenotypePriors('C', 1e-1, array2);
double[] array3 = {-1.301029995663981, -1, -1, -1, -1.301029995663981, -1, -1, -0.0705810742857073, -1, -1.301029995663981};
testGenotypePriors('G', 1e-1, array3);
double[] array4 = {-1.301029995663981, -1, -1, -1, -1.301029995663981, -1, -1, -1.301029995663981, -1, -0.0705810742857073};
testGenotypePriors('T', 1e-1, array4);
}
private void testGenotypePriors(char ref, double h, double[] array) {
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double val = 0.0;
if ( g.isHomRef((byte)ref) ) val = DiploidSNPGenotypePriors.heterozygosity2HomRefProbability(h);
if ( g.isHet() ) val = DiploidSNPGenotypePriors.heterozygosity2HetProbability(h);
if ( g.isHomVar((byte)ref) ) val = DiploidSNPGenotypePriors.heterozygosity2HomVarProbability(h);
val = log10(val);
double e = array[g.ordinal()];
Assert.assertEquals(val, e, DELTA, String.format("%s should have p=%f but has p=%f", g, val, e));
}
}
@Test
public void testGenotypePriorsReferencePolarized() {
logger.warn("Executing testGenotypePriorsReferencePolarized");
// AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
double[] array1 = {0.9985, 0.00033333, 0.00033333, 0.00033333, 0.000166666666666667, 3.333333e-09, 3.333333e-09, 0.000166666666666667, 3.33333333333333e-09, 0.000166666666666667};
logger.warn(" Array 1");
testPolarizedGenotypePriors('A', 1e-3, 1e-5, array1);
double[] array2 = {0.9985, 0.00033333, 0.00033333, 0.00033333, 0.000166666666666667, 3.333333e-10, 3.333333e-10, 0.000166666666666667, 3.33333333333333e-10, 0.000166666666666667};
logger.warn(" Array 2");
testPolarizedGenotypePriors('A', 1e-3, 1e-6, array2);
double[] array3 = {0.985, 0.0033333, 0.0033333, 0.0033333, 0.00166666666666667, 3.333333e-08, 3.333333e-08, 0.00166666666666667, 3.33333333333333e-08, 0.00166666666666667};
logger.warn(" Array 3");
testPolarizedGenotypePriors('A', 1e-2, 1e-5, array3);
double[] array4 = {0.99985, 3.33333e-05, 3.33333e-05, 3.33333e-05, 1.66666666666667e-05, 3.33333333333333e-12, 3.33333333333333e-12, 1.66666666666667e-05, 3.33333333333333e-12, 1.66666666666667e-05};
logger.warn(" Array 4");
testPolarizedGenotypePriors('A', 1e-4, 1e-6, array4);
}
private void testPolarizedGenotypePriors(char ref, double h, double pRefError, double[] array) {
DiploidSNPGenotypePriors priors = new DiploidSNPGenotypePriors((byte)ref, h, pRefError);
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double val = Math.pow(10, priors.getPrior(g));
double e = array[g.ordinal()];
Assert.assertEquals(val, e, DELTA, String.format("%s should have p=%f but has p=%f", g, val, e));
}
}
}