Refactored GenotypeLikelihoods to use an underlying 4-base model.
It needs to be modified a bit and then hooked up to a pooled model, but that is now possible. At this point, there is no difference to the Unified Genotyper. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1978 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4d3871c655
commit
d549347f25
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import java.util.TreeMap;
|
||||
|
|
@ -10,7 +9,7 @@ import java.util.EnumMap;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||
public class EmpiricalSubstitutionProbabilities extends FourBaseProbabilities {
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Static methods to manipulate machine platforms
|
||||
|
|
@ -64,6 +63,10 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood
|
|||
return plOfLastRead;
|
||||
}
|
||||
|
||||
public int getReadSequencerPlatformIndex( SAMRecord read ) {
|
||||
return getReadSequencerPlatform(read).ordinal();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Static methods to get at the transition tables themselves
|
||||
|
|
@ -189,17 +192,15 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood
|
|||
//
|
||||
// forwarding constructors -- don't do anything at all
|
||||
//
|
||||
public EmpiricalSubstitutionGenotypeLikelihoods() { super(); }
|
||||
public EmpiricalSubstitutionProbabilities() { super(); }
|
||||
|
||||
public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); }
|
||||
|
||||
public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors, boolean raiseErrorOnUnknownPlatform) {
|
||||
super(priors);
|
||||
public EmpiricalSubstitutionProbabilities(boolean raiseErrorOnUnknownPlatform) {
|
||||
super();
|
||||
this.raiseErrorOnUnknownPlatform = raiseErrorOnUnknownPlatform;
|
||||
}
|
||||
|
||||
public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors, SequencerPlatform assumeUnknownPlatformsAreThis) {
|
||||
super(priors);
|
||||
public EmpiricalSubstitutionProbabilities(SequencerPlatform assumeUnknownPlatformsAreThis) {
|
||||
super();
|
||||
|
||||
if ( assumeUnknownPlatformsAreThis != null ) {
|
||||
raiseErrorOnUnknownPlatform = false;
|
||||
|
|
@ -209,38 +210,13 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood
|
|||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return
|
||||
* @return clone
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
return super.clone();
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// caching routines
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
static GenotypeLikelihoods[][][][][] EMPIRICAL_CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
|
||||
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
SequencerPlatform pl = getReadSequencerPlatform(read);
|
||||
int a = pl.ordinal();
|
||||
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
||||
int j = qualityScore;
|
||||
int k = ploidy;
|
||||
int x = strandIndex(! read.getReadNegativeStrandFlag());
|
||||
|
||||
if ( val != null )
|
||||
EMPIRICAL_CACHE[a][i][j][k][x] = val;
|
||||
|
||||
return EMPIRICAL_CACHE[a][i][j][k][x];
|
||||
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
|
|
@ -264,7 +240,7 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood
|
|||
|
||||
//System.out.printf("%s for %s%n", pl, read);
|
||||
|
||||
double log10p = 0.0;
|
||||
double log10p;
|
||||
if ( fwdStrand ) {
|
||||
log10p = getProbMiscallIsBase(pl, observedBase, chromBase);
|
||||
} else {
|
||||
|
|
@ -275,4 +251,4 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood
|
|||
|
||||
return log10p;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,341 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
|
||||
/**
|
||||
* Stable, error checking version of strict 4 base likelihoods. Useful for calculating the likelihoods, priors,
|
||||
* and posteriors given a pile of bases and quality scores (in conjuncion with GenotypeLikelihoods)
|
||||
*
|
||||
* Suppose we have bases b1, b2, ..., bN with qualities scores q1, q2, ..., qN. This object
|
||||
* calculates:
|
||||
*
|
||||
* P(b | D) = P(b) * P(D | b)
|
||||
*
|
||||
* where
|
||||
*
|
||||
* P(D | b) = sum_i log10 P(bi | b)
|
||||
*
|
||||
* and
|
||||
*
|
||||
* P(bi | b) = 1 - P(error | q1) if bi = b
|
||||
* = P(error | q1) / 3 if bi != b
|
||||
*
|
||||
*
|
||||
*/
|
||||
public abstract class FourBaseProbabilities implements Cloneable {
|
||||
|
||||
protected boolean enableCacheFlag = true;
|
||||
|
||||
//
|
||||
// The fundamental data array associated with 4-base likelihoods
|
||||
//
|
||||
protected double[] log10Likelihoods = null;
|
||||
|
||||
|
||||
/**
|
||||
* If true, lots of output will be generated about the Likelihoods at each site
|
||||
*/
|
||||
private boolean verbose = false;
|
||||
|
||||
/**
|
||||
* Bases with Q scores below this threshold aren't included in the Likelihood calculation
|
||||
*/
|
||||
private int minQScoreToInclude = 0;
|
||||
|
||||
/**
|
||||
* Create a new FourBaseLikelihoods object
|
||||
*/
|
||||
public FourBaseProbabilities() {
|
||||
log10Likelihoods = zeros.clone(); // Likelihoods are all zeros
|
||||
}
|
||||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return clone
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
FourBaseProbabilities c = (FourBaseProbabilities)super.clone();
|
||||
c.log10Likelihoods = log10Likelihoods.clone();
|
||||
return c;
|
||||
}
|
||||
|
||||
public void setVerbose(boolean v) {
|
||||
verbose = v;
|
||||
}
|
||||
|
||||
public boolean isVerbose() {
|
||||
return verbose;
|
||||
}
|
||||
|
||||
public int getMinQScoreToInclude() {
|
||||
return minQScoreToInclude;
|
||||
}
|
||||
|
||||
public void setMinQScoreToInclude(int minQScoreToInclude) {
|
||||
this.minQScoreToInclude = minQScoreToInclude;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of log10 likelihoods for each base, indexed by BaseUtils.BASES.ordinal values()
|
||||
* @return probs
|
||||
*/
|
||||
public double[] getLog10Likelihoods() {
|
||||
return log10Likelihoods;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the likelihood associated with a base
|
||||
* @param base base
|
||||
* @return log10 likelihood as a double
|
||||
*/
|
||||
public double getLog10Likelihood(char base) {
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
return (baseIndex < 0 ? 0.0 : getLog10Likelihoods()[baseIndex]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of likelihoods for each base, indexed by BaseUtils.BASES.ordinal values()
|
||||
* @return probs
|
||||
*/
|
||||
public double[] getLikelihoods() {
|
||||
double[] probs = new double[4];
|
||||
for (int i = 0; i < 4; i++)
|
||||
probs[i] = Math.pow(10, log10Likelihoods[i]);
|
||||
return probs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the likelihoods associated with a base
|
||||
* @param base base
|
||||
* @return likelihoods as a double
|
||||
*/
|
||||
public double getLikelihood(char base) {
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
return (baseIndex < 0 ? 0.0 : Math.pow(10, log10Likelihoods[baseIndex]));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// add() -- the heart of
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Updates likelihoods and posteriors to reflect an additional observation of observedBase with
|
||||
* qualityScore.
|
||||
*
|
||||
* @param observedBase observed base
|
||||
* @param qualityScore base quality
|
||||
* @param read SAM read
|
||||
* @param offset offset on read
|
||||
* @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example)
|
||||
*/
|
||||
public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||
FourBaseProbabilities fbl = computeLikelihoods(observedBase, qualityScore, read, offset);
|
||||
if ( fbl == null )
|
||||
return 0;
|
||||
|
||||
for ( char base : BaseUtils.BASES ) {
|
||||
double likelihood = fbl.getLikelihood(base);
|
||||
log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)] += likelihood;
|
||||
}
|
||||
|
||||
if ( isVerbose() ) {
|
||||
for ( char base : BaseUtils.BASES ) { System.out.printf("%s\t", base); }
|
||||
System.out.println();
|
||||
for ( char base : BaseUtils.BASES ) { System.out.printf("%.2f\t", log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)]); }
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates likelihoods and posteriors to reflect an additional observation of observedBase with
|
||||
* qualityScore.
|
||||
*
|
||||
* @param observedBase observed base
|
||||
* @param qualityScore base quality
|
||||
* @param read SAM read
|
||||
* @param offset offset on read
|
||||
* @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example)
|
||||
*/
|
||||
public FourBaseProbabilities computeLikelihoods(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||
if ( badBase(observedBase) ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
if ( qualityScore > getMinQScoreToInclude() ) {
|
||||
|
||||
FourBaseProbabilities fbl = (FourBaseProbabilities)this.clone();
|
||||
fbl.log10Likelihoods = zeros.clone();
|
||||
|
||||
for ( char base : BaseUtils.BASES ) {
|
||||
double likelihood = log10PofObservingBaseGivenChromosome(observedBase, base, qualityScore, read, offset);
|
||||
|
||||
if ( isVerbose() ) {
|
||||
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
|
||||
System.out.printf(" L(%c | b=%s, Q=%d, S=%s) = %f / %f%n",
|
||||
observedBase, base, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood);
|
||||
}
|
||||
|
||||
fbl.log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)] = likelihood;
|
||||
}
|
||||
|
||||
return fbl;
|
||||
}
|
||||
} catch ( CloneNotSupportedException e ) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// helper routines
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base
|
||||
* is considered bad if:
|
||||
*
|
||||
* Criterion 1: observed base isn't a A,C,T,G or lower case equivalent
|
||||
*
|
||||
* @param observedBase observed base
|
||||
* @return true if the base is a bad base
|
||||
*/
|
||||
private boolean badBase(char observedBase) {
|
||||
return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a string representation of this object in a moderately usable form
|
||||
*
|
||||
* @return string representation
|
||||
*/
|
||||
public String toString() {
|
||||
double sum = 0;
|
||||
StringBuilder s = new StringBuilder();
|
||||
for ( char base : BaseUtils.BASES ) {
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
s.append(String.format("%s %.10f ", base, log10Likelihoods[baseIndex]));
|
||||
sum += Math.pow(10, log10Likelihoods[baseIndex]);
|
||||
}
|
||||
s.append(String.format(" %f", sum));
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
// in general, we don't care about the platform index; EmpiricalSubstitutionProbabilities overlaods this
|
||||
public int getReadSequencerPlatformIndex( SAMRecord read ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Validation routines
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public boolean validate() {
|
||||
return validate(true);
|
||||
}
|
||||
|
||||
public boolean validate(boolean throwException) {
|
||||
try {
|
||||
|
||||
for ( char base : BaseUtils.BASES ) {
|
||||
|
||||
int i = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) {
|
||||
String bad = String.format("Likelihood %f is badly formed", log10Likelihoods[i]);
|
||||
throw new IllegalStateException(String.format("At %s: %s", base, bad));
|
||||
}
|
||||
}
|
||||
} catch ( IllegalStateException e ) {
|
||||
if ( throwException )
|
||||
throw new RuntimeException(e);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Hearty math calculations follow
|
||||
//
|
||||
// -- these should not be messed with unless you know what you are doing
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
*
|
||||
* @param observedBase observed base
|
||||
* @param chromBase target base
|
||||
* @param qual base quality
|
||||
* @param read SAM read
|
||||
* @param offset offset on read
|
||||
* @return log10 likelihood
|
||||
*/
|
||||
|
||||
protected double log10PofObservingBaseGivenChromosome(char observedBase, char chromBase, byte qual, SAMRecord read, int offset) {
|
||||
if (qual == 0) { // zero quals are wrong
|
||||
throw new RuntimeException(String.format("Unexpected Q0 base discovered in log10PofObservingBaseGivenChromosome: %c %s %d at %d in %s",
|
||||
observedBase, chromBase, qual, offset, read));
|
||||
}
|
||||
|
||||
double logP;
|
||||
|
||||
if ( observedBase == chromBase ) {
|
||||
// the base is consistent with the chromosome -- it's 1 - e
|
||||
//logP = oneMinusData[qual];
|
||||
double e = pow(10, (qual / -10.0));
|
||||
logP = log10(1.0 - e);
|
||||
} else {
|
||||
// the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error)
|
||||
logP = qual / -10.0 + log10PofTrueBaseGivenMiscall(observedBase, chromBase, read, offset);
|
||||
}
|
||||
|
||||
//System.out.printf("%c %c %d => %f%n", observedBase, chromBase, qual, logP);
|
||||
return logP;
|
||||
}
|
||||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
*
|
||||
* @param observedBase observed base
|
||||
* @param chromBase target base
|
||||
* @param read SAM read
|
||||
* @param offset offset on read
|
||||
* @return log10 likelihood
|
||||
*/
|
||||
protected abstract double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset);
|
||||
|
||||
//
|
||||
// Constant static data
|
||||
//
|
||||
private final static double[] zeros = new double[BaseUtils.BASES.length];
|
||||
|
||||
static {
|
||||
for ( char base : BaseUtils.BASES ) {
|
||||
zeros[BaseUtils.simpleBaseToBaseIndex(base)] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.BaseMismatchModel.*;
|
||||
|
||||
public class FourBaseProbabilitiesFactory {
|
||||
//private FourBaseProbabilitiesFactory() {} // cannot be instantiated
|
||||
|
||||
public static BaseMismatchModel getBaseMismatchModel(final String name) {
|
||||
BaseMismatchModel m = valueOf(name);
|
||||
if ( m == null )
|
||||
throw new RuntimeException("Unexpected BaseMismatchModel " + name);
|
||||
else
|
||||
return m;
|
||||
}
|
||||
|
||||
public static BaseMismatchModel getBaseMismatchModel(final FourBaseProbabilities m) {
|
||||
if ( m instanceof OneStateErrorProbabilities)
|
||||
return ONE_STATE;
|
||||
else if ( m instanceof ThreeStateErrorProbabilities)
|
||||
return THREE_STATE;
|
||||
else if ( m instanceof EmpiricalSubstitutionProbabilities)
|
||||
return EMPIRICAL;
|
||||
|
||||
throw new RuntimeException("Unexpected BaseMismatchModel " + m.getClass());
|
||||
|
||||
}
|
||||
/**
|
||||
* General and correct way to create FourBaseLikelihood objects for arbitrary base mismatching models
|
||||
*
|
||||
* @param m model
|
||||
* @param pl default platform
|
||||
* @return new 4-base model
|
||||
*/
|
||||
public static FourBaseProbabilities makeFourBaseLikelihoods(BaseMismatchModel m,
|
||||
EmpiricalSubstitutionProbabilities.SequencerPlatform pl ) {
|
||||
switch ( m ) {
|
||||
case ONE_STATE: return new OneStateErrorProbabilities();
|
||||
case THREE_STATE: return new ThreeStateErrorProbabilities();
|
||||
case EMPIRICAL: return new EmpiricalSubstitutionProbabilities(pl);
|
||||
default: throw new RuntimeException("Unexpected BaseMismatchModel " + m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
protected Set<String> samples;
|
||||
protected Logger logger;
|
||||
protected double heterozygosity;
|
||||
protected EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform;
|
||||
protected EmpiricalSubstitutionProbabilities.SequencerPlatform defaultPlatform;
|
||||
protected GenotypeWriterFactory.GENOTYPE_FORMAT OUTPUT_FORMAT;
|
||||
protected boolean ALL_BASE_MODE;
|
||||
protected boolean GENOTYPE_MODE;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Stable, error checking version of the Bayesian genotyper. Useful for calculating the likelihoods, priors,
|
||||
|
|
@ -39,92 +38,117 @@ import java.util.Arrays;
|
|||
* From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above
|
||||
* model.
|
||||
*/
|
||||
public abstract class GenotypeLikelihoods implements Cloneable {
|
||||
public class GenotypeLikelihoods implements Cloneable {
|
||||
protected final static int FIXED_PLOIDY = 2;
|
||||
protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
||||
|
||||
protected boolean enableCacheFlag = true;
|
||||
|
||||
//
|
||||
// The three fundamental data arrays associated with a Genotype Likelhoods object
|
||||
// The fundamental data arrays associated with a Genotype Likelhoods object
|
||||
//
|
||||
protected double[] likelihoods = null;
|
||||
protected double[] posteriors = null;
|
||||
protected double[] log10Likelihoods = null;
|
||||
protected double[] log10Posteriors = null;
|
||||
|
||||
private DiploidGenotypePriors priors = null;
|
||||
|
||||
/**
|
||||
* If true, lots of output will be generated about the likelihoods of individual genotypes at each site
|
||||
*/
|
||||
private boolean verbose = false;
|
||||
|
||||
/**
|
||||
* Bases with Q scores below this threshold aren't included in the likelihood calculation
|
||||
*/
|
||||
private int minQScoreToInclude = 0;
|
||||
private FourBaseProbabilities fourBaseLikelihoods = null;
|
||||
|
||||
/**
|
||||
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
|
||||
*
|
||||
* @param m base model
|
||||
*/
|
||||
public GenotypeLikelihoods() {
|
||||
public GenotypeLikelihoods(BaseMismatchModel m) {
|
||||
this.priors = new DiploidGenotypePriors();
|
||||
initialize();
|
||||
initialize(m, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
|
||||
*
|
||||
* @param m base model
|
||||
* @param pl default platform
|
||||
*/
|
||||
public GenotypeLikelihoods(DiploidGenotypePriors priors) {
|
||||
public GenotypeLikelihoods(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) {
|
||||
this.priors = new DiploidGenotypePriors();
|
||||
initialize(m, pl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new GenotypeLikelhoods object with given priors for each diploid genotype
|
||||
*
|
||||
* @param m base model
|
||||
* @param priors priors
|
||||
*/
|
||||
public GenotypeLikelihoods(BaseMismatchModel m, DiploidGenotypePriors priors) {
|
||||
this.priors = priors;
|
||||
initialize();
|
||||
initialize(m, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new GenotypeLikelhoods object with given priors for each diploid genotype
|
||||
*
|
||||
* @param m base model
|
||||
* @param priors priors
|
||||
* @param pl default platform
|
||||
*/
|
||||
public GenotypeLikelihoods(BaseMismatchModel m, DiploidGenotypePriors priors, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) {
|
||||
this.priors = priors;
|
||||
initialize(m, pl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return
|
||||
* @return clone
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
GenotypeLikelihoods c = (GenotypeLikelihoods)super.clone();
|
||||
c.priors = priors;
|
||||
c.likelihoods = likelihoods.clone();
|
||||
c.posteriors = posteriors.clone();
|
||||
c.log10Likelihoods = log10Likelihoods.clone();
|
||||
c.log10Posteriors = log10Posteriors.clone();
|
||||
c.fourBaseLikelihoods = (FourBaseProbabilities)fourBaseLikelihoods.clone();
|
||||
return c;
|
||||
}
|
||||
|
||||
private void initialize() {
|
||||
likelihoods = zeros.clone(); // likelihoods are all zeros
|
||||
posteriors = priors.getPriors().clone(); // posteriors are all the priors
|
||||
private void initialize(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) {
|
||||
fourBaseLikelihoods = FourBaseProbabilitiesFactory.makeFourBaseLikelihoods(m, pl);
|
||||
initialize();
|
||||
}
|
||||
|
||||
private void initialize() {
|
||||
log10Likelihoods = zeros.clone(); // likelihoods are all zeros
|
||||
log10Posteriors = priors.getPriors().clone(); // posteriors are all the priors
|
||||
}
|
||||
|
||||
public void setVerbose(boolean v) {
|
||||
verbose = v;
|
||||
fourBaseLikelihoods.setVerbose(v);
|
||||
}
|
||||
|
||||
public boolean isVerbose() {
|
||||
return verbose;
|
||||
return fourBaseLikelihoods.isVerbose();
|
||||
}
|
||||
|
||||
public int getMinQScoreToInclude() {
|
||||
return minQScoreToInclude;
|
||||
return fourBaseLikelihoods.getMinQScoreToInclude();
|
||||
}
|
||||
|
||||
public void setMinQScoreToInclude(int minQScoreToInclude) {
|
||||
this.minQScoreToInclude = minQScoreToInclude;
|
||||
fourBaseLikelihoods.setMinQScoreToInclude(minQScoreToInclude);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of log10 likelihoods for each genotype, indexed by DiploidGenotype.ordinal values()
|
||||
* @return
|
||||
* @return likelihoods array
|
||||
*/
|
||||
public double[] getLikelihoods() {
|
||||
return likelihoods;
|
||||
return log10Likelihoods;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the likelihood associated with DiploidGenotype g
|
||||
* @param g
|
||||
* @param g genotype
|
||||
* @return log10 likelihood as a double
|
||||
*/
|
||||
public double getLikelihood(DiploidGenotype g) {
|
||||
|
|
@ -137,12 +161,12 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
* @return raw log10 (not-normalized posteriors) as a double array
|
||||
*/
|
||||
public double[] getPosteriors() {
|
||||
return posteriors;
|
||||
return log10Posteriors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the posterior associated with DiploidGenotype g
|
||||
* @param g
|
||||
* @param g genotpe
|
||||
* @return raw log10 (not-normalized posteror) as a double
|
||||
*/
|
||||
public double getPosterior(DiploidGenotype g) {
|
||||
|
|
@ -156,15 +180,15 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
* @return normalized posterors as a double array
|
||||
*/
|
||||
public double[] getNormalizedPosteriors() {
|
||||
double[] normalized = new double[posteriors.length];
|
||||
double[] normalized = new double[log10Posteriors.length];
|
||||
double sum = 0.0;
|
||||
|
||||
// for precision purposes, we need to add (or really subtract, since everything is negative)
|
||||
// the largest posterior value from all entries so that numbers don't get too small
|
||||
double maxValue = posteriors[0];
|
||||
for (int i = 1; i < posteriors.length; i++) {
|
||||
if ( maxValue < posteriors[i] )
|
||||
maxValue = posteriors[i];
|
||||
double maxValue = log10Posteriors[0];
|
||||
for (int i = 1; i < log10Posteriors.length; i++) {
|
||||
if ( maxValue < log10Posteriors[i] )
|
||||
maxValue = log10Posteriors[i];
|
||||
}
|
||||
|
||||
// collect the posteriors
|
||||
|
|
@ -198,19 +222,20 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
|
||||
/**
|
||||
* Sets the priors
|
||||
* @param priors priors
|
||||
*/
|
||||
public void setPriors(DiploidGenotypePriors priors) {
|
||||
this.priors = priors;
|
||||
posteriors = zeros.clone();
|
||||
log10Posteriors = zeros.clone();
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
int i = g.ordinal();
|
||||
posteriors[i] = priors.getPriors()[i] + likelihoods[i];
|
||||
log10Posteriors[i] = priors.getPriors()[i] + log10Likelihoods[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the prior associated with DiploidGenotype g
|
||||
* @param g
|
||||
* @param g genotype
|
||||
* @return log10 prior as a double
|
||||
*/
|
||||
public double getPrior(DiploidGenotype g) {
|
||||
|
|
@ -233,24 +258,8 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
enableCacheFlag = enable;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// add() -- the heart of
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Updates likelihoods and posteriors to reflect an additional observation of observedBase with
|
||||
* qualityScore.
|
||||
*
|
||||
* @param observedBase
|
||||
* @param qualityScore
|
||||
* @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example)
|
||||
*/
|
||||
public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||
return reallyAdd(observedBase, qualityScore, read, offset, true);
|
||||
public int add(ReadBackedPileup pileup) {
|
||||
return add(pileup, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -258,7 +267,8 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
* read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the
|
||||
* pileup
|
||||
*
|
||||
* @param pileup
|
||||
* @param pileup read pileup
|
||||
* @param ignoreBadBases should we ignore bad bases
|
||||
* @return the number of good bases found in the pileup
|
||||
*/
|
||||
public int add(ReadBackedPileup pileup, boolean ignoreBadBases) {
|
||||
|
|
@ -281,149 +291,127 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
return n;
|
||||
}
|
||||
|
||||
public int add(ReadBackedPileup pileup) {
|
||||
return add(pileup, false);
|
||||
}
|
||||
public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||
|
||||
|
||||
private int reallyAdd(char observedBase, byte qualityScore, SAMRecord read, int offset, boolean enableCacheArg ) {
|
||||
if ( badBase(observedBase) ) {
|
||||
throw new RuntimeException(String.format("BUG: unexpected base %c with Q%d passed to GenotypeLikelihoods", observedBase, qualityScore));
|
||||
// Handle caching if requested. Just look up the cached result if its available, or compute and store it
|
||||
GenotypeLikelihoods gl;
|
||||
if ( cacheIsEnabled() ) {
|
||||
if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read) ) {
|
||||
gl = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset);
|
||||
} else {
|
||||
gl = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read);
|
||||
}
|
||||
} else {
|
||||
gl = calculateGenotypeLikelihoods(observedBase, qualityScore, read, offset);
|
||||
}
|
||||
|
||||
int nBasesAdded = 0; // the result -- how many bases did we add?
|
||||
// for bad bases, there are no likelihoods
|
||||
if ( gl == null )
|
||||
return 0;
|
||||
|
||||
if ( qualityScore > getMinQScoreToInclude() ) {
|
||||
// Handle caching if requested. Just look up the cached result if its available, or compute and store it
|
||||
boolean enableCache = enableCacheArg && cacheIsEnabled();
|
||||
GenotypeLikelihoods cached = null;
|
||||
if ( enableCache ) {
|
||||
if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read, offset ) ) {
|
||||
cached = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset );
|
||||
} else {
|
||||
cached = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset );
|
||||
}
|
||||
}
|
||||
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
double likelihoodCalc = ! enableCache ? log10PofObservingBaseGivenGenotype(observedBase, g, qualityScore, read, offset) : 0.0;
|
||||
double likelihoodCached = enableCache ? cached.likelihoods[g.ordinal()] : 0.0;
|
||||
double likelihood = enableCache ? likelihoodCached : likelihoodCalc;
|
||||
|
||||
//if ( enableCache && likelihoodCalc != 0.0 && MathUtils.compareDoubles(likelihoodCached, likelihoodCalc) != 0 ) {
|
||||
// System.out.printf("ERROR: Likelihoods not equal is CACHE=%f != calc=%f for %c %d %s%n",
|
||||
// likelihoodCached, likelihoodCalc, observedBase, qualityScore, g.toString());
|
||||
//}
|
||||
|
||||
if ( isVerbose() ) {
|
||||
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
|
||||
System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f / %f%n",
|
||||
observedBase, g, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood);
|
||||
}
|
||||
|
||||
likelihoods[g.ordinal()] += likelihood;
|
||||
posteriors[g.ordinal()] += likelihood;
|
||||
}
|
||||
double[] likelihoods = gl.getLikelihoods();
|
||||
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
double likelihood = likelihoods[g.ordinal()];
|
||||
|
||||
if ( isVerbose() ) {
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); }
|
||||
System.out.println();
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", likelihoods[g.ordinal()]); }
|
||||
System.out.println();
|
||||
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
|
||||
System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f / %f%n",
|
||||
observedBase, g, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood);
|
||||
}
|
||||
|
||||
nBasesAdded = 1;
|
||||
log10Likelihoods[g.ordinal()] += likelihood;
|
||||
log10Posteriors[g.ordinal()] += likelihood;
|
||||
}
|
||||
|
||||
return nBasesAdded;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// caching routines
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
static GenotypeLikelihoods[][][][][][] CACHE = new GenotypeLikelihoods[BaseMismatchModel.values().length][EmpiricalSubstitutionProbabilities.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
|
||||
/**
|
||||
* Procedure intended for overloading in subclasses. Returns / sets cached GL value given all of the information
|
||||
* one can reasonably expect to have. The default cache is fairly simple.
|
||||
*
|
||||
* @param observedBase
|
||||
* @param qualityScore
|
||||
* @param ploidy
|
||||
* @param read
|
||||
* @param offset
|
||||
* @param val
|
||||
* @return
|
||||
*/
|
||||
protected abstract GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val );
|
||||
private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read) {
|
||||
return getCache(CACHE, observedBase, qualityScore, ploidy, read) != null;
|
||||
}
|
||||
|
||||
protected GenotypeLikelihoods simpleGetSetCache( GenotypeLikelihoods[][][][] cache,
|
||||
char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
private GenotypeLikelihoods getCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, SAMRecord read) {
|
||||
GenotypeLikelihoods gl = getCache(CACHE, observedBase, qualityScore, ploidy, read);
|
||||
if ( gl == null )
|
||||
throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s",
|
||||
observedBase, qualityScore, ploidy, read));
|
||||
return gl;
|
||||
}
|
||||
|
||||
private GenotypeLikelihoods calculateCachedGenotypeLikelihoods(char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset) {
|
||||
GenotypeLikelihoods gl = calculateGenotypeLikelihoods(observedBase, qualityScore, read, offset);
|
||||
setCache(CACHE, observedBase, qualityScore, ploidy, read, gl);
|
||||
return gl;
|
||||
}
|
||||
|
||||
protected void setCache( GenotypeLikelihoods[][][][][][] cache,
|
||||
char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, GenotypeLikelihoods val ) {
|
||||
int m = FourBaseProbabilitiesFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal();
|
||||
int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read);
|
||||
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
||||
int j = qualityScore;
|
||||
int k = ploidy;
|
||||
int x = strandIndex(! read.getReadNegativeStrandFlag() );
|
||||
|
||||
if ( val != null )
|
||||
cache[i][j][k][x] = val;
|
||||
|
||||
return cache[i][j][k][x];
|
||||
cache[m][a][i][j][k][x] = val;
|
||||
}
|
||||
|
||||
protected int strandIndex(boolean fwdStrand) {
|
||||
return fwdStrand ? 0 : 1;
|
||||
protected GenotypeLikelihoods getCache( GenotypeLikelihoods[][][][][][] cache,
|
||||
char observedBase, byte qualityScore, int ploidy, SAMRecord read) {
|
||||
int m = FourBaseProbabilitiesFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal();
|
||||
int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read);
|
||||
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
||||
int j = qualityScore;
|
||||
int k = ploidy;
|
||||
int x = strandIndex(! read.getReadNegativeStrandFlag() );
|
||||
return cache[m][a][i][j][k][x];
|
||||
}
|
||||
|
||||
//
|
||||
// All oft he following routines are totally generic
|
||||
//
|
||||
private GenotypeLikelihoods calculateGenotypeLikelihoods(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||
FourBaseProbabilities fbl = fourBaseLikelihoods.computeLikelihoods(observedBase, qualityScore, read, offset);
|
||||
if ( fbl == null )
|
||||
return null;
|
||||
|
||||
private GenotypeLikelihoods getCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
||||
return getSetCache( observedBase, qualityScore, ploidy, read, offset, null );
|
||||
}
|
||||
|
||||
private void setCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
getSetCache( observedBase, qualityScore, ploidy, read, offset, val );
|
||||
}
|
||||
|
||||
private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
||||
return getCache(observedBase, qualityScore, ploidy, read, offset) != null;
|
||||
}
|
||||
|
||||
private GenotypeLikelihoods getCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
||||
if ( ! inCache(observedBase, qualityScore, ploidy, read, offset ) )
|
||||
throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s",
|
||||
observedBase, qualityScore, ploidy, read));
|
||||
|
||||
return getCache(observedBase, qualityScore, ploidy, read, offset);
|
||||
}
|
||||
|
||||
private GenotypeLikelihoods calculateCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
||||
if ( inCache(observedBase, qualityScore, ploidy, read, offset ) )
|
||||
throw new RuntimeException(String.format("BUG: trying to set an already set cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s",
|
||||
observedBase, qualityScore, ploidy, read));
|
||||
|
||||
// create a new genotype likelihoods object and add this single base to it -- now we have a cached result
|
||||
double[] fbLikelihoods = fbl.getLog10Likelihoods();
|
||||
try {
|
||||
GenotypeLikelihoods g = (GenotypeLikelihoods)this.clone();
|
||||
g.initialize();
|
||||
g.reallyAdd(observedBase, qualityScore, read, offset, false);
|
||||
|
||||
setCache(observedBase, qualityScore, ploidy, read, offset, g);
|
||||
GenotypeLikelihoods gl = (GenotypeLikelihoods)this.clone();
|
||||
gl.initialize();
|
||||
|
||||
//System.out.printf("Caching %c %d %d %s %s (%d total entries)%n", observedBase, qualityScore, ploidy, read.getReadName(), EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read), cacheSize);
|
||||
return g;
|
||||
} catch ( CloneNotSupportedException e ) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
// we need to adjust for ploidy. We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space
|
||||
double ploidyAdjustment = log10(FIXED_PLOIDY);
|
||||
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
|
||||
// todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop
|
||||
double p_base = 0.0;
|
||||
p_base += pow(10, fbLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment);
|
||||
p_base += pow(10, fbLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment);
|
||||
double likelihood = log10(p_base);
|
||||
|
||||
gl.log10Likelihoods[g.ordinal()] += likelihood;
|
||||
gl.log10Posteriors[g.ordinal()] += likelihood;
|
||||
}
|
||||
|
||||
if ( fourBaseLikelihoods.isVerbose() ) {
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); }
|
||||
System.out.println();
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", gl.log10Likelihoods[g.ordinal()]); }
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
return gl;
|
||||
|
||||
} catch ( CloneNotSupportedException e ) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
|
|
@ -432,13 +420,17 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
protected int strandIndex(boolean fwdStrand) {
|
||||
return fwdStrand ? 0 : 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base
|
||||
* is considered bad if:
|
||||
*
|
||||
* Criterion 1: observed base isn't a A,C,T,G or lower case equivalent
|
||||
*
|
||||
* @param observedBase
|
||||
* @param observedBase observed base
|
||||
* @return true if the base is a bad base
|
||||
*/
|
||||
private boolean badBase(char observedBase) {
|
||||
|
|
@ -448,14 +440,14 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
/**
|
||||
* Return a string representation of this object in a moderately usable form
|
||||
*
|
||||
* @return
|
||||
* @return string representation
|
||||
*/
|
||||
public String toString() {
|
||||
double sum = 0;
|
||||
StringBuilder s = new StringBuilder();
|
||||
for (DiploidGenotype g : DiploidGenotype.values()) {
|
||||
s.append(String.format("%s %.10f ", g, likelihoods[g.ordinal()]));
|
||||
sum += Math.pow(10,likelihoods[g.ordinal()]);
|
||||
s.append(String.format("%s %.10f ", g, log10Likelihoods[g.ordinal()]));
|
||||
sum += Math.pow(10,log10Likelihoods[g.ordinal()]);
|
||||
}
|
||||
s.append(String.format(" %f", sum));
|
||||
return s.toString();
|
||||
|
|
@ -481,10 +473,10 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
String bad = null;
|
||||
|
||||
int i = g.ordinal();
|
||||
if ( ! MathUtils.wellFormedDouble(likelihoods[i]) || ! MathUtils.isNegativeOrZero(likelihoods[i]) ) {
|
||||
bad = String.format("Likelihood %f is badly formed", likelihoods[i]);
|
||||
} else if ( ! MathUtils.wellFormedDouble(posteriors[i]) || ! MathUtils.isNegativeOrZero(posteriors[i]) ) {
|
||||
bad = String.format("Posterior %f is badly formed", posteriors[i]);
|
||||
if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) {
|
||||
bad = String.format("Likelihood %f is badly formed", log10Likelihoods[i]);
|
||||
} else if ( ! MathUtils.wellFormedDouble(log10Posteriors[i]) || ! MathUtils.isNegativeOrZero(log10Posteriors[i]) ) {
|
||||
bad = String.format("Posterior %f is badly formed", log10Posteriors[i]);
|
||||
}
|
||||
|
||||
if ( bad != null ) {
|
||||
|
|
@ -501,68 +493,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
return true;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Hearty math calculations follow
|
||||
//
|
||||
// -- these should not be messed with unless you know what you are doing
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Works for any ploidy (in organization).
|
||||
*
|
||||
* @param observedBase
|
||||
* @param g
|
||||
* @param qual
|
||||
* @return
|
||||
*/
|
||||
protected double log10PofObservingBaseGivenGenotype(char observedBase, DiploidGenotype g, byte qual, SAMRecord read, int offset ) {
|
||||
if (qual == 0) { // zero quals are wrong
|
||||
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %s %d at %d in %s",
|
||||
observedBase, g, qual, offset, read));
|
||||
}
|
||||
|
||||
// todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop
|
||||
double p_base = 0.0;
|
||||
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base1, qual, FIXED_PLOIDY, read, offset ));
|
||||
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base2, qual, FIXED_PLOIDY, read, offset ));
|
||||
return log10(p_base);
|
||||
}
|
||||
|
||||
protected double log10PofObservingBaseGivenChromosome(char observedBase, char chromBase, byte qual, int ploidy, SAMRecord read, int offset) {
|
||||
double logP = 0.0;
|
||||
|
||||
if ( observedBase == chromBase ) {
|
||||
// the base is consistent with the chromosome -- it's 1 - e
|
||||
//logP = oneMinusData[qual];
|
||||
double e = pow(10, (qual / -10.0));
|
||||
logP = log10(1.0 - e);
|
||||
} else {
|
||||
// the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error)
|
||||
logP = qual / -10.0 + log10PofTrueBaseGivenMiscall(observedBase, chromBase, read, offset);
|
||||
}
|
||||
|
||||
// adjust for ploidy. We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space
|
||||
//logP -= log10N[ploidy];
|
||||
logP -= log10(ploidy);
|
||||
|
||||
//System.out.printf("%c %c %d %d => %f%n", observedBase, chromBase, qual, ploidy, logP);
|
||||
return logP;
|
||||
}
|
||||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
*
|
||||
* @param observedBase
|
||||
* @param chromBase
|
||||
* @param read
|
||||
* @param offset
|
||||
* @return
|
||||
*/
|
||||
protected abstract double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset);
|
||||
|
||||
//
|
||||
// Constant static data
|
||||
//
|
||||
|
|
|
|||
|
|
@ -1,34 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.BaseMismatchModel.*;
|
||||
|
||||
public class GenotypeLikelihoodsFactory {
|
||||
//private GenotypeLikelihoodsFactory() {} // cannot be instantiated
|
||||
|
||||
public static BaseMismatchModel getBaseMismatchModel(final String name) {
|
||||
BaseMismatchModel m = valueOf(name);
|
||||
if ( m == null )
|
||||
throw new RuntimeException("Unexpected BaseMismatchModel " + name);
|
||||
else
|
||||
return m;
|
||||
}
|
||||
|
||||
/**
|
||||
* General and correct way to create GenotypeLikelihood objects for arbitrary base mismatching models
|
||||
*
|
||||
* @param m
|
||||
* @param priors
|
||||
* @param pl
|
||||
* @return
|
||||
*/
|
||||
public static GenotypeLikelihoods makeGenotypeLikelihoods(BaseMismatchModel m,
|
||||
DiploidGenotypePriors priors,
|
||||
EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform pl ) {
|
||||
switch ( m ) {
|
||||
case ONE_STATE: return new OneStateErrorGenotypeLikelihoods(priors);
|
||||
case THREE_STATE: return new ThreeStateErrorGenotypeLikelihoods(priors);
|
||||
case EMPIRICAL: return new EmpiricalSubstitutionGenotypeLikelihoods(priors, pl);
|
||||
default: throw new RuntimeException("Unexpected BaseMismatchModel " + m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -106,7 +106,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
GenotypeLikelihoods GL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
GL.add(pileup, true);
|
||||
|
||||
GLs.put(sample, GL);
|
||||
|
|
|
|||
|
|
@ -1,45 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* This implements the old style CRD calculation of the chance that a base being a true chromBase given
|
||||
* an miscalled base, in which the p is e, grabbing all of the probability. It shouldn't be used
|
||||
*/
|
||||
public class OneStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||
//
|
||||
// forwarding constructors -- don't do anything at all
|
||||
//
|
||||
public OneStateErrorGenotypeLikelihoods() { super(); }
|
||||
public OneStateErrorGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); }
|
||||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
return super.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param observedBase
|
||||
* @param chromBase
|
||||
* @param read
|
||||
* @param offset
|
||||
* @return
|
||||
*/
|
||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||
return 0; // equivalent to e model
|
||||
}
|
||||
|
||||
static GenotypeLikelihoods[][][][] ONE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
return simpleGetSetCache(ONE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* This implements the old style CRD calculation of the chance that a base being a true chromBase given
|
||||
* an miscalled base, in which the p is e, grabbing all of the probability. It shouldn't be used
|
||||
*/
|
||||
public class OneStateErrorProbabilities extends FourBaseProbabilities {
|
||||
//
|
||||
// forwarding constructors -- don't do anything at all
|
||||
//
|
||||
public OneStateErrorProbabilities() { super(); }
|
||||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return clone
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
return super.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param observedBase observed base
|
||||
* @param chromBase target base
|
||||
* @param read SAM read
|
||||
* @param offset offset on read
|
||||
* @return log10 likelihood
|
||||
*/
|
||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||
return 0; // equivalent to e model
|
||||
}
|
||||
}
|
||||
|
|
@ -105,7 +105,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
pileup.setIncludeDeletionsInPileupString(true);
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
GenotypeLikelihoods GL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
GL.add(pileup, true);
|
||||
return new Pair<ReadBackedPileup, GenotypeLikelihoods>(pileup, GL);
|
||||
}
|
||||
|
|
@ -129,7 +129,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
GenotypeLikelihoods GL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);
|
||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);
|
||||
GL.add(pileup, true);
|
||||
|
||||
GLs.put(sample, GL);
|
||||
|
|
|
|||
|
|
@ -1,40 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
public class ThreeStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||
//
|
||||
// forwarding constructors -- don't do anything at all
|
||||
//
|
||||
public ThreeStateErrorGenotypeLikelihoods() { super(); }
|
||||
public ThreeStateErrorGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); }
|
||||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
return super.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple log10(3) cached value
|
||||
*/
|
||||
protected static final double log103 = log10(3.0);
|
||||
|
||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||
return -log103; // equivalent to e / 3 model
|
||||
}
|
||||
|
||||
static GenotypeLikelihoods[][][][] THREE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
return simpleGetSetCache(THREE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
public class ThreeStateErrorProbabilities extends FourBaseProbabilities {
|
||||
//
|
||||
// forwarding constructors -- don't do anything at all
|
||||
//
|
||||
public ThreeStateErrorProbabilities() { super(); }
|
||||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return clone
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
return super.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple log10(3) cached value
|
||||
*/
|
||||
protected static final double log103 = log10(3.0);
|
||||
|
||||
/**
|
||||
*
|
||||
* @param observedBase observed base
|
||||
* @param chromBase target base
|
||||
* @param read SAM read
|
||||
* @param offset offset on read
|
||||
* @return log10 likelihood
|
||||
*/
|
||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||
return -log103; // equivalent to e / 3 model
|
||||
}
|
||||
}
|
||||
|
|
@ -67,7 +67,7 @@ public class UnifiedArgumentCollection {
|
|||
public String ASSUME_SINGLE_SAMPLE = null;
|
||||
|
||||
@Argument(fullName = "platform", shortName = "pl", doc = "Causes the genotyper to assume that reads without PL header TAG are this platform. Defaults to null, indicating that the system will throw a runtime exception when such reads are detected", required = false)
|
||||
public EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform = null;
|
||||
public EmpiricalSubstitutionProbabilities.SequencerPlatform defaultPlatform = null;
|
||||
|
||||
|
||||
// control the various parameters to be used
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
|||
//}
|
||||
|
||||
//Calculate posterior probabilities
|
||||
GenotypeLikelihoods G = new ThreeStateErrorGenotypeLikelihoods();
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods(BaseMismatchModel.THREE_STATE);
|
||||
SAMRecord read; int offset; char base; byte qual; int mapquality; String readname;
|
||||
|
||||
//Check for bad bases and ensure mapping quality
|
||||
|
|
|
|||
|
|
@ -334,7 +334,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
}
|
||||
|
||||
//Calculate posterior probabilities
|
||||
GenotypeLikelihoods G = new ThreeStateErrorGenotypeLikelihoods();
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods(BaseMismatchModel.THREE_STATE);
|
||||
SAMRecord read; int offset; char base; byte qual; int mapquality; String readname;
|
||||
|
||||
//Check for bad bases and ensure mapping quality myself. This works.
|
||||
|
|
|
|||
Loading…
Reference in New Issue