diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EmpiricalSubstitutionGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EmpiricalSubstitutionProbabilities.java similarity index 83% rename from java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EmpiricalSubstitutionGenotypeLikelihoods.java rename to java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EmpiricalSubstitutionProbabilities.java index faf5f3479..8c3a9f203 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EmpiricalSubstitutionGenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EmpiricalSubstitutionProbabilities.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.QualityUtils; import static java.lang.Math.log10; import java.util.TreeMap; @@ -10,7 +9,7 @@ import java.util.EnumMap; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; -public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihoods { +public class EmpiricalSubstitutionProbabilities extends FourBaseProbabilities { // -------------------------------------------------------------------------------------------------------------- // // Static methods to manipulate machine platforms @@ -64,6 +63,10 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood return plOfLastRead; } + public int getReadSequencerPlatformIndex( SAMRecord read ) { + return getReadSequencerPlatform(read).ordinal(); + } + // -------------------------------------------------------------------------------------------------------------- // // Static methods to get at the transition tables themselves @@ -189,17 +192,15 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood // // forwarding constructors -- don't do anything at all // - public EmpiricalSubstitutionGenotypeLikelihoods() { super(); } + public EmpiricalSubstitutionProbabilities() { super(); } - public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); } - - public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors, boolean raiseErrorOnUnknownPlatform) { - super(priors); + public EmpiricalSubstitutionProbabilities(boolean raiseErrorOnUnknownPlatform) { + super(); this.raiseErrorOnUnknownPlatform = raiseErrorOnUnknownPlatform; } - public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors, SequencerPlatform assumeUnknownPlatformsAreThis) { - super(priors); + public EmpiricalSubstitutionProbabilities(SequencerPlatform assumeUnknownPlatformsAreThis) { + super(); if ( assumeUnknownPlatformsAreThis != null ) { raiseErrorOnUnknownPlatform = false; @@ -209,38 +210,13 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood /** * Cloning of the object - * @return + * @return clone * @throws CloneNotSupportedException */ protected Object clone() throws CloneNotSupportedException { return super.clone(); } - // ----------------------------------------------------------------------------------------------------------------- - // - // - // caching routines - // - // - // ----------------------------------------------------------------------------------------------------------------- - static GenotypeLikelihoods[][][][][] EMPIRICAL_CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2]; - - protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy, - SAMRecord read, int offset, GenotypeLikelihoods val ) { - SequencerPlatform pl = getReadSequencerPlatform(read); - int a = pl.ordinal(); - int i = BaseUtils.simpleBaseToBaseIndex(observedBase); - int j = qualityScore; - int k = ploidy; - int x = strandIndex(! read.getReadNegativeStrandFlag()); - - if ( val != null ) - EMPIRICAL_CACHE[a][i][j][k][x] = val; - - return EMPIRICAL_CACHE[a][i][j][k][x]; - } - - // ----------------------------------------------------------------------------------------------------------------- // // @@ -264,7 +240,7 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood //System.out.printf("%s for %s%n", pl, read); - double log10p = 0.0; + double log10p; if ( fwdStrand ) { log10p = getProbMiscallIsBase(pl, observedBase, chromBase); } else { @@ -275,4 +251,4 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood return log10p; } -} +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseProbabilities.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseProbabilities.java new file mode 100755 index 000000000..d1da3e98e --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseProbabilities.java @@ -0,0 +1,341 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.*; + +import static java.lang.Math.log10; +import static java.lang.Math.pow; + +/** + * Stable, error checking version of strict 4 base likelihoods. Useful for calculating the likelihoods, priors, + * and posteriors given a pile of bases and quality scores (in conjuncion with GenotypeLikelihoods) + * + * Suppose we have bases b1, b2, ..., bN with qualities scores q1, q2, ..., qN. This object + * calculates: + * + * P(b | D) = P(b) * P(D | b) + * + * where + * + * P(D | b) = sum_i log10 P(bi | b) + * + * and + * + * P(bi | b) = 1 - P(error | q1) if bi = b + * = P(error | q1) / 3 if bi != b + * + * + */ +public abstract class FourBaseProbabilities implements Cloneable { + + protected boolean enableCacheFlag = true; + + // + // The fundamental data array associated with 4-base likelihoods + // + protected double[] log10Likelihoods = null; + + + /** + * If true, lots of output will be generated about the Likelihoods at each site + */ + private boolean verbose = false; + + /** + * Bases with Q scores below this threshold aren't included in the Likelihood calculation + */ + private int minQScoreToInclude = 0; + + /** + * Create a new FourBaseLikelihoods object + */ + public FourBaseProbabilities() { + log10Likelihoods = zeros.clone(); // Likelihoods are all zeros + } + + /** + * Cloning of the object + * @return clone + * @throws CloneNotSupportedException + */ + protected Object clone() throws CloneNotSupportedException { + FourBaseProbabilities c = (FourBaseProbabilities)super.clone(); + c.log10Likelihoods = log10Likelihoods.clone(); + return c; + } + + public void setVerbose(boolean v) { + verbose = v; + } + + public boolean isVerbose() { + return verbose; + } + + public int getMinQScoreToInclude() { + return minQScoreToInclude; + } + + public void setMinQScoreToInclude(int minQScoreToInclude) { + this.minQScoreToInclude = minQScoreToInclude; + } + + /** + * Returns an array of log10 likelihoods for each base, indexed by BaseUtils.BASES.ordinal values() + * @return probs + */ + public double[] getLog10Likelihoods() { + return log10Likelihoods; + } + + /** + * Returns the likelihood associated with a base + * @param base base + * @return log10 likelihood as a double + */ + public double getLog10Likelihood(char base) { + int baseIndex = BaseUtils.simpleBaseToBaseIndex(base); + return (baseIndex < 0 ? 0.0 : getLog10Likelihoods()[baseIndex]); + } + + /** + * Returns an array of likelihoods for each base, indexed by BaseUtils.BASES.ordinal values() + * @return probs + */ + public double[] getLikelihoods() { + double[] probs = new double[4]; + for (int i = 0; i < 4; i++) + probs[i] = Math.pow(10, log10Likelihoods[i]); + return probs; + } + + /** + * Returns the likelihoods associated with a base + * @param base base + * @return likelihoods as a double + */ + public double getLikelihood(char base) { + int baseIndex = BaseUtils.simpleBaseToBaseIndex(base); + return (baseIndex < 0 ? 0.0 : Math.pow(10, log10Likelihoods[baseIndex])); + } + + // ----------------------------------------------------------------------------------------------------------------- + // + // + // add() -- the heart of + // + // + // ----------------------------------------------------------------------------------------------------------------- + + /** + * Updates likelihoods and posteriors to reflect an additional observation of observedBase with + * qualityScore. + * + * @param observedBase observed base + * @param qualityScore base quality + * @param read SAM read + * @param offset offset on read + * @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example) + */ + public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) { + FourBaseProbabilities fbl = computeLikelihoods(observedBase, qualityScore, read, offset); + if ( fbl == null ) + return 0; + + for ( char base : BaseUtils.BASES ) { + double likelihood = fbl.getLikelihood(base); + log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)] += likelihood; + } + + if ( isVerbose() ) { + for ( char base : BaseUtils.BASES ) { System.out.printf("%s\t", base); } + System.out.println(); + for ( char base : BaseUtils.BASES ) { System.out.printf("%.2f\t", log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)]); } + System.out.println(); + } + + return 1; + } + + /** + * Updates likelihoods and posteriors to reflect an additional observation of observedBase with + * qualityScore. + * + * @param observedBase observed base + * @param qualityScore base quality + * @param read SAM read + * @param offset offset on read + * @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example) + */ + public FourBaseProbabilities computeLikelihoods(char observedBase, byte qualityScore, SAMRecord read, int offset) { + if ( badBase(observedBase) ) { + return null; + } + + try { + if ( qualityScore > getMinQScoreToInclude() ) { + + FourBaseProbabilities fbl = (FourBaseProbabilities)this.clone(); + fbl.log10Likelihoods = zeros.clone(); + + for ( char base : BaseUtils.BASES ) { + double likelihood = log10PofObservingBaseGivenChromosome(observedBase, base, qualityScore, read, offset); + + if ( isVerbose() ) { + boolean fwdStrand = ! read.getReadNegativeStrandFlag(); + System.out.printf(" L(%c | b=%s, Q=%d, S=%s) = %f / %f%n", + observedBase, base, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood); + } + + fbl.log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)] = likelihood; + } + + return fbl; + } + } catch ( CloneNotSupportedException e ) { + throw new RuntimeException(e); + } + + return null; + } + + + // ----------------------------------------------------------------------------------------------------------------- + // + // + // helper routines + // + // + // ----------------------------------------------------------------------------------------------------------------- + + /** + * Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base + * is considered bad if: + * + * Criterion 1: observed base isn't a A,C,T,G or lower case equivalent + * + * @param observedBase observed base + * @return true if the base is a bad base + */ + private boolean badBase(char observedBase) { + return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1; + } + + /** + * Return a string representation of this object in a moderately usable form + * + * @return string representation + */ + public String toString() { + double sum = 0; + StringBuilder s = new StringBuilder(); + for ( char base : BaseUtils.BASES ) { + int baseIndex = BaseUtils.simpleBaseToBaseIndex(base); + s.append(String.format("%s %.10f ", base, log10Likelihoods[baseIndex])); + sum += Math.pow(10, log10Likelihoods[baseIndex]); + } + s.append(String.format(" %f", sum)); + return s.toString(); + } + + // in general, we don't care about the platform index; EmpiricalSubstitutionProbabilities overlaods this + public int getReadSequencerPlatformIndex( SAMRecord read ) { + return 0; + } + + // ----------------------------------------------------------------------------------------------------------------- + // + // + // Validation routines + // + // + // ----------------------------------------------------------------------------------------------------------------- + + public boolean validate() { + return validate(true); + } + + public boolean validate(boolean throwException) { + try { + + for ( char base : BaseUtils.BASES ) { + + int i = BaseUtils.simpleBaseToBaseIndex(base); + if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) { + String bad = String.format("Likelihood %f is badly formed", log10Likelihoods[i]); + throw new IllegalStateException(String.format("At %s: %s", base, bad)); + } + } + } catch ( IllegalStateException e ) { + if ( throwException ) + throw new RuntimeException(e); + else + return false; + } + + return true; + } + + // ----------------------------------------------------------------------------------------------------------------- + // + // + // Hearty math calculations follow + // + // -- these should not be messed with unless you know what you are doing + // + // ----------------------------------------------------------------------------------------------------------------- + + /** + * + * @param observedBase observed base + * @param chromBase target base + * @param qual base quality + * @param read SAM read + * @param offset offset on read + * @return log10 likelihood + */ + + protected double log10PofObservingBaseGivenChromosome(char observedBase, char chromBase, byte qual, SAMRecord read, int offset) { + if (qual == 0) { // zero quals are wrong + throw new RuntimeException(String.format("Unexpected Q0 base discovered in log10PofObservingBaseGivenChromosome: %c %s %d at %d in %s", + observedBase, chromBase, qual, offset, read)); + } + + double logP; + + if ( observedBase == chromBase ) { + // the base is consistent with the chromosome -- it's 1 - e + //logP = oneMinusData[qual]; + double e = pow(10, (qual / -10.0)); + logP = log10(1.0 - e); + } else { + // the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error) + logP = qual / -10.0 + log10PofTrueBaseGivenMiscall(observedBase, chromBase, read, offset); + } + + //System.out.printf("%c %c %d => %f%n", observedBase, chromBase, qual, logP); + return logP; + } + + /** + * Must be overridden by concrete subclasses + * + * @param observedBase observed base + * @param chromBase target base + * @param read SAM read + * @param offset offset on read + * @return log10 likelihood + */ + protected abstract double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset); + + // + // Constant static data + // + private final static double[] zeros = new double[BaseUtils.BASES.length]; + + static { + for ( char base : BaseUtils.BASES ) { + zeros[BaseUtils.simpleBaseToBaseIndex(base)] = 0.0; + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseProbabilitiesFactory.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseProbabilitiesFactory.java new file mode 100755 index 000000000..5b427c423 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseProbabilitiesFactory.java @@ -0,0 +1,43 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import static org.broadinstitute.sting.gatk.walkers.genotyper.BaseMismatchModel.*; + +public class FourBaseProbabilitiesFactory { + //private FourBaseProbabilitiesFactory() {} // cannot be instantiated + + public static BaseMismatchModel getBaseMismatchModel(final String name) { + BaseMismatchModel m = valueOf(name); + if ( m == null ) + throw new RuntimeException("Unexpected BaseMismatchModel " + name); + else + return m; + } + + public static BaseMismatchModel getBaseMismatchModel(final FourBaseProbabilities m) { + if ( m instanceof OneStateErrorProbabilities) + return ONE_STATE; + else if ( m instanceof ThreeStateErrorProbabilities) + return THREE_STATE; + else if ( m instanceof EmpiricalSubstitutionProbabilities) + return EMPIRICAL; + + throw new RuntimeException("Unexpected BaseMismatchModel " + m.getClass()); + + } + /** + * General and correct way to create FourBaseLikelihood objects for arbitrary base mismatching models + * + * @param m model + * @param pl default platform + * @return new 4-base model + */ + public static FourBaseProbabilities makeFourBaseLikelihoods(BaseMismatchModel m, + EmpiricalSubstitutionProbabilities.SequencerPlatform pl ) { + switch ( m ) { + case ONE_STATE: return new OneStateErrorProbabilities(); + case THREE_STATE: return new ThreeStateErrorProbabilities(); + case EMPIRICAL: return new EmpiricalSubstitutionProbabilities(pl); + default: throw new RuntimeException("Unexpected BaseMismatchModel " + m); + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index e7d5120bb..13d7f3312 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -29,7 +29,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { protected Set samples; protected Logger logger; protected double heterozygosity; - protected EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform; + protected EmpiricalSubstitutionProbabilities.SequencerPlatform defaultPlatform; protected GenotypeWriterFactory.GENOTYPE_FORMAT OUTPUT_FORMAT; protected boolean ALL_BASE_MODE; protected boolean GENOTYPE_MODE; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java index 5e24128ac..e6cd77b1e 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java @@ -6,7 +6,6 @@ import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import static java.lang.Math.log10; import static java.lang.Math.pow; -import java.util.Arrays; /** * Stable, error checking version of the Bayesian genotyper. Useful for calculating the likelihoods, priors, @@ -39,92 +38,117 @@ import java.util.Arrays; * From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above * model. */ -public abstract class GenotypeLikelihoods implements Cloneable { +public class GenotypeLikelihoods implements Cloneable { protected final static int FIXED_PLOIDY = 2; protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1; protected boolean enableCacheFlag = true; // - // The three fundamental data arrays associated with a Genotype Likelhoods object + // The fundamental data arrays associated with a Genotype Likelhoods object // - protected double[] likelihoods = null; - protected double[] posteriors = null; + protected double[] log10Likelihoods = null; + protected double[] log10Posteriors = null; private DiploidGenotypePriors priors = null; - /** - * If true, lots of output will be generated about the likelihoods of individual genotypes at each site - */ - private boolean verbose = false; - - /** - * Bases with Q scores below this threshold aren't included in the likelihood calculation - */ - private int minQScoreToInclude = 0; + private FourBaseProbabilities fourBaseLikelihoods = null; /** * Create a new GenotypeLikelhoods object with flat priors for each diploid genotype + * + * @param m base model */ - public GenotypeLikelihoods() { + public GenotypeLikelihoods(BaseMismatchModel m) { this.priors = new DiploidGenotypePriors(); - initialize(); + initialize(m, null); } /** * Create a new GenotypeLikelhoods object with flat priors for each diploid genotype + * + * @param m base model + * @param pl default platform */ - public GenotypeLikelihoods(DiploidGenotypePriors priors) { + public GenotypeLikelihoods(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) { + this.priors = new DiploidGenotypePriors(); + initialize(m, pl); + } + + /** + * Create a new GenotypeLikelhoods object with given priors for each diploid genotype + * + * @param m base model + * @param priors priors + */ + public GenotypeLikelihoods(BaseMismatchModel m, DiploidGenotypePriors priors) { this.priors = priors; - initialize(); + initialize(m, null); + } + + /** + * Create a new GenotypeLikelhoods object with given priors for each diploid genotype + * + * @param m base model + * @param priors priors + * @param pl default platform + */ + public GenotypeLikelihoods(BaseMismatchModel m, DiploidGenotypePriors priors, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) { + this.priors = priors; + initialize(m, pl); } /** * Cloning of the object - * @return + * @return clone * @throws CloneNotSupportedException */ protected Object clone() throws CloneNotSupportedException { GenotypeLikelihoods c = (GenotypeLikelihoods)super.clone(); c.priors = priors; - c.likelihoods = likelihoods.clone(); - c.posteriors = posteriors.clone(); + c.log10Likelihoods = log10Likelihoods.clone(); + c.log10Posteriors = log10Posteriors.clone(); + c.fourBaseLikelihoods = (FourBaseProbabilities)fourBaseLikelihoods.clone(); return c; } - private void initialize() { - likelihoods = zeros.clone(); // likelihoods are all zeros - posteriors = priors.getPriors().clone(); // posteriors are all the priors + private void initialize(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) { + fourBaseLikelihoods = FourBaseProbabilitiesFactory.makeFourBaseLikelihoods(m, pl); + initialize(); } + private void initialize() { + log10Likelihoods = zeros.clone(); // likelihoods are all zeros + log10Posteriors = priors.getPriors().clone(); // posteriors are all the priors + } public void setVerbose(boolean v) { - verbose = v; + fourBaseLikelihoods.setVerbose(v); } public boolean isVerbose() { - return verbose; + return fourBaseLikelihoods.isVerbose(); } public int getMinQScoreToInclude() { - return minQScoreToInclude; + return fourBaseLikelihoods.getMinQScoreToInclude(); } public void setMinQScoreToInclude(int minQScoreToInclude) { - this.minQScoreToInclude = minQScoreToInclude; + fourBaseLikelihoods.setMinQScoreToInclude(minQScoreToInclude); } /** * Returns an array of log10 likelihoods for each genotype, indexed by DiploidGenotype.ordinal values() - * @return + * @return likelihoods array */ public double[] getLikelihoods() { - return likelihoods; + return log10Likelihoods; } /** * Returns the likelihood associated with DiploidGenotype g - * @param g + * @param g genotype * @return log10 likelihood as a double */ public double getLikelihood(DiploidGenotype g) { @@ -137,12 +161,12 @@ public abstract class GenotypeLikelihoods implements Cloneable { * @return raw log10 (not-normalized posteriors) as a double array */ public double[] getPosteriors() { - return posteriors; + return log10Posteriors; } /** * Returns the posterior associated with DiploidGenotype g - * @param g + * @param g genotpe * @return raw log10 (not-normalized posteror) as a double */ public double getPosterior(DiploidGenotype g) { @@ -156,15 +180,15 @@ public abstract class GenotypeLikelihoods implements Cloneable { * @return normalized posterors as a double array */ public double[] getNormalizedPosteriors() { - double[] normalized = new double[posteriors.length]; + double[] normalized = new double[log10Posteriors.length]; double sum = 0.0; // for precision purposes, we need to add (or really subtract, since everything is negative) // the largest posterior value from all entries so that numbers don't get too small - double maxValue = posteriors[0]; - for (int i = 1; i < posteriors.length; i++) { - if ( maxValue < posteriors[i] ) - maxValue = posteriors[i]; + double maxValue = log10Posteriors[0]; + for (int i = 1; i < log10Posteriors.length; i++) { + if ( maxValue < log10Posteriors[i] ) + maxValue = log10Posteriors[i]; } // collect the posteriors @@ -198,19 +222,20 @@ public abstract class GenotypeLikelihoods implements Cloneable { /** * Sets the priors + * @param priors priors */ public void setPriors(DiploidGenotypePriors priors) { this.priors = priors; - posteriors = zeros.clone(); + log10Posteriors = zeros.clone(); for ( DiploidGenotype g : DiploidGenotype.values() ) { int i = g.ordinal(); - posteriors[i] = priors.getPriors()[i] + likelihoods[i]; + log10Posteriors[i] = priors.getPriors()[i] + log10Likelihoods[i]; } } /** * Returns the prior associated with DiploidGenotype g - * @param g + * @param g genotype * @return log10 prior as a double */ public double getPrior(DiploidGenotype g) { @@ -233,24 +258,8 @@ public abstract class GenotypeLikelihoods implements Cloneable { enableCacheFlag = enable; } - // ----------------------------------------------------------------------------------------------------------------- - // - // - // add() -- the heart of - // - // - // ----------------------------------------------------------------------------------------------------------------- - - /** - * Updates likelihoods and posteriors to reflect an additional observation of observedBase with - * qualityScore. - * - * @param observedBase - * @param qualityScore - * @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example) - */ - public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) { - return reallyAdd(observedBase, qualityScore, read, offset, true); + public int add(ReadBackedPileup pileup) { + return add(pileup, false); } /** @@ -258,7 +267,8 @@ public abstract class GenotypeLikelihoods implements Cloneable { * read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the * pileup * - * @param pileup + * @param pileup read pileup + * @param ignoreBadBases should we ignore bad bases * @return the number of good bases found in the pileup */ public int add(ReadBackedPileup pileup, boolean ignoreBadBases) { @@ -281,149 +291,127 @@ public abstract class GenotypeLikelihoods implements Cloneable { return n; } - public int add(ReadBackedPileup pileup) { - return add(pileup, false); - } + public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) { - - private int reallyAdd(char observedBase, byte qualityScore, SAMRecord read, int offset, boolean enableCacheArg ) { - if ( badBase(observedBase) ) { - throw new RuntimeException(String.format("BUG: unexpected base %c with Q%d passed to GenotypeLikelihoods", observedBase, qualityScore)); + // Handle caching if requested. Just look up the cached result if its available, or compute and store it + GenotypeLikelihoods gl; + if ( cacheIsEnabled() ) { + if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read) ) { + gl = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset); + } else { + gl = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read); + } + } else { + gl = calculateGenotypeLikelihoods(observedBase, qualityScore, read, offset); } - int nBasesAdded = 0; // the result -- how many bases did we add? + // for bad bases, there are no likelihoods + if ( gl == null ) + return 0; - if ( qualityScore > getMinQScoreToInclude() ) { - // Handle caching if requested. Just look up the cached result if its available, or compute and store it - boolean enableCache = enableCacheArg && cacheIsEnabled(); - GenotypeLikelihoods cached = null; - if ( enableCache ) { - if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read, offset ) ) { - cached = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset ); - } else { - cached = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset ); - } - } - - for ( DiploidGenotype g : DiploidGenotype.values() ) { - double likelihoodCalc = ! enableCache ? log10PofObservingBaseGivenGenotype(observedBase, g, qualityScore, read, offset) : 0.0; - double likelihoodCached = enableCache ? cached.likelihoods[g.ordinal()] : 0.0; - double likelihood = enableCache ? likelihoodCached : likelihoodCalc; - - //if ( enableCache && likelihoodCalc != 0.0 && MathUtils.compareDoubles(likelihoodCached, likelihoodCalc) != 0 ) { - // System.out.printf("ERROR: Likelihoods not equal is CACHE=%f != calc=%f for %c %d %s%n", - // likelihoodCached, likelihoodCalc, observedBase, qualityScore, g.toString()); - //} - - if ( isVerbose() ) { - boolean fwdStrand = ! read.getReadNegativeStrandFlag(); - System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f / %f%n", - observedBase, g, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood); - } - - likelihoods[g.ordinal()] += likelihood; - posteriors[g.ordinal()] += likelihood; - } + double[] likelihoods = gl.getLikelihoods(); + for ( DiploidGenotype g : DiploidGenotype.values() ) { + double likelihood = likelihoods[g.ordinal()]; + if ( isVerbose() ) { - for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); } - System.out.println(); - for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", likelihoods[g.ordinal()]); } - System.out.println(); + boolean fwdStrand = ! read.getReadNegativeStrandFlag(); + System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f / %f%n", + observedBase, g, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood); } - nBasesAdded = 1; + log10Likelihoods[g.ordinal()] += likelihood; + log10Posteriors[g.ordinal()] += likelihood; } - return nBasesAdded; + return 1; } - // ----------------------------------------------------------------------------------------------------------------- - // - // - // caching routines - // - // - // ----------------------------------------------------------------------------------------------------------------- + static GenotypeLikelihoods[][][][][][] CACHE = new GenotypeLikelihoods[BaseMismatchModel.values().length][EmpiricalSubstitutionProbabilities.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2]; - /** - * Procedure intended for overloading in subclasses. Returns / sets cached GL value given all of the information - * one can reasonably expect to have. The default cache is fairly simple. - * - * @param observedBase - * @param qualityScore - * @param ploidy - * @param read - * @param offset - * @param val - * @return - */ - protected abstract GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy, - SAMRecord read, int offset, GenotypeLikelihoods val ); + private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read) { + return getCache(CACHE, observedBase, qualityScore, ploidy, read) != null; + } - protected GenotypeLikelihoods simpleGetSetCache( GenotypeLikelihoods[][][][] cache, - char observedBase, byte qualityScore, int ploidy, - SAMRecord read, int offset, GenotypeLikelihoods val ) { + private GenotypeLikelihoods getCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, SAMRecord read) { + GenotypeLikelihoods gl = getCache(CACHE, observedBase, qualityScore, ploidy, read); + if ( gl == null ) + throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s", + observedBase, qualityScore, ploidy, read)); + return gl; + } + private GenotypeLikelihoods calculateCachedGenotypeLikelihoods(char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset) { + GenotypeLikelihoods gl = calculateGenotypeLikelihoods(observedBase, qualityScore, read, offset); + setCache(CACHE, observedBase, qualityScore, ploidy, read, gl); + return gl; + } + + protected void setCache( GenotypeLikelihoods[][][][][][] cache, + char observedBase, byte qualityScore, int ploidy, + SAMRecord read, GenotypeLikelihoods val ) { + int m = FourBaseProbabilitiesFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal(); + int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read); int i = BaseUtils.simpleBaseToBaseIndex(observedBase); int j = qualityScore; int k = ploidy; int x = strandIndex(! read.getReadNegativeStrandFlag() ); - if ( val != null ) - cache[i][j][k][x] = val; - - return cache[i][j][k][x]; + cache[m][a][i][j][k][x] = val; } - protected int strandIndex(boolean fwdStrand) { - return fwdStrand ? 0 : 1; + protected GenotypeLikelihoods getCache( GenotypeLikelihoods[][][][][][] cache, + char observedBase, byte qualityScore, int ploidy, SAMRecord read) { + int m = FourBaseProbabilitiesFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal(); + int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read); + int i = BaseUtils.simpleBaseToBaseIndex(observedBase); + int j = qualityScore; + int k = ploidy; + int x = strandIndex(! read.getReadNegativeStrandFlag() ); + return cache[m][a][i][j][k][x]; } - // - // All oft he following routines are totally generic - // + private GenotypeLikelihoods calculateGenotypeLikelihoods(char observedBase, byte qualityScore, SAMRecord read, int offset) { + FourBaseProbabilities fbl = fourBaseLikelihoods.computeLikelihoods(observedBase, qualityScore, read, offset); + if ( fbl == null ) + return null; - private GenotypeLikelihoods getCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) { - return getSetCache( observedBase, qualityScore, ploidy, read, offset, null ); - } - - private void setCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset, GenotypeLikelihoods val ) { - getSetCache( observedBase, qualityScore, ploidy, read, offset, val ); - } - - private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) { - return getCache(observedBase, qualityScore, ploidy, read, offset) != null; - } - - private GenotypeLikelihoods getCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) { - if ( ! inCache(observedBase, qualityScore, ploidy, read, offset ) ) - throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s", - observedBase, qualityScore, ploidy, read)); - - return getCache(observedBase, qualityScore, ploidy, read, offset); - } - - private GenotypeLikelihoods calculateCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) { - if ( inCache(observedBase, qualityScore, ploidy, read, offset ) ) - throw new RuntimeException(String.format("BUG: trying to set an already set cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s", - observedBase, qualityScore, ploidy, read)); - - // create a new genotype likelihoods object and add this single base to it -- now we have a cached result + double[] fbLikelihoods = fbl.getLog10Likelihoods(); try { - GenotypeLikelihoods g = (GenotypeLikelihoods)this.clone(); - g.initialize(); - g.reallyAdd(observedBase, qualityScore, read, offset, false); - setCache(observedBase, qualityScore, ploidy, read, offset, g); + GenotypeLikelihoods gl = (GenotypeLikelihoods)this.clone(); + gl.initialize(); - //System.out.printf("Caching %c %d %d %s %s (%d total entries)%n", observedBase, qualityScore, ploidy, read.getReadName(), EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read), cacheSize); - return g; - } catch ( CloneNotSupportedException e ) { - throw new RuntimeException(e); - } + // we need to adjust for ploidy. We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space + double ploidyAdjustment = log10(FIXED_PLOIDY); + + for ( DiploidGenotype g : DiploidGenotype.values() ) { + + // todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop + double p_base = 0.0; + p_base += pow(10, fbLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment); + p_base += pow(10, fbLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment); + double likelihood = log10(p_base); + + gl.log10Likelihoods[g.ordinal()] += likelihood; + gl.log10Posteriors[g.ordinal()] += likelihood; + } + + if ( fourBaseLikelihoods.isVerbose() ) { + for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); } + System.out.println(); + for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", gl.log10Likelihoods[g.ordinal()]); } + System.out.println(); + } + + return gl; + + } catch ( CloneNotSupportedException e ) { + throw new RuntimeException(e); + } } + // ----------------------------------------------------------------------------------------------------------------- // // @@ -432,13 +420,17 @@ public abstract class GenotypeLikelihoods implements Cloneable { // // ----------------------------------------------------------------------------------------------------------------- + protected int strandIndex(boolean fwdStrand) { + return fwdStrand ? 0 : 1; + } + /** * Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base * is considered bad if: * * Criterion 1: observed base isn't a A,C,T,G or lower case equivalent * - * @param observedBase + * @param observedBase observed base * @return true if the base is a bad base */ private boolean badBase(char observedBase) { @@ -448,14 +440,14 @@ public abstract class GenotypeLikelihoods implements Cloneable { /** * Return a string representation of this object in a moderately usable form * - * @return + * @return string representation */ public String toString() { double sum = 0; StringBuilder s = new StringBuilder(); for (DiploidGenotype g : DiploidGenotype.values()) { - s.append(String.format("%s %.10f ", g, likelihoods[g.ordinal()])); - sum += Math.pow(10,likelihoods[g.ordinal()]); + s.append(String.format("%s %.10f ", g, log10Likelihoods[g.ordinal()])); + sum += Math.pow(10,log10Likelihoods[g.ordinal()]); } s.append(String.format(" %f", sum)); return s.toString(); @@ -481,10 +473,10 @@ public abstract class GenotypeLikelihoods implements Cloneable { String bad = null; int i = g.ordinal(); - if ( ! MathUtils.wellFormedDouble(likelihoods[i]) || ! MathUtils.isNegativeOrZero(likelihoods[i]) ) { - bad = String.format("Likelihood %f is badly formed", likelihoods[i]); - } else if ( ! MathUtils.wellFormedDouble(posteriors[i]) || ! MathUtils.isNegativeOrZero(posteriors[i]) ) { - bad = String.format("Posterior %f is badly formed", posteriors[i]); + if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) { + bad = String.format("Likelihood %f is badly formed", log10Likelihoods[i]); + } else if ( ! MathUtils.wellFormedDouble(log10Posteriors[i]) || ! MathUtils.isNegativeOrZero(log10Posteriors[i]) ) { + bad = String.format("Posterior %f is badly formed", log10Posteriors[i]); } if ( bad != null ) { @@ -501,68 +493,6 @@ public abstract class GenotypeLikelihoods implements Cloneable { return true; } - // ----------------------------------------------------------------------------------------------------------------- - // - // - // Hearty math calculations follow - // - // -- these should not be messed with unless you know what you are doing - // - // ----------------------------------------------------------------------------------------------------------------- - - /** - * Works for any ploidy (in organization). - * - * @param observedBase - * @param g - * @param qual - * @return - */ - protected double log10PofObservingBaseGivenGenotype(char observedBase, DiploidGenotype g, byte qual, SAMRecord read, int offset ) { - if (qual == 0) { // zero quals are wrong - throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %s %d at %d in %s", - observedBase, g, qual, offset, read)); - } - - // todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop - double p_base = 0.0; - p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base1, qual, FIXED_PLOIDY, read, offset )); - p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base2, qual, FIXED_PLOIDY, read, offset )); - return log10(p_base); - } - - protected double log10PofObservingBaseGivenChromosome(char observedBase, char chromBase, byte qual, int ploidy, SAMRecord read, int offset) { - double logP = 0.0; - - if ( observedBase == chromBase ) { - // the base is consistent with the chromosome -- it's 1 - e - //logP = oneMinusData[qual]; - double e = pow(10, (qual / -10.0)); - logP = log10(1.0 - e); - } else { - // the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error) - logP = qual / -10.0 + log10PofTrueBaseGivenMiscall(observedBase, chromBase, read, offset); - } - - // adjust for ploidy. We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space - //logP -= log10N[ploidy]; - logP -= log10(ploidy); - - //System.out.printf("%c %c %d %d => %f%n", observedBase, chromBase, qual, ploidy, logP); - return logP; - } - - /** - * Must be overridden by concrete subclasses - * - * @param observedBase - * @param chromBase - * @param read - * @param offset - * @return - */ - protected abstract double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset); - // // Constant static data // diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsFactory.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsFactory.java deleted file mode 100755 index b1d638094..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsFactory.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import static org.broadinstitute.sting.gatk.walkers.genotyper.BaseMismatchModel.*; - -public class GenotypeLikelihoodsFactory { - //private GenotypeLikelihoodsFactory() {} // cannot be instantiated - - public static BaseMismatchModel getBaseMismatchModel(final String name) { - BaseMismatchModel m = valueOf(name); - if ( m == null ) - throw new RuntimeException("Unexpected BaseMismatchModel " + name); - else - return m; - } - - /** - * General and correct way to create GenotypeLikelihood objects for arbitrary base mismatching models - * - * @param m - * @param priors - * @param pl - * @return - */ - public static GenotypeLikelihoods makeGenotypeLikelihoods(BaseMismatchModel m, - DiploidGenotypePriors priors, - EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform pl ) { - switch ( m ) { - case ONE_STATE: return new OneStateErrorGenotypeLikelihoods(priors); - case THREE_STATE: return new ThreeStateErrorGenotypeLikelihoods(priors); - case EMPIRICAL: return new EmpiricalSubstitutionGenotypeLikelihoods(priors, pl); - default: throw new RuntimeException("Unexpected BaseMismatchModel " + m); - } - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index 4ab3298ea..1d5139de4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -106,7 +106,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); // create the GenotypeLikelihoods object - GenotypeLikelihoods GL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform); + GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); GL.add(pileup, true); GLs.put(sample, GL); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OneStateErrorGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OneStateErrorGenotypeLikelihoods.java deleted file mode 100755 index 6651a933f..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OneStateErrorGenotypeLikelihoods.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.QualityUtils; - -import net.sf.samtools.SAMRecord; - -/** - * This implements the old style CRD calculation of the chance that a base being a true chromBase given - * an miscalled base, in which the p is e, grabbing all of the probability. It shouldn't be used - */ -public class OneStateErrorGenotypeLikelihoods extends GenotypeLikelihoods { - // - // forwarding constructors -- don't do anything at all - // - public OneStateErrorGenotypeLikelihoods() { super(); } - public OneStateErrorGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); } - - /** - * Cloning of the object - * @return - * @throws CloneNotSupportedException - */ - protected Object clone() throws CloneNotSupportedException { - return super.clone(); - } - - /** - * - * @param observedBase - * @param chromBase - * @param read - * @param offset - * @return - */ - protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) { - return 0; // equivalent to e model - } - - static GenotypeLikelihoods[][][][] ONE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2]; - protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy, - SAMRecord read, int offset, GenotypeLikelihoods val ) { - return simpleGetSetCache(ONE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OneStateErrorProbabilities.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OneStateErrorProbabilities.java new file mode 100755 index 000000000..13b9efeb2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OneStateErrorProbabilities.java @@ -0,0 +1,35 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import net.sf.samtools.SAMRecord; + +/** + * This implements the old style CRD calculation of the chance that a base being a true chromBase given + * an miscalled base, in which the p is e, grabbing all of the probability. It shouldn't be used + */ +public class OneStateErrorProbabilities extends FourBaseProbabilities { + // + // forwarding constructors -- don't do anything at all + // + public OneStateErrorProbabilities() { super(); } + + /** + * Cloning of the object + * @return clone + * @throws CloneNotSupportedException + */ + protected Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + /** + * + * @param observedBase observed base + * @param chromBase target base + * @param read SAM read + * @param offset offset on read + * @return log10 likelihood + */ + protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) { + return 0; // equivalent to e model + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index c14615b1e..0dcee8a3c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -105,7 +105,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation pileup.setIncludeDeletionsInPileupString(true); // create the GenotypeLikelihoods object - GenotypeLikelihoods GL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform); + GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); GL.add(pileup, true); return new Pair(pileup, GL); } @@ -129,7 +129,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); // create the GenotypeLikelihoods object - GenotypeLikelihoods GL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, AFPriors, defaultPlatform); + GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform); GL.add(pileup, true); GLs.put(sample, GL); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ThreeStateErrorGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ThreeStateErrorGenotypeLikelihoods.java deleted file mode 100755 index 67c1dbad3..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ThreeStateErrorGenotypeLikelihoods.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.QualityUtils; - -import static java.lang.Math.log10; - -import net.sf.samtools.SAMRecord; - -public class ThreeStateErrorGenotypeLikelihoods extends GenotypeLikelihoods { - // - // forwarding constructors -- don't do anything at all - // - public ThreeStateErrorGenotypeLikelihoods() { super(); } - public ThreeStateErrorGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); } - - /** - * Cloning of the object - * @return - * @throws CloneNotSupportedException - */ - protected Object clone() throws CloneNotSupportedException { - return super.clone(); - } - - /** - * Simple log10(3) cached value - */ - protected static final double log103 = log10(3.0); - - protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) { - return -log103; // equivalent to e / 3 model - } - - static GenotypeLikelihoods[][][][] THREE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2]; - protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy, - SAMRecord read, int offset, GenotypeLikelihoods val ) { - return simpleGetSetCache(THREE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ThreeStateErrorProbabilities.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ThreeStateErrorProbabilities.java new file mode 100755 index 000000000..230435c0a --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ThreeStateErrorProbabilities.java @@ -0,0 +1,38 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import static java.lang.Math.log10; + +import net.sf.samtools.SAMRecord; + +public class ThreeStateErrorProbabilities extends FourBaseProbabilities { + // + // forwarding constructors -- don't do anything at all + // + public ThreeStateErrorProbabilities() { super(); } + + /** + * Cloning of the object + * @return clone + * @throws CloneNotSupportedException + */ + protected Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + /** + * Simple log10(3) cached value + */ + protected static final double log103 = log10(3.0); + + /** + * + * @param observedBase observed base + * @param chromBase target base + * @param read SAM read + * @param offset offset on read + * @return log10 likelihood + */ + protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) { + return -log103; // equivalent to e / 3 model + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 406a2c3ac..c2d2a0910 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -67,7 +67,7 @@ public class UnifiedArgumentCollection { public String ASSUME_SINGLE_SAMPLE = null; @Argument(fullName = "platform", shortName = "pl", doc = "Causes the genotyper to assume that reads without PL header TAG are this platform. Defaults to null, indicating that the system will throw a runtime exception when such reads are detected", required = false) - public EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform = null; + public EmpiricalSubstitutionProbabilities.SequencerPlatform defaultPlatform = null; // control the various parameters to be used diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java index 468cb0b7e..dbe8a16f8 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java @@ -119,7 +119,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker>{ } //Calculate posterior probabilities - GenotypeLikelihoods G = new ThreeStateErrorGenotypeLikelihoods(); + GenotypeLikelihoods G = new GenotypeLikelihoods(BaseMismatchModel.THREE_STATE); SAMRecord read; int offset; char base; byte qual; int mapquality; String readname; //Check for bad bases and ensure mapping quality myself. This works.