Experitmental support for empirical P(B_true | B_miscall). --useEmpiricalTransitions flag to SSG enables this support. Much better implementation of Genotype likelihoods -- the system should scream along now. Continuing progress towards deleting old model
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1469 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ab9458d06d
commit
bf60980653
|
|
@ -10,16 +10,22 @@ import org.broadinstitute.sting.utils.Utils;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public enum DiploidGenotype {
|
public enum DiploidGenotype {
|
||||||
AA,
|
AA ('A', 'A'),
|
||||||
AC,
|
AC ('A', 'C'),
|
||||||
AG,
|
AG ('A', 'G'),
|
||||||
AT,
|
AT ('A', 'T'),
|
||||||
CC,
|
CC ('C', 'C'),
|
||||||
CG,
|
CG ('C', 'G'),
|
||||||
CT,
|
CT ('C', 'T'),
|
||||||
GG,
|
GG ('G', 'G'),
|
||||||
GT,
|
GT ('G', 'T'),
|
||||||
TT;
|
TT ('T', 'T');
|
||||||
|
|
||||||
|
public char base1, base2;
|
||||||
|
private DiploidGenotype(char base1, char base2) {
|
||||||
|
this.base1 = base1;
|
||||||
|
this.base2 = base2;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isHomRef(char r) {
|
public boolean isHomRef(char r) {
|
||||||
return isHom() && r == this.toString().charAt(0);
|
return isHom() && r == this.toString().charAt(0);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,112 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
|
import static java.lang.Math.log10;
|
||||||
|
import static java.lang.Math.pow;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mdepristo
|
||||||
|
* Date: Aug 23, 2009
|
||||||
|
* Time: 9:47:33 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class EmpiricalSubstitutionGenotypeLikelihoods extends NewHotnessGenotypeLikelihoods {
|
||||||
|
private final static double[][] misCallProbs = new double[4][4];
|
||||||
|
|
||||||
|
private static void addMisCall(char miscalledBase, char trueBase, double p) {
|
||||||
|
int i = BaseUtils.simpleBaseToBaseIndex(miscalledBase);
|
||||||
|
int j = BaseUtils.simpleBaseToBaseIndex(trueBase);
|
||||||
|
misCallProbs[i][j] = log10(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static double getProbMiscallIsBase(char miscalledBase, char trueBase) {
|
||||||
|
int i = BaseUtils.simpleBaseToBaseIndex(miscalledBase);
|
||||||
|
int j = BaseUtils.simpleBaseToBaseIndex(trueBase);
|
||||||
|
|
||||||
|
double logP = misCallProbs[i][j];
|
||||||
|
if ( logP == 0.0 )
|
||||||
|
throw new RuntimeException(String.format("Bad miscall base request miscalled=%c true=%b", miscalledBase, trueBase));
|
||||||
|
else
|
||||||
|
return logP;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cloning of the object
|
||||||
|
* @return
|
||||||
|
* @throws CloneNotSupportedException
|
||||||
|
*/
|
||||||
|
protected Object clone() throws CloneNotSupportedException {
|
||||||
|
return super.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
// NA12878 CHR1 solexa table
|
||||||
|
//
|
||||||
|
// True base A C G T Totals
|
||||||
|
// Error base Counts 1190455 686187 685080 1227860 3789582
|
||||||
|
// A 938661 0.0% 48.6% 16.3% 35.1% 100.00%
|
||||||
|
// C 955469 60.6% 0.0% 7.9% 31.5% 100.00%
|
||||||
|
// G 964246 30.2% 7.8% 0.0% 62.0% 100.00%
|
||||||
|
// T 931206 34.4% 16.6% 49.0% 0.0% 100.00%
|
||||||
|
//
|
||||||
|
// V2 table -- didn't incorporate complements based on read orientation
|
||||||
|
// static {
|
||||||
|
// addMisCall('A', 'C', 48.6/100.0);
|
||||||
|
// addMisCall('A', 'G', 16.3/100.0);
|
||||||
|
// addMisCall('A', 'T', 35.1/100.0);
|
||||||
|
//
|
||||||
|
// addMisCall('C', 'A', 60.6/100.0);
|
||||||
|
// addMisCall('C', 'G', 7.9/100.0);
|
||||||
|
// addMisCall('C', 'T', 31.5/100.0);
|
||||||
|
//
|
||||||
|
// addMisCall('G', 'A', 30.2/100.0);
|
||||||
|
// addMisCall('G', 'C', 7.8/100.0);
|
||||||
|
// addMisCall('G', 'T', 62.0/100.0);
|
||||||
|
//
|
||||||
|
// addMisCall('T', 'A', 34.4/100.0);
|
||||||
|
// addMisCall('T', 'C', 16.6/100.0);
|
||||||
|
// addMisCall('T', 'G', 49.9/100.0);
|
||||||
|
// }
|
||||||
|
|
||||||
|
//True base A C G T Totals
|
||||||
|
//Error base Counts 1209203 718053 653214 1209112 3789582
|
||||||
|
//A 809944 0.0% 59.2% 15.3% 25.6% 100.00%
|
||||||
|
//C 934612 54.2% 0.0% 10.3% 35.5% 100.00%
|
||||||
|
//G 985103 26.4% 5.6% 0.0% 68.0% 100.00%
|
||||||
|
//T 1059923 41.8% 17.3% 40.9% 0.0% 100.00%
|
||||||
|
static {
|
||||||
|
addMisCall('A', 'C', 59.2/100.0);
|
||||||
|
addMisCall('A', 'G', 15.3/100.0);
|
||||||
|
addMisCall('A', 'T', 25.6/100.0);
|
||||||
|
|
||||||
|
addMisCall('C', 'A', 54.2/100.0);
|
||||||
|
addMisCall('C', 'G', 10.3/100.0);
|
||||||
|
addMisCall('C', 'T', 35.5/100.0);
|
||||||
|
|
||||||
|
addMisCall('G', 'A', 26.4/100.0);
|
||||||
|
addMisCall('G', 'C', 5.6/100.0);
|
||||||
|
addMisCall('G', 'T', 68.0/100.0);
|
||||||
|
|
||||||
|
addMisCall('T', 'A', 41.8/100.0);
|
||||||
|
addMisCall('T', 'C', 17.3/100.0);
|
||||||
|
addMisCall('T', 'G', 40.9/100.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// forwarding constructors -- don't do anything at all
|
||||||
|
//
|
||||||
|
public EmpiricalSubstitutionGenotypeLikelihoods() { super(); }
|
||||||
|
public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); }
|
||||||
|
|
||||||
|
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, boolean fwdStrand) {
|
||||||
|
if ( fwdStrand ) {
|
||||||
|
return getProbMiscallIsBase(observedBase, chromBase);
|
||||||
|
} else {
|
||||||
|
double v = getProbMiscallIsBase(BaseUtils.simpleComplement(observedBase), BaseUtils.simpleComplement(chromBase));
|
||||||
|
//System.out.printf("R: %c/%c => %f compared to %f%n", observedBase, chromBase, pow(10, getProbMiscallIsBase(observedBase, chromBase)), pow(10, v));
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -14,25 +14,5 @@ public abstract class GenotypeLikelihoods {
|
||||||
* during GL calculations. If this field is true, Q0 bases will be removed in add().
|
* during GL calculations. If this field is true, Q0 bases will be removed in add().
|
||||||
*/
|
*/
|
||||||
protected boolean filterQ0Bases = true;
|
protected boolean filterQ0Bases = true;
|
||||||
|
//public abstract int add(char ref, char read, byte qual);
|
||||||
protected static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
|
||||||
protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
|
|
||||||
protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
|
|
||||||
protected static final double log10Of1_3 = log10(1.0 / 3.0);
|
|
||||||
|
|
||||||
static {
|
|
||||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
|
||||||
oneMinusData[qual] = log10(1.0 - pow(10, (qual / -10.0)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static {
|
|
||||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
|
||||||
double e = pow(10, (qual / -10.0));
|
|
||||||
oneHalfMinusDataArachne[qual] = log10(0.5 - e / 2.0);
|
|
||||||
oneHalfMinusData3Base[qual] = log10(0.5 - e / 2.0 + e / 6.0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public abstract int add(char ref, char read, byte qual);
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,12 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
|
||||||
import edu.mit.broad.picard.util.MathUtil;
|
import edu.mit.broad.picard.util.MathUtil;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import static java.lang.Math.log10;
|
||||||
|
import static java.lang.Math.pow;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stable, error checking version of the Bayesian genotyper. Useful for calculating the likelihoods, priors,
|
* Stable, error checking version of the Bayesian genotyper. Useful for calculating the likelihoods, priors,
|
||||||
|
|
@ -40,14 +39,15 @@ import java.util.Arrays;
|
||||||
* From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above
|
* From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above
|
||||||
* model.
|
* model.
|
||||||
*/
|
*/
|
||||||
public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implements Cloneable {
|
||||||
private int coverage = 0;
|
private final static int FIXED_PLOIDY = 2;
|
||||||
|
private final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
||||||
|
|
||||||
//
|
//
|
||||||
// The three fundamental data arrays associated with a Genotype Likelhoods object
|
// The three fundamental data arrays associated with a Genotype Likelhoods object
|
||||||
//
|
//
|
||||||
private double[] likelihoods = null;
|
protected double[] likelihoods = null;
|
||||||
private double[] posteriors = null;
|
protected double[] posteriors = null;
|
||||||
|
|
||||||
private DiploidGenotypePriors priors = null;
|
private DiploidGenotypePriors priors = null;
|
||||||
|
|
||||||
|
|
@ -67,6 +67,19 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
initialize();
|
initialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cloning of the object
|
||||||
|
* @return
|
||||||
|
* @throws CloneNotSupportedException
|
||||||
|
*/
|
||||||
|
protected Object clone() throws CloneNotSupportedException {
|
||||||
|
NewHotnessGenotypeLikelihoods c = (NewHotnessGenotypeLikelihoods)super.clone();
|
||||||
|
c.priors = priors;
|
||||||
|
c.likelihoods = likelihoods.clone();
|
||||||
|
c.posteriors = posteriors.clone();
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
private void initialize() {
|
private void initialize() {
|
||||||
likelihoods = zeros.clone(); // likelihoods are all zeros
|
likelihoods = zeros.clone(); // likelihoods are all zeros
|
||||||
posteriors = priors.getPriors().clone(); // posteriors are all the priors
|
posteriors = priors.getPriors().clone(); // posteriors are all the priors
|
||||||
|
|
@ -107,6 +120,10 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
return getPosteriors()[g.ordinal()];
|
return getPosteriors()[g.ordinal()];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DiploidGenotypePriors getPriorObject() {
|
||||||
|
return priors;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values().
|
* Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values().
|
||||||
*
|
*
|
||||||
|
|
@ -125,10 +142,6 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
return getPriors()[g.ordinal()];
|
return getPriors()[g.ordinal()];
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getCoverage() {
|
|
||||||
return coverage;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Are we ignoring Q0 bases during calculations?
|
* Are we ignoring Q0 bases during calculations?
|
||||||
* @return
|
* @return
|
||||||
|
|
@ -146,13 +159,13 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
this.filterQ0Bases = filterQ0Bases;
|
this.filterQ0Bases = filterQ0Bases;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
* Remove call -- for historical purposes only
|
//
|
||||||
*/
|
//
|
||||||
@Deprecated
|
// add() -- the heart of
|
||||||
public int add(char ref, char read, byte qual) {
|
//
|
||||||
return add(read, qual);
|
//
|
||||||
}
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates likelihoods and posteriors to reflect an additional observation of observedBase with
|
* Updates likelihoods and posteriors to reflect an additional observation of observedBase with
|
||||||
|
|
@ -162,12 +175,16 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
* @param qualityScore
|
* @param qualityScore
|
||||||
* @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example)
|
* @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example)
|
||||||
*/
|
*/
|
||||||
public int add(char observedBase, byte qualityScore)
|
public int add(char observedBase, byte qualityScore, boolean fwdStrand) {
|
||||||
{
|
return add(observedBase, qualityScore, fwdStrand, true, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int add(char observedBase, byte qualityScore, boolean fwdStrand, boolean enableCache, boolean verbose) {
|
||||||
if ( badBase(observedBase) ) {
|
if ( badBase(observedBase) ) {
|
||||||
throw new RuntimeException(String.format("BUG: unexpected base %c with Q%d passed to GenotypeLikelihoods", observedBase, qualityScore));
|
throw new RuntimeException(String.format("BUG: unexpected base %c with Q%d passed to GenotypeLikelihoods", observedBase, qualityScore));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO -- we should probably filter Q1 bases too
|
||||||
if (qualityScore <= 0) {
|
if (qualityScore <= 0) {
|
||||||
if ( isFilteringQ0Bases() ) {
|
if ( isFilteringQ0Bases() ) {
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -176,17 +193,130 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
coverage++;
|
// Handle caching if requested. Just look up the cached result if its available, or compute and store it
|
||||||
for (DiploidGenotype g : DiploidGenotype.values() ) {
|
NewHotnessGenotypeLikelihoods cached = null;
|
||||||
double likelihood = calculateBaseLikelihood(observedBase, g, qualityScore);
|
if ( enableCache ) {
|
||||||
//System.out.printf("Likelihood is %f for %c %d %s%n", likelihood, read, qual, g.toString());
|
if ( ! inCache(observedBase, qualityScore, FIXED_PLOIDY, fwdStrand) ) {
|
||||||
|
cached = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, fwdStrand);
|
||||||
|
} else {
|
||||||
|
cached = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, fwdStrand);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||||
|
double likelihoodCalc = ! enableCache ? log10PofObservingBaseGivenGenotype(observedBase, g, qualityScore, fwdStrand) : 0.0;
|
||||||
|
double likelihoodCached = enableCache ? cached.likelihoods[g.ordinal()] : 0.0;
|
||||||
|
double likelihood = enableCache ? likelihoodCached : likelihoodCalc;
|
||||||
|
|
||||||
|
//if ( enableCache && likelihoodCalc != 0.0 && MathUtils.compareDoubles(likelihoodCached, likelihoodCalc) != 0 ) {
|
||||||
|
// System.out.printf("ERROR: Likelihoods not equal is cache=%f != calc=%f for %c %d %s%n",
|
||||||
|
// likelihoodCached, likelihoodCalc, observedBase, qualityScore, g.toString());
|
||||||
|
//}
|
||||||
|
if ( verbose )
|
||||||
|
System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f%n", observedBase, g, qualityScore, fwdStrand ? "+" : "-", likelihood);
|
||||||
|
|
||||||
likelihoods[g.ordinal()] += likelihood;
|
likelihoods[g.ordinal()] += likelihood;
|
||||||
posteriors[g.ordinal()] += likelihood;
|
posteriors[g.ordinal()] += likelihood;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( verbose ) {
|
||||||
|
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); }
|
||||||
|
System.out.println();
|
||||||
|
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", likelihoods[g.ordinal()]); }
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates likelihoods and posteriors to reflect the additional observations contained within the
|
||||||
|
* read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the
|
||||||
|
* pileup
|
||||||
|
*
|
||||||
|
* @param pileup
|
||||||
|
* @return the number of good bases found in the pileup
|
||||||
|
*/
|
||||||
|
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean verbose) {
|
||||||
|
int n = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||||
|
SAMRecord read = pileup.getReads().get(i);
|
||||||
|
int offset = pileup.getOffsets().get(i);
|
||||||
|
char base = read.getReadString().charAt(offset);
|
||||||
|
byte qual = read.getBaseQualities()[offset];
|
||||||
|
if ( ! ignoreBadBases || ! badBase(base) ) {
|
||||||
|
n += add(base, qual, ! read.getReadNegativeStrandFlag(), true, verbose);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int add(ReadBackedPileup pileup, boolean ignoreBadBases) {
|
||||||
|
return add(pileup, ignoreBadBases, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public int add(ReadBackedPileup pileup) {
|
||||||
|
return add(pileup, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// caching routines
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
final static NewHotnessGenotypeLikelihoods[][][][] cache = new NewHotnessGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||||
|
static int cacheSize = 0;
|
||||||
|
|
||||||
|
private int strandIndex(boolean fwdStrand) {
|
||||||
|
return fwdStrand ? 0 : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean inCache( char observedBase, byte qualityScore, int ploidy, boolean fwdStrand ) {
|
||||||
|
return cache[BaseUtils.simpleBaseToBaseIndex(observedBase)][qualityScore][ploidy][strandIndex(fwdStrand)] != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private NewHotnessGenotypeLikelihoods getCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, boolean fwdStrand ) {
|
||||||
|
if ( ! inCache(observedBase, qualityScore, ploidy, fwdStrand ) )
|
||||||
|
throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, fwdstrand=%b",
|
||||||
|
observedBase, qualityScore, ploidy, fwdStrand));
|
||||||
|
|
||||||
|
return cache[BaseUtils.simpleBaseToBaseIndex(observedBase)][qualityScore][ploidy][strandIndex(fwdStrand)];
|
||||||
|
}
|
||||||
|
|
||||||
|
private NewHotnessGenotypeLikelihoods calculateCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, boolean fwdStrand ) {
|
||||||
|
if ( inCache(observedBase, qualityScore, ploidy, fwdStrand ) )
|
||||||
|
throw new RuntimeException(String.format("BUG: trying to set an already set cached genotype likelihood at base=%c, qual=%d, ploidy=%d, fwdstrand=%b",
|
||||||
|
observedBase, qualityScore, ploidy, fwdStrand));
|
||||||
|
|
||||||
|
// create a new genotype likelihoods object and add this single base to it -- now we have a cached result
|
||||||
|
try {
|
||||||
|
NewHotnessGenotypeLikelihoods g = (NewHotnessGenotypeLikelihoods)this.clone();
|
||||||
|
g.initialize();
|
||||||
|
g.add(observedBase, qualityScore, fwdStrand, false, false);
|
||||||
|
|
||||||
|
cache[BaseUtils.simpleBaseToBaseIndex(observedBase)][qualityScore][ploidy][strandIndex(fwdStrand)] = g;
|
||||||
|
cacheSize++;
|
||||||
|
|
||||||
|
//System.out.printf("Caching %c %d %d %d %b (%d total entries)%n", observedBase, BaseUtils.simpleBaseToBaseIndex(observedBase), qualityScore, ploidy, fwdStrand, cacheSize);
|
||||||
|
return g;
|
||||||
|
} catch ( CloneNotSupportedException e ) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// helper routines
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base
|
* Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base
|
||||||
* is considered bad if:
|
* is considered bad if:
|
||||||
|
|
@ -200,56 +330,6 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1;
|
return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Updates likelihoods and posteriors to reflect the additional observations contained within the
|
|
||||||
* read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the
|
|
||||||
* pileup
|
|
||||||
*
|
|
||||||
* @param pileup
|
|
||||||
* @return the number of good bases found in the pileup
|
|
||||||
*/
|
|
||||||
public int add(ReadBackedPileup pileup, boolean ignoreBadBases) {
|
|
||||||
int n = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < pileup.getReads().size(); i++) {
|
|
||||||
SAMRecord read = pileup.getReads().get(i);
|
|
||||||
int offset = pileup.getOffsets().get(i);
|
|
||||||
char base = read.getReadString().charAt(offset);
|
|
||||||
byte qual = read.getBaseQualities()[offset];
|
|
||||||
if ( ! ignoreBadBases || ! badBase(base) ) {
|
|
||||||
n += add(base, qual);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int add(ReadBackedPileup pileup) {
|
|
||||||
return add(pileup, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
private double calculateBaseLikelihood(char observedBase, DiploidGenotype g, byte qual) {
|
|
||||||
if (qual == 0) { // zero quals are wrong
|
|
||||||
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %s %d", observedBase, g, qual));
|
|
||||||
}
|
|
||||||
|
|
||||||
double p_base = 0.0;
|
|
||||||
|
|
||||||
if ( g.isHomRef(observedBase) ) {
|
|
||||||
// hom
|
|
||||||
p_base = getOneMinusQual(qual);
|
|
||||||
} else if ( g.isHetRef(observedBase) ) {
|
|
||||||
// het
|
|
||||||
p_base = getOneHalfMinusQual(qual);
|
|
||||||
} else {
|
|
||||||
// error
|
|
||||||
//System.out.printf("%s %b %f %f%n", genotype, h1 != h2, log10Of2_3, log10Of1_3 );
|
|
||||||
p_base = qual / -10.0 + log10Of1_3;
|
|
||||||
}
|
|
||||||
|
|
||||||
return p_base;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a string representation of this object in a moderately usable form
|
* Return a string representation of this object in a moderately usable form
|
||||||
*
|
*
|
||||||
|
|
@ -266,6 +346,14 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
return s.toString();
|
return s.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Validation routines
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
public boolean validate() {
|
public boolean validate() {
|
||||||
return validate(true);
|
return validate(true);
|
||||||
}
|
}
|
||||||
|
|
@ -298,12 +386,63 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static double getOneMinusQual(final byte qual) {
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
return oneMinusData[qual];
|
//
|
||||||
|
//
|
||||||
|
// Hearty math calculations follow
|
||||||
|
//
|
||||||
|
// -- these should not be messed with unless you know what you are doing
|
||||||
|
//
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Works for any ploidy (in organization).
|
||||||
|
*
|
||||||
|
* @param observedBase
|
||||||
|
* @param g
|
||||||
|
* @param qual
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected double log10PofObservingBaseGivenGenotype(char observedBase, DiploidGenotype g, byte qual, boolean fwdStrand) {
|
||||||
|
if (qual == 0) { // zero quals are wrong
|
||||||
|
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %s %d", observedBase, g, qual));
|
||||||
|
}
|
||||||
|
|
||||||
|
// todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop
|
||||||
|
double p_base = 0.0;
|
||||||
|
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base1, qual, FIXED_PLOIDY, fwdStrand));
|
||||||
|
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base2, qual, FIXED_PLOIDY, fwdStrand));
|
||||||
|
return log10(p_base);
|
||||||
}
|
}
|
||||||
|
|
||||||
private double getOneHalfMinusQual(final byte qual) {
|
protected double log10PofObservingBaseGivenChromosome(char observedBase, char chromBase, byte qual, int ploidy, boolean fwdStrand) {
|
||||||
return oneHalfMinusData3Base[qual];
|
double logP = 0.0;
|
||||||
|
|
||||||
|
if ( observedBase == chromBase ) {
|
||||||
|
// the base is consistent with the chromosome -- it's 1 - e
|
||||||
|
//logP = oneMinusData[qual];
|
||||||
|
double e = pow(10, (qual / -10.0));
|
||||||
|
logP = log10(1.0 - e);
|
||||||
|
} else {
|
||||||
|
// the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error)
|
||||||
|
logP = qual / -10.0 + log10PofTrueBaseGivenMiscall(observedBase, chromBase, fwdStrand);
|
||||||
|
}
|
||||||
|
|
||||||
|
// adjust for ploidy. We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space
|
||||||
|
//logP -= log10N[ploidy];
|
||||||
|
logP -= log10(ploidy);
|
||||||
|
|
||||||
|
//System.out.printf("%c %c %d %d => %f%n", observedBase, chromBase, qual, ploidy, logP);
|
||||||
|
return logP;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple log10(3) cached value
|
||||||
|
*/
|
||||||
|
protected static final double log103 = log10(3.0);
|
||||||
|
|
||||||
|
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, boolean fwdStrand) {
|
||||||
|
return -log103; // currently equivalent to e/3 model
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,21 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class OldAndBustedGenotypeLikelihoods extends GenotypeLikelihoods {
|
public class OldAndBustedGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
|
protected static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
||||||
|
protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
|
||||||
|
protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
|
||||||
|
//protected static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE];
|
||||||
|
protected static final double log10Of1_3 = log10(1.0 / 3.0);
|
||||||
|
|
||||||
|
static {
|
||||||
|
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
||||||
|
double e = pow(10, (qual / -10.0));
|
||||||
|
oneMinusData[qual] = log10(1.0 - e);
|
||||||
|
oneHalfMinusDataArachne[qual] = log10(0.5 - e / 2.0);
|
||||||
|
oneHalfMinusData3Base[qual] = log10(0.5 - e / 2.0 + e / 6.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static double getOneMinusQual(final byte qual) {
|
private static double getOneMinusQual(final byte qual) {
|
||||||
return oneMinusData[qual];
|
return oneMinusData[qual];
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,12 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||||
public Double heterozygosity = DiploidGenotypePriors.HUMAN_HETEROZYGOSITY;
|
public Double heterozygosity = DiploidGenotypePriors.HUMAN_HETEROZYGOSITY;
|
||||||
|
|
||||||
|
@Argument(fullName = "useEmpiricalTransitions", shortName = "useEmpiricalTransitions", doc = "EXPERIMENTAL", required = false)
|
||||||
|
public boolean useEmpiricalTransitions = false;
|
||||||
|
|
||||||
|
@Argument(fullName = "verbose", shortName = "v", doc = "EXPERIMENTAL", required = false)
|
||||||
|
public boolean VERBOSE = false;
|
||||||
|
|
||||||
// todo - print out priors at the start of SSG with .INFO
|
// todo - print out priors at the start of SSG with .INFO
|
||||||
|
|
||||||
// todo -- remove dbSNP awareness until we understand how it should be used
|
// todo -- remove dbSNP awareness until we understand how it should be used
|
||||||
|
|
@ -77,10 +83,12 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
public SSGGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
public SSGGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
||||||
char ref = refContext.getBase();
|
char ref = refContext.getBase();
|
||||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||||
NewHotnessGenotypeLikelihoods G = new NewHotnessGenotypeLikelihoods(priors);
|
|
||||||
|
NewHotnessGenotypeLikelihoods G = useEmpiricalTransitions ? new EmpiricalSubstitutionGenotypeLikelihoods(priors) : new NewHotnessGenotypeLikelihoods(priors);
|
||||||
|
|
||||||
G.filterQ0Bases(! keepQ0Bases);
|
G.filterQ0Bases(! keepQ0Bases);
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
G.add(pileup, true);
|
G.add(pileup, true, VERBOSE);
|
||||||
G.validate();
|
G.validate();
|
||||||
|
|
||||||
// lets setup the locus
|
// lets setup the locus
|
||||||
|
|
@ -89,6 +97,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
lst.add(new BasicGenotype(pileup.getLocation(), g.toString(),new BayesianConfidenceScore(G.getLikelihood(g))));
|
lst.add(new BasicGenotype(pileup.getLocation(), g.toString(),new BayesianConfidenceScore(G.getLikelihood(g))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//System.out.printf("At %s%n", new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, G.getPosteriors(), pileup));
|
||||||
return new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, G.getPosteriors(), pileup);
|
return new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, G.getPosteriors(), pileup);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue