Experimental support for empirical P(B_true | B_miscall). --useEmpiricalTransitions flag to SSG enables this support. Much better implementation of Genotype likelihoods -- the system should scream along now. Continuing progress towards deleting old model
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1469 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ab9458d06d
commit
bf60980653
|
|
@ -10,16 +10,22 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public enum DiploidGenotype {
|
||||
AA,
|
||||
AC,
|
||||
AG,
|
||||
AT,
|
||||
CC,
|
||||
CG,
|
||||
CT,
|
||||
GG,
|
||||
GT,
|
||||
TT;
|
||||
AA ('A', 'A'),
|
||||
AC ('A', 'C'),
|
||||
AG ('A', 'G'),
|
||||
AT ('A', 'T'),
|
||||
CC ('C', 'C'),
|
||||
CG ('C', 'G'),
|
||||
CT ('C', 'T'),
|
||||
GG ('G', 'G'),
|
||||
GT ('G', 'T'),
|
||||
TT ('T', 'T');
|
||||
|
||||
public char base1, base2;
|
||||
private DiploidGenotype(char base1, char base2) {
|
||||
this.base1 = base1;
|
||||
this.base2 = base2;
|
||||
}
|
||||
|
||||
public boolean isHomRef(char r) {
|
||||
return isHom() && r == this.toString().charAt(0);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,112 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mdepristo
|
||||
* Date: Aug 23, 2009
|
||||
* Time: 9:47:33 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class EmpiricalSubstitutionGenotypeLikelihoods extends NewHotnessGenotypeLikelihoods {
|
||||
private final static double[][] misCallProbs = new double[4][4];
|
||||
|
||||
private static void addMisCall(char miscalledBase, char trueBase, double p) {
|
||||
int i = BaseUtils.simpleBaseToBaseIndex(miscalledBase);
|
||||
int j = BaseUtils.simpleBaseToBaseIndex(trueBase);
|
||||
misCallProbs[i][j] = log10(p);
|
||||
}
|
||||
|
||||
private static double getProbMiscallIsBase(char miscalledBase, char trueBase) {
|
||||
int i = BaseUtils.simpleBaseToBaseIndex(miscalledBase);
|
||||
int j = BaseUtils.simpleBaseToBaseIndex(trueBase);
|
||||
|
||||
double logP = misCallProbs[i][j];
|
||||
if ( logP == 0.0 )
|
||||
throw new RuntimeException(String.format("Bad miscall base request miscalled=%c true=%b", miscalledBase, trueBase));
|
||||
else
|
||||
return logP;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
return super.clone();
|
||||
}
|
||||
|
||||
// NA12878 CHR1 solexa table
|
||||
//
|
||||
// True base A C G T Totals
|
||||
// Error base Counts 1190455 686187 685080 1227860 3789582
|
||||
// A 938661 0.0% 48.6% 16.3% 35.1% 100.00%
|
||||
// C 955469 60.6% 0.0% 7.9% 31.5% 100.00%
|
||||
// G 964246 30.2% 7.8% 0.0% 62.0% 100.00%
|
||||
// T 931206 34.4% 16.6% 49.0% 0.0% 100.00%
|
||||
//
|
||||
// V2 table -- didn't incorporate complements based on read orientation
|
||||
// static {
|
||||
// addMisCall('A', 'C', 48.6/100.0);
|
||||
// addMisCall('A', 'G', 16.3/100.0);
|
||||
// addMisCall('A', 'T', 35.1/100.0);
|
||||
//
|
||||
// addMisCall('C', 'A', 60.6/100.0);
|
||||
// addMisCall('C', 'G', 7.9/100.0);
|
||||
// addMisCall('C', 'T', 31.5/100.0);
|
||||
//
|
||||
// addMisCall('G', 'A', 30.2/100.0);
|
||||
// addMisCall('G', 'C', 7.8/100.0);
|
||||
// addMisCall('G', 'T', 62.0/100.0);
|
||||
//
|
||||
// addMisCall('T', 'A', 34.4/100.0);
|
||||
// addMisCall('T', 'C', 16.6/100.0);
|
||||
// addMisCall('T', 'G', 49.9/100.0);
|
||||
// }
|
||||
|
||||
//True base A C G T Totals
|
||||
//Error base Counts 1209203 718053 653214 1209112 3789582
|
||||
//A 809944 0.0% 59.2% 15.3% 25.6% 100.00%
|
||||
//C 934612 54.2% 0.0% 10.3% 35.5% 100.00%
|
||||
//G 985103 26.4% 5.6% 0.0% 68.0% 100.00%
|
||||
//T 1059923 41.8% 17.3% 40.9% 0.0% 100.00%
|
||||
static {
|
||||
addMisCall('A', 'C', 59.2/100.0);
|
||||
addMisCall('A', 'G', 15.3/100.0);
|
||||
addMisCall('A', 'T', 25.6/100.0);
|
||||
|
||||
addMisCall('C', 'A', 54.2/100.0);
|
||||
addMisCall('C', 'G', 10.3/100.0);
|
||||
addMisCall('C', 'T', 35.5/100.0);
|
||||
|
||||
addMisCall('G', 'A', 26.4/100.0);
|
||||
addMisCall('G', 'C', 5.6/100.0);
|
||||
addMisCall('G', 'T', 68.0/100.0);
|
||||
|
||||
addMisCall('T', 'A', 41.8/100.0);
|
||||
addMisCall('T', 'C', 17.3/100.0);
|
||||
addMisCall('T', 'G', 40.9/100.0);
|
||||
}
|
||||
|
||||
//
|
||||
// forwarding constructors -- don't do anything at all
|
||||
//
|
||||
public EmpiricalSubstitutionGenotypeLikelihoods() { super(); }
|
||||
public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); }
|
||||
|
||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, boolean fwdStrand) {
|
||||
if ( fwdStrand ) {
|
||||
return getProbMiscallIsBase(observedBase, chromBase);
|
||||
} else {
|
||||
double v = getProbMiscallIsBase(BaseUtils.simpleComplement(observedBase), BaseUtils.simpleComplement(chromBase));
|
||||
//System.out.printf("R: %c/%c => %f compared to %f%n", observedBase, chromBase, pow(10, getProbMiscallIsBase(observedBase, chromBase)), pow(10, v));
|
||||
return v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -14,25 +14,5 @@ public abstract class GenotypeLikelihoods {
|
|||
* during GL calculations. If this field is true, Q0 bases will be removed in add().
|
||||
*/
|
||||
// When true, Q0 bases are silently dropped by add().
protected boolean filterQ0Bases = true;

// Cached log-space terms indexed by phred quality q, with e = 10^(-q/10):
protected static final double[] oneMinusData = new double[Byte.MAX_VALUE];            // log10(1 - e)
protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE]; // log10(0.5 - e/2)
protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];   // log10(0.5 - e/2 + e/6)
protected static final double log10Of1_3 = log10(1.0 / 3.0);

// Precompute log10(1 - e) for every byte-sized quality score.
static {
    for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
        oneMinusData[qual] = log10(1.0 - pow(10, (qual / -10.0)));
    }
}

// Precompute the two heterozygote terms for every byte-sized quality score.
static {
    for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
        double e = pow(10, (qual / -10.0));
        oneHalfMinusDataArachne[qual] = log10(0.5 - e / 2.0);
        oneHalfMinusData3Base[qual] = log10(0.5 - e / 2.0 + e / 6.0);
    }
}
|
||||
|
||||
public abstract int add(char ref, char read, byte qual);
|
||||
//public abstract int add(char ref, char read, byte qual);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import edu.mit.broad.picard.util.MathUtil;
|
||||
|
||||
import java.util.Arrays;
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
|
||||
/**
|
||||
* Stable, error checking version of the Bayesian genotyper. Useful for calculating the likelihoods, priors,
|
||||
|
|
@ -40,14 +39,15 @@ import java.util.Arrays;
|
|||
* From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above
|
||||
* model.
|
||||
*/
|
||||
public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||
private int coverage = 0;
|
||||
public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implements Cloneable {
|
||||
private final static int FIXED_PLOIDY = 2;
|
||||
private final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
||||
|
||||
//
|
||||
// The three fundamental data arrays associated with a Genotype Likelhoods object
|
||||
//
|
||||
private double[] likelihoods = null;
|
||||
private double[] posteriors = null;
|
||||
protected double[] likelihoods = null;
|
||||
protected double[] posteriors = null;
|
||||
|
||||
private DiploidGenotypePriors priors = null;
|
||||
|
||||
|
|
@ -67,6 +67,19 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
initialize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
NewHotnessGenotypeLikelihoods c = (NewHotnessGenotypeLikelihoods)super.clone();
|
||||
c.priors = priors;
|
||||
c.likelihoods = likelihoods.clone();
|
||||
c.posteriors = posteriors.clone();
|
||||
return c;
|
||||
}
|
||||
|
||||
private void initialize() {
|
||||
likelihoods = zeros.clone(); // likelihoods are all zeros
|
||||
posteriors = priors.getPriors().clone(); // posteriors are all the priors
|
||||
|
|
@ -107,6 +120,10 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
return getPosteriors()[g.ordinal()];
|
||||
}
|
||||
|
||||
/**
 * Returns the DiploidGenotypePriors object backing this likelihoods calculation.
 */
public DiploidGenotypePriors getPriorObject() {
    return priors;
}
|
||||
|
||||
/**
|
||||
* Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values().
|
||||
*
|
||||
|
|
@ -125,10 +142,6 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
return getPriors()[g.ordinal()];
|
||||
}
|
||||
|
||||
public int getCoverage() {
|
||||
return coverage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are we ignoring Q0 bases during calculations?
|
||||
* @return
|
||||
|
|
@ -146,13 +159,13 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
this.filterQ0Bases = filterQ0Bases;
|
||||
}
|
||||
|
||||
/**
 * Remove call -- for historical purposes only
 *
 * @deprecated the reference base is ignored; call the add(read, qual) form directly.
 */
@Deprecated
public int add(char ref, char read, byte qual) {
    return add(read, qual);
}
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// add() -- the heart of
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Updates likelihoods and posteriors to reflect an additional observation of observedBase with
|
||||
|
|
@ -162,12 +175,16 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
* @param qualityScore
|
||||
* @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example)
|
||||
*/
|
||||
public int add(char observedBase, byte qualityScore)
|
||||
{
|
||||
public int add(char observedBase, byte qualityScore, boolean fwdStrand) {
|
||||
return add(observedBase, qualityScore, fwdStrand, true, false);
|
||||
}
|
||||
|
||||
public int add(char observedBase, byte qualityScore, boolean fwdStrand, boolean enableCache, boolean verbose) {
|
||||
if ( badBase(observedBase) ) {
|
||||
throw new RuntimeException(String.format("BUG: unexpected base %c with Q%d passed to GenotypeLikelihoods", observedBase, qualityScore));
|
||||
}
|
||||
|
||||
// TODO -- we should probably filter Q1 bases too
|
||||
if (qualityScore <= 0) {
|
||||
if ( isFilteringQ0Bases() ) {
|
||||
return 0;
|
||||
|
|
@ -176,17 +193,130 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
}
|
||||
}
|
||||
|
||||
coverage++;
|
||||
for (DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
double likelihood = calculateBaseLikelihood(observedBase, g, qualityScore);
|
||||
//System.out.printf("Likelihood is %f for %c %d %s%n", likelihood, read, qual, g.toString());
|
||||
// Handle caching if requested. Just look up the cached result if its available, or compute and store it
|
||||
NewHotnessGenotypeLikelihoods cached = null;
|
||||
if ( enableCache ) {
|
||||
if ( ! inCache(observedBase, qualityScore, FIXED_PLOIDY, fwdStrand) ) {
|
||||
cached = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, fwdStrand);
|
||||
} else {
|
||||
cached = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, fwdStrand);
|
||||
}
|
||||
}
|
||||
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
double likelihoodCalc = ! enableCache ? log10PofObservingBaseGivenGenotype(observedBase, g, qualityScore, fwdStrand) : 0.0;
|
||||
double likelihoodCached = enableCache ? cached.likelihoods[g.ordinal()] : 0.0;
|
||||
double likelihood = enableCache ? likelihoodCached : likelihoodCalc;
|
||||
|
||||
//if ( enableCache && likelihoodCalc != 0.0 && MathUtils.compareDoubles(likelihoodCached, likelihoodCalc) != 0 ) {
|
||||
// System.out.printf("ERROR: Likelihoods not equal is cache=%f != calc=%f for %c %d %s%n",
|
||||
// likelihoodCached, likelihoodCalc, observedBase, qualityScore, g.toString());
|
||||
//}
|
||||
if ( verbose )
|
||||
System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f%n", observedBase, g, qualityScore, fwdStrand ? "+" : "-", likelihood);
|
||||
|
||||
likelihoods[g.ordinal()] += likelihood;
|
||||
posteriors[g.ordinal()] += likelihood;
|
||||
}
|
||||
|
||||
if ( verbose ) {
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); }
|
||||
System.out.println();
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", likelihoods[g.ordinal()]); }
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates likelihoods and posteriors to reflect the additional observations contained within the
|
||||
* read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the
|
||||
* pileup
|
||||
*
|
||||
* @param pileup
|
||||
* @return the number of good bases found in the pileup
|
||||
*/
|
||||
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean verbose) {
|
||||
int n = 0;
|
||||
|
||||
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||
SAMRecord read = pileup.getReads().get(i);
|
||||
int offset = pileup.getOffsets().get(i);
|
||||
char base = read.getReadString().charAt(offset);
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
if ( ! ignoreBadBases || ! badBase(base) ) {
|
||||
n += add(base, qual, ! read.getReadNegativeStrandFlag(), true, verbose);
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
public int add(ReadBackedPileup pileup, boolean ignoreBadBases) {
|
||||
return add(pileup, ignoreBadBases, false);
|
||||
}
|
||||
|
||||
|
||||
public int add(ReadBackedPileup pileup) {
|
||||
return add(pileup, false);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// caching routines
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
final static NewHotnessGenotypeLikelihoods[][][][] cache = new NewHotnessGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
static int cacheSize = 0;
|
||||
|
||||
private int strandIndex(boolean fwdStrand) {
|
||||
return fwdStrand ? 0 : 1;
|
||||
}
|
||||
|
||||
private boolean inCache( char observedBase, byte qualityScore, int ploidy, boolean fwdStrand ) {
|
||||
return cache[BaseUtils.simpleBaseToBaseIndex(observedBase)][qualityScore][ploidy][strandIndex(fwdStrand)] != null;
|
||||
}
|
||||
|
||||
/**
 * Fetches the cached single-base likelihoods object for (base, qual, ploidy, strand).
 * Callers must ensure the entry exists first (see inCache /
 * calculateCachedGenotypeLikelihoods).
 *
 * @throws RuntimeException if the cache entry has not been computed yet
 */
private NewHotnessGenotypeLikelihoods getCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, boolean fwdStrand ) {
    if ( ! inCache(observedBase, qualityScore, ploidy, fwdStrand ) )
        throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, fwdstrand=%b",
                observedBase, qualityScore, ploidy, fwdStrand));

    return cache[BaseUtils.simpleBaseToBaseIndex(observedBase)][qualityScore][ploidy][strandIndex(fwdStrand)];
}
|
||||
|
||||
/**
 * Computes, stores, and returns the single-base likelihoods for
 * (base, qual, ploidy, strand).  A clone of this object is re-initialized and
 * updated with exactly one observation -- with caching disabled, so the
 * computation cannot recurse back into the cache -- then stored in the shared
 * static cache table.
 *
 * @throws RuntimeException if the entry already exists, or (wrapped) if cloning fails
 */
private NewHotnessGenotypeLikelihoods calculateCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, boolean fwdStrand ) {
    if ( inCache(observedBase, qualityScore, ploidy, fwdStrand ) )
        throw new RuntimeException(String.format("BUG: trying to set an already set cached genotype likelihood at base=%c, qual=%d, ploidy=%d, fwdstrand=%b",
                observedBase, qualityScore, ploidy, fwdStrand));

    // create a new genotype likelihoods object and add this single base to it -- now we have a cached result
    try {
        NewHotnessGenotypeLikelihoods g = (NewHotnessGenotypeLikelihoods)this.clone();
        g.initialize();
        // enableCache = false: this very call computes the value being cached
        g.add(observedBase, qualityScore, fwdStrand, false, false);

        cache[BaseUtils.simpleBaseToBaseIndex(observedBase)][qualityScore][ploidy][strandIndex(fwdStrand)] = g;
        cacheSize++;

        //System.out.printf("Caching %c %d %d %d %b (%d total entries)%n", observedBase, BaseUtils.simpleBaseToBaseIndex(observedBase), qualityScore, ploidy, fwdStrand, cacheSize);
        return g;
    } catch ( CloneNotSupportedException e ) {
        throw new RuntimeException(e);
    }
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// helper routines
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base
|
||||
* is considered bad if:
|
||||
|
|
@ -200,56 +330,6 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates likelihoods and posteriors to reflect the additional observations contained within the
|
||||
* read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the
|
||||
* pileup
|
||||
*
|
||||
* @param pileup
|
||||
* @return the number of good bases found in the pileup
|
||||
*/
|
||||
public int add(ReadBackedPileup pileup, boolean ignoreBadBases) {
|
||||
int n = 0;
|
||||
|
||||
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||
SAMRecord read = pileup.getReads().get(i);
|
||||
int offset = pileup.getOffsets().get(i);
|
||||
char base = read.getReadString().charAt(offset);
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
if ( ! ignoreBadBases || ! badBase(base) ) {
|
||||
n += add(base, qual);
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
public int add(ReadBackedPileup pileup) {
|
||||
return add(pileup, false);
|
||||
}
|
||||
|
||||
private double calculateBaseLikelihood(char observedBase, DiploidGenotype g, byte qual) {
|
||||
if (qual == 0) { // zero quals are wrong
|
||||
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %s %d", observedBase, g, qual));
|
||||
}
|
||||
|
||||
double p_base = 0.0;
|
||||
|
||||
if ( g.isHomRef(observedBase) ) {
|
||||
// hom
|
||||
p_base = getOneMinusQual(qual);
|
||||
} else if ( g.isHetRef(observedBase) ) {
|
||||
// het
|
||||
p_base = getOneHalfMinusQual(qual);
|
||||
} else {
|
||||
// error
|
||||
//System.out.printf("%s %b %f %f%n", genotype, h1 != h2, log10Of2_3, log10Of1_3 );
|
||||
p_base = qual / -10.0 + log10Of1_3;
|
||||
}
|
||||
|
||||
return p_base;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a string representation of this object in a moderately usable form
|
||||
*
|
||||
|
|
@ -266,6 +346,14 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
return s.toString();
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Validation routines
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public boolean validate() {
|
||||
return validate(true);
|
||||
}
|
||||
|
|
@ -298,12 +386,63 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
return true;
|
||||
}
|
||||
|
||||
private static double getOneMinusQual(final byte qual) {
|
||||
return oneMinusData[qual];
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Hearty math calculations follow
|
||||
//
|
||||
// -- these should not be messed with unless you know what you are doing
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Works for any ploidy (in organization).
|
||||
*
|
||||
* @param observedBase
|
||||
* @param g
|
||||
* @param qual
|
||||
* @return
|
||||
*/
|
||||
protected double log10PofObservingBaseGivenGenotype(char observedBase, DiploidGenotype g, byte qual, boolean fwdStrand) {
|
||||
if (qual == 0) { // zero quals are wrong
|
||||
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %s %d", observedBase, g, qual));
|
||||
}
|
||||
|
||||
// todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop
|
||||
double p_base = 0.0;
|
||||
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base1, qual, FIXED_PLOIDY, fwdStrand));
|
||||
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base2, qual, FIXED_PLOIDY, fwdStrand));
|
||||
return log10(p_base);
|
||||
}
|
||||
|
||||
private double getOneHalfMinusQual(final byte qual) {
|
||||
return oneHalfMinusData3Base[qual];
|
||||
protected double log10PofObservingBaseGivenChromosome(char observedBase, char chromBase, byte qual, int ploidy, boolean fwdStrand) {
|
||||
double logP = 0.0;
|
||||
|
||||
if ( observedBase == chromBase ) {
|
||||
// the base is consistent with the chromosome -- it's 1 - e
|
||||
//logP = oneMinusData[qual];
|
||||
double e = pow(10, (qual / -10.0));
|
||||
logP = log10(1.0 - e);
|
||||
} else {
|
||||
// the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error)
|
||||
logP = qual / -10.0 + log10PofTrueBaseGivenMiscall(observedBase, chromBase, fwdStrand);
|
||||
}
|
||||
|
||||
// adjust for ploidy. We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space
|
||||
//logP -= log10N[ploidy];
|
||||
logP -= log10(ploidy);
|
||||
|
||||
//System.out.printf("%c %c %d %d => %f%n", observedBase, chromBase, qual, ploidy, logP);
|
||||
return logP;
|
||||
}
|
||||
|
||||
/**
 * Simple log10(3) cached value
 */
protected static final double log103 = log10(3.0);

/**
 * log10 P(the true base is chromBase | observedBase was a miscall).
 * Base-class behavior is the uniform model: every wrong base is equally
 * likely, i.e. a constant -log10(3).  Subclasses (e.g. the empirical
 * substitution model) override this with measured transition probabilities.
 */
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, boolean fwdStrand) {
    return -log103; // currently equivalent to e/3 model
}
|
||||
|
||||
//
|
||||
|
|
|
|||
|
|
@ -16,6 +16,21 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
|
||||
public class OldAndBustedGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||
// Cached log-space terms indexed by phred quality q, with e = 10^(-q/10):
protected static final double[] oneMinusData = new double[Byte.MAX_VALUE];            // log10(1 - e)
protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE]; // log10(0.5 - e/2)
protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];   // log10(0.5 - e/2 + e/6)
//protected static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE];
protected static final double log10Of1_3 = log10(1.0 / 3.0);

// Fill all three tables in one pass over every byte-sized quality score.
static {
    for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
        double e = pow(10, (qual / -10.0));
        oneMinusData[qual] = log10(1.0 - e);
        oneHalfMinusDataArachne[qual] = log10(0.5 - e / 2.0);
        oneHalfMinusData3Base[qual] = log10(0.5 - e / 2.0 + e / 6.0);
    }
}
|
||||
|
||||
/** Looks up the precomputed log10(1 - e) for the given quality score. */
private static double getOneMinusQual(final byte qual) {
    return oneMinusData[qual];
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,12 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||
public Double heterozygosity = DiploidGenotypePriors.HUMAN_HETEROZYGOSITY;
|
||||
|
||||
// When set, map() builds an EmpiricalSubstitutionGenotypeLikelihoods so that
// P(B_true | B_miscall) uses the measured substitution table instead of the
// uniform e/3 model.
@Argument(fullName = "useEmpiricalTransitions", shortName = "useEmpiricalTransitions", doc = "EXPERIMENTAL", required = false)
public boolean useEmpiricalTransitions = false;

// Passed through to GenotypeLikelihoods.add() to print per-base likelihood tables.
@Argument(fullName = "verbose", shortName = "v", doc = "EXPERIMENTAL", required = false)
public boolean VERBOSE = false;
|
||||
|
||||
// todo - print out priors at the start of SSG with .INFO
|
||||
|
||||
// todo -- remove dbSNP awareness until we understand how it should be used
|
||||
|
|
@ -77,10 +83,12 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
public SSGGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
||||
char ref = refContext.getBase();
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||
NewHotnessGenotypeLikelihoods G = new NewHotnessGenotypeLikelihoods(priors);
|
||||
|
||||
NewHotnessGenotypeLikelihoods G = useEmpiricalTransitions ? new EmpiricalSubstitutionGenotypeLikelihoods(priors) : new NewHotnessGenotypeLikelihoods(priors);
|
||||
|
||||
G.filterQ0Bases(! keepQ0Bases);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
G.add(pileup, true);
|
||||
G.add(pileup, true, VERBOSE);
|
||||
G.validate();
|
||||
|
||||
// lets setup the locus
|
||||
|
|
@ -89,6 +97,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
lst.add(new BasicGenotype(pileup.getLocation(), g.toString(),new BayesianConfidenceScore(G.getLikelihood(g))));
|
||||
}
|
||||
|
||||
//System.out.printf("At %s%n", new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, G.getPosteriors(), pileup));
|
||||
return new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, G.getPosteriors(), pileup);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue