Continuing improvements to unified genotyper
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2098 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
33dcfc858d
commit
82fd824c4d
|
|
@ -94,7 +94,10 @@ public abstract class FourBaseProbabilities implements Cloneable {
|
|||
* @return log10 likelihood as a double
|
||||
*/
|
||||
public double getLog10Likelihood(char base) {
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
return getLog10Likelihood(BaseUtils.simpleBaseToBaseIndex(base));
|
||||
}
|
||||
|
||||
public double getLog10Likelihood(int baseIndex) {
|
||||
return (baseIndex < 0 ? 0.0 : getLog10Likelihoods()[baseIndex]);
|
||||
}
|
||||
|
||||
|
|
@ -115,10 +118,14 @@ public abstract class FourBaseProbabilities implements Cloneable {
|
|||
* @return likelihoods as a double
|
||||
*/
|
||||
public double getLikelihood(char base) {
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
return getLikelihood(BaseUtils.simpleBaseToBaseIndex(base));
|
||||
}
|
||||
|
||||
public double getLikelihood(int baseIndex) {
|
||||
return (baseIndex < 0 ? 0.0 : Math.pow(10, log10Likelihoods[baseIndex]));
|
||||
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
|
|
@ -138,7 +145,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
|
|||
* @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example)
|
||||
*/
|
||||
public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||
FourBaseProbabilities fbl = computeLikelihoods(observedBase, qualityScore, read, offset);
|
||||
FourBaseProbabilities fbl = computeLog10Likelihoods(observedBase, qualityScore, read, offset);
|
||||
if ( fbl == null )
|
||||
return 0;
|
||||
|
||||
|
|
@ -167,7 +174,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
|
|||
* @param offset offset on read
|
||||
* @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example)
|
||||
*/
|
||||
public FourBaseProbabilities computeLikelihoods(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||
public FourBaseProbabilities computeLog10Likelihoods(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||
if ( badBase(observedBase) ) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -114,10 +114,10 @@ public class GenotypeLikelihoods implements Cloneable {
|
|||
|
||||
private void initialize(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) {
|
||||
fourBaseLikelihoods = FourBaseProbabilitiesFactory.makeFourBaseLikelihoods(m, pl);
|
||||
initialize();
|
||||
setToZero();
|
||||
}
|
||||
|
||||
private void initialize() {
|
||||
private void setToZero() {
|
||||
log10Likelihoods = zeros.clone(); // likelihoods are all zeros
|
||||
log10Posteriors = priors.getPriors().clone(); // posteriors are all the priors
|
||||
}
|
||||
|
|
@ -372,7 +372,7 @@ public class GenotypeLikelihoods implements Cloneable {
|
|||
}
|
||||
|
||||
private GenotypeLikelihoods calculateGenotypeLikelihoods(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||
FourBaseProbabilities fbl = fourBaseLikelihoods.computeLikelihoods(observedBase, qualityScore, read, offset);
|
||||
FourBaseProbabilities fbl = fourBaseLikelihoods.computeLog10Likelihoods(observedBase, qualityScore, read, offset);
|
||||
if ( fbl == null )
|
||||
return null;
|
||||
|
||||
|
|
@ -380,7 +380,7 @@ public class GenotypeLikelihoods implements Cloneable {
|
|||
try {
|
||||
|
||||
GenotypeLikelihoods gl = (GenotypeLikelihoods)this.clone();
|
||||
gl.initialize();
|
||||
gl.setToZero();
|
||||
|
||||
// we need to adjust for ploidy. We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space
|
||||
double ploidyAdjustment = log10(FIXED_PLOIDY);
|
||||
|
|
|
|||
|
|
@ -10,11 +10,44 @@ import java.util.*;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {
|
||||
|
||||
private static final String POOL_SAMPLE_NAME = "POOL";
|
||||
|
||||
private FourBaseProbabilities fourBaseLikelihoods = null;
|
||||
|
||||
private double[] log10F = null;
|
||||
private double[] log10OneMinusF = null;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
protected PooledCalculationModel() {}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param ref
|
||||
* @param contexts
|
||||
* @param contextType
|
||||
*/
|
||||
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
super.initialize(ref, contexts, contextType);
|
||||
|
||||
// prepare the four base vector calculator
|
||||
fourBaseLikelihoods = FourBaseProbabilitiesFactory.makeFourBaseLikelihoods(baseModel, defaultPlatform);
|
||||
|
||||
// todo - can you add static initialize()
|
||||
// prepare cached values of the log10 of f and (1-f)
|
||||
if ( log10F == null ) {
|
||||
int nChromosomes = 2 * getNSamples(contexts);
|
||||
log10F = new double[nChromosomes+1];
|
||||
log10OneMinusF = new double[nChromosomes+1];
|
||||
for ( int i = 0; i < (nChromosomes+1); i++ ) {
|
||||
double f = (1.0 * i) / nChromosomes;
|
||||
log10F[i] = Math.log10(f);
|
||||
log10OneMinusF[i] = Math.log10(1-f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected int getNSamples(HashMap<String, AlignmentContextBySample> contexts) {
|
||||
return POOL_SIZE;
|
||||
}
|
||||
|
|
@ -22,7 +55,6 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
|||
protected HashMap<String, AlignmentContextBySample> createContexts(AlignmentContext context) {
|
||||
// for testing purposes, we may want to throw multi-samples at pooled mode,
|
||||
// so we can't use the standard splitContextBySample() method here
|
||||
|
||||
AlignmentContextBySample pooledContext = new AlignmentContextBySample(context.getLocation());
|
||||
|
||||
int deletionsInPileup = 0;
|
||||
|
|
@ -30,7 +62,6 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
|||
List<Integer> offsets = context.getOffsets();
|
||||
|
||||
for (int i = 0; i < reads.size(); i++) {
|
||||
|
||||
// check for deletions
|
||||
int offset = offsets.get(i);
|
||||
if ( offset == -1 ) {
|
||||
|
|
@ -48,7 +79,13 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
|||
contexts.put(POOL_SAMPLE_NAME, pooledContext);
|
||||
return contexts;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// todo - create cache indexed by chromosome, tech, qual, observed base, and potential polymorphism (A/C) for all
|
||||
// todo -- 16 pairs. We can change the calculation at a locus to: for each read in pileup, calculate offset into
|
||||
// todo -- this table given observed base, read, and qual. Then, for each i -> 0, 2n and polymorphism, run over
|
||||
// todo -- the table, summing values
|
||||
//
|
||||
protected double computeLog10PofDgivenAFi(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType));
|
||||
|
|
@ -62,6 +99,106 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
|||
int nAltAlleles = (int)(f * nChromosomes);
|
||||
int nRefAlleles = nChromosomes - nAltAlleles;
|
||||
|
||||
double log10POfRef = log10OneMinusF[nAltAlleles];
|
||||
double log10POfAlt = log10F[nAltAlleles];
|
||||
double POfRef = Math.pow(10,log10POfRef);
|
||||
double POfAlt = Math.pow(10,log10POfAlt);
|
||||
|
||||
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||
int offset = pileup.getOffsets().get(i);
|
||||
|
||||
// ignore deletions
|
||||
if ( offset == -1 )
|
||||
continue;
|
||||
|
||||
SAMRecord read = pileup.getReads().get(i);
|
||||
char base = (char)read.getReadBases()[offset];
|
||||
int bIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
|
||||
if ( qual > 0 && bIndex != -1 ) {
|
||||
FourBaseProbabilities fbl = fourBaseLikelihoods.computeLog10Likelihoods(base, qual, read, offset);
|
||||
double POfBGivenRef = fbl.getLikelihood(refIndex);
|
||||
double POfBGivenAlt = fbl.getLikelihood(altIndex);
|
||||
double P = POfRef * POfBGivenRef + POfAlt * POfBGivenAlt;
|
||||
log10L += Math.log10(P);
|
||||
}
|
||||
}
|
||||
|
||||
//if ( verboseWriter != null )
|
||||
// verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n",
|
||||
// context.getContext(StratifiedContext.OVERALL).getLocation(),
|
||||
// refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
|
||||
|
||||
return log10L;
|
||||
}
|
||||
|
||||
/* protected double computeLog10PofDgivenAFi_V2(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType));
|
||||
|
||||
double log10L = 0.0;
|
||||
|
||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg);
|
||||
int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg);
|
||||
|
||||
int nChromosomes = 2 * getNSamples(contexts);
|
||||
int nAltAlleles = (int)(f * nChromosomes);
|
||||
int nRefAlleles = nChromosomes - nAltAlleles;
|
||||
|
||||
double log10POfRef = log10OneMinusF[nAltAlleles];
|
||||
double log10POfAlt = log10F[nAltAlleles];
|
||||
|
||||
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||
int offset = pileup.getOffsets().get(i);
|
||||
|
||||
// ignore deletions
|
||||
if ( offset == -1 )
|
||||
continue;
|
||||
|
||||
SAMRecord read = pileup.getReads().get(i);
|
||||
char base = (char)read.getReadBases()[offset];
|
||||
int bIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
|
||||
double log10POfNotB = Math.log10(QualityUtils.qualToErrorProb(qual));
|
||||
if ( qual > 0 && bIndex != -1 ) {
|
||||
FourBaseProbabilities fbl = fourBaseLikelihoods.computeLog10Likelihoods(base, qual, read, offset);
|
||||
|
||||
double log10POfB = fbl.getLog10Likelihood(base);
|
||||
|
||||
if ( bIndex == refIndex && nRefAlleles > 0 ) {
|
||||
log10L += log10POfRef + log10POfB;
|
||||
} else if ( bIndex == altIndex && nAltAlleles > 0) {
|
||||
log10L += log10POfAlt + log10POfB;
|
||||
} else {
|
||||
//log10L += Math.min(log10POfRef + fbl.getLog10Likelihood(refIndex), log10POfAlt + fbl.getLog10Likelihood(altIndex));
|
||||
log10L += log10POfNotB;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( verboseWriter != null )
|
||||
verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n",
|
||||
context.getContext(StratifiedContext.OVERALL).getLocation(),
|
||||
refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
|
||||
|
||||
return log10L;
|
||||
} */
|
||||
|
||||
/* protected double computeLog10PofDgivenAFi_V1(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType));
|
||||
|
||||
double log10L = 0.0;
|
||||
|
||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg);
|
||||
int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg);
|
||||
|
||||
int nChromosomes = 2 * getNSamples(contexts);
|
||||
int nAltAlleles = (int)(f * nChromosomes);
|
||||
int nRefAlleles = nChromosomes - nAltAlleles;
|
||||
|
||||
double log10POfRef = Math.log10(1 - f);
|
||||
double log10POfAlt = Math.log10(f);
|
||||
//double log10ChromChooseRef = Math.log10(Arithmetic.binomial(nChromosomes, nRefAlleles));
|
||||
|
|
@ -94,5 +231,5 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
|||
refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
|
||||
|
||||
return log10L;
|
||||
}
|
||||
}*/
|
||||
}
|
||||
Loading…
Reference in New Issue