Support for EXPERIMENT sampling-based genotype likelihoods
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3044 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7b17bcd0af
commit
d8ff552311
|
|
@ -12,8 +12,14 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalculationModel {
|
public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalculationModel {
|
||||||
|
/**
|
||||||
|
* Should we enable the experimental genotype likelihood calculations?
|
||||||
|
*/
|
||||||
|
protected boolean useExptGenotypeLikelihoods = false;
|
||||||
|
|
||||||
protected DiploidGenotypeCalculationModel() {}
|
protected DiploidGenotypeCalculationModel(boolean useExptGenotypeLikelihoods) {
|
||||||
|
this.useExptGenotypeLikelihoods = useExptGenotypeLikelihoods;
|
||||||
|
}
|
||||||
|
|
||||||
// the GenotypeLikelihoods map
|
// the GenotypeLikelihoods map
|
||||||
private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
|
private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
|
||||||
|
|
@ -21,7 +27,9 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
|
|
||||||
private enum GenotypeType { REF, HET, HOM }
|
private enum GenotypeType { REF, HET, HOM }
|
||||||
|
|
||||||
protected void initialize(char ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
|
protected void initialize(char ref,
|
||||||
|
Map<String, StratifiedAlignmentContext> contexts,
|
||||||
|
StratifiedAlignmentContext.StratifiedContextType contextType) {
|
||||||
// initialize the GenotypeLikelihoods
|
// initialize the GenotypeLikelihoods
|
||||||
GLs.clear();
|
GLs.clear();
|
||||||
AFMatrixMap.clear();
|
AFMatrixMap.clear();
|
||||||
|
|
@ -40,7 +48,13 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
|
ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
GenotypeLikelihoods GL;
|
||||||
|
if ( useExptGenotypeLikelihoods ) {
|
||||||
|
GL = new SamplingGenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||||
|
} else {
|
||||||
|
GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||||
|
}
|
||||||
|
|
||||||
GL.add(pileup, true);
|
GL.add(pileup, true);
|
||||||
GLs.put(sample, GL);
|
GLs.put(sample, GL);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
|
|
||||||
public enum Model {
|
public enum Model {
|
||||||
JOINT_ESTIMATE,
|
JOINT_ESTIMATE,
|
||||||
|
JOINT_ESTIMATE_EXPT_GL,
|
||||||
POOLED,
|
POOLED,
|
||||||
INDELS
|
INDELS
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,9 @@ public class GenotypeCalculationModelFactory {
|
||||||
GenotypeCalculationModel gcm;
|
GenotypeCalculationModel gcm;
|
||||||
switch ( UAC.genotypeModel ) {
|
switch ( UAC.genotypeModel ) {
|
||||||
case JOINT_ESTIMATE:
|
case JOINT_ESTIMATE:
|
||||||
gcm = new DiploidGenotypeCalculationModel();
|
case JOINT_ESTIMATE_EXPT_GL:
|
||||||
|
boolean useExptGenotypeLikelihoods = UAC.genotypeModel == JOINT_ESTIMATE_EXPT_GL;
|
||||||
|
gcm = new DiploidGenotypeCalculationModel(useExptGenotypeLikelihoods);
|
||||||
break;
|
break;
|
||||||
case POOLED:
|
case POOLED:
|
||||||
gcm = new PooledCalculationModel();
|
gcm = new PooledCalculationModel();
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,169 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
|
|
||||||
|
import static java.lang.Math.log10;
|
||||||
|
import static java.lang.Math.pow;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
public class SamplingGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
|
/**
|
||||||
|
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
|
||||||
|
*
|
||||||
|
* @param m base model
|
||||||
|
*/
|
||||||
|
public SamplingGenotypeLikelihoods(BaseMismatchModel m) {
|
||||||
|
super(m);
|
||||||
|
enableCacheFlag = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
|
||||||
|
*
|
||||||
|
* @param m base model
|
||||||
|
* @param pl default platform
|
||||||
|
*/
|
||||||
|
public SamplingGenotypeLikelihoods(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) {
|
||||||
|
super(m, pl);
|
||||||
|
enableCacheFlag = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new GenotypeLikelhoods object with given priors for each diploid genotype
|
||||||
|
*
|
||||||
|
* @param m base model
|
||||||
|
* @param priors priors
|
||||||
|
*/
|
||||||
|
public SamplingGenotypeLikelihoods(BaseMismatchModel m, DiploidGenotypePriors priors) {
|
||||||
|
super(m, priors);
|
||||||
|
enableCacheFlag = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new GenotypeLikelhoods object with given priors for each diploid genotype
|
||||||
|
*
|
||||||
|
* @param m base model
|
||||||
|
* @param priors priors
|
||||||
|
* @param pl default platform
|
||||||
|
*/
|
||||||
|
public SamplingGenotypeLikelihoods(BaseMismatchModel m, DiploidGenotypePriors priors, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) {
|
||||||
|
super(m, priors, pl);
|
||||||
|
enableCacheFlag = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cloning of the object
|
||||||
|
* @return clone
|
||||||
|
* @throws CloneNotSupportedException
|
||||||
|
*/
|
||||||
|
protected Object clone() throws CloneNotSupportedException {
|
||||||
|
return super.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates likelihoods and posteriors to reflect the additional observations contained within the
|
||||||
|
* read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the
|
||||||
|
* pileup
|
||||||
|
*
|
||||||
|
* @param pileup read pileup
|
||||||
|
* @param ignoreBadBases should we ignore bad bases
|
||||||
|
* @return the number of good bases found in the pileup
|
||||||
|
*/
|
||||||
|
public int add(ReadBackedPileup pileup, boolean ignoreBadBases) {
|
||||||
|
// we're actually going to be happy with our homozygous theories
|
||||||
|
int n = super.add(pileup, ignoreBadBases);
|
||||||
|
|
||||||
|
// Now, loop over the heterygous theories and do our fancy sampling calculation
|
||||||
|
FourBaseProbabilities[] probs = fourBaseProbVector(n, pileup, ignoreBadBases);
|
||||||
|
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||||
|
if ( g.isHet() ) {
|
||||||
|
//System.out.printf("DEBUG: computing best k for %s at %s%n", g, pileup.getLocation());
|
||||||
|
double likelihood = calculateSamplingHetGenotypeLikelihood(probs, g, n);
|
||||||
|
//System.out.printf("DEBUG: L was %.2f%n", likelihood);
|
||||||
|
if ( likelihood != 1 ) {
|
||||||
|
log10Likelihoods[g.ordinal()] = likelihood;
|
||||||
|
log10Posteriors[g.ordinal()] = likelihood + priors.getPrior(g);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) {
|
||||||
|
throw new UnsupportedOperationException("BUG: Sampling genotype likelihoods does not support sequential addition of bases; use add(ReadBackedPileup) instead");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private double calculateSamplingHetGenotypeLikelihood(FourBaseProbabilities[] probs, DiploidGenotype g, int goodBaseDepth) {
|
||||||
|
double bestLog10L = 1;
|
||||||
|
//int bestK = -1;
|
||||||
|
|
||||||
|
for ( int k = 0; k < goodBaseDepth; k++ ) {
|
||||||
|
// for every possible sampling depth of the A allele
|
||||||
|
double log10BinomialSampling = Math.log10(MathUtils.binomialProbabilityLog(k, goodBaseDepth, 0.5));
|
||||||
|
double log10Het = mostLikelyKBases(probs, k, BaseUtils.simpleBaseToBaseIndex(g.base1), BaseUtils.simpleBaseToBaseIndex(g.base2));
|
||||||
|
|
||||||
|
double log10L = log10BinomialSampling + log10Het;
|
||||||
|
//System.out.printf(" DEBUG: computing best k = %d, bin = %.2f, het = %.2f, log10L = %.2f%n", k, log10BinomialSampling, log10Het, log10L);
|
||||||
|
|
||||||
|
if ( log10L > bestLog10L || bestLog10L == 1 ) {
|
||||||
|
bestLog10L = log10L;
|
||||||
|
//bestK = k;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//System.out.printf(" DEBUG: best k = %d with log10L of %.2f%n", bestK, bestLog10L);
|
||||||
|
return bestLog10L;
|
||||||
|
}
|
||||||
|
|
||||||
|
FourBaseProbabilities[] fourBaseProbVector(int n, ReadBackedPileup pileup, boolean ignoreBadBases) {
|
||||||
|
FourBaseProbabilities[] probs = new FourBaseProbabilities[n];
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
for ( PileupElement p : pileup ) {
|
||||||
|
char observedBase = (char)p.getBase();
|
||||||
|
byte qualityScore = p.getQual();
|
||||||
|
SAMRecord read = p.getRead();
|
||||||
|
int offset = p.getOffset();
|
||||||
|
|
||||||
|
if ( ! ignoreBadBases || ! badBase(observedBase) ) {
|
||||||
|
FourBaseProbabilities fbl = fourBaseLikelihoods.computeLog10Likelihoods(observedBase, qualityScore, read, offset);
|
||||||
|
|
||||||
|
if ( fbl != null ) {
|
||||||
|
probs[i++] = fbl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return probs;
|
||||||
|
}
|
||||||
|
|
||||||
|
class FourBaseComparator implements Comparator<FourBaseProbabilities> {
|
||||||
|
int index = 0;
|
||||||
|
public FourBaseComparator(int i) { index = i; }
|
||||||
|
public int compare(FourBaseProbabilities a, FourBaseProbabilities b) {
|
||||||
|
return -1 * Double.compare(a.getLog10Likelihood(index), b.getLog10Likelihood(index));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static final double log103 = log10(3.0);
|
||||||
|
double mostLikelyKBases(FourBaseProbabilities[] probs, int k, int base1, int base2) {
|
||||||
|
if ( k == 0 ) // optimization -- if k > 1, we've already sorted the vector
|
||||||
|
Arrays.sort(probs, new FourBaseComparator(base1));
|
||||||
|
|
||||||
|
double log10L = 0;
|
||||||
|
for ( int i = 0; i < probs.length; i++ ) {
|
||||||
|
int base = i < k ? base1 : base2;
|
||||||
|
log10L += probs[i].getLog10Likelihood(base) - log103;
|
||||||
|
//System.out.printf("%3d %d %.2f %.2f %.2f%n", i, base, probs[i].getLog10Likelihood(base), probs[i].getLog10Likelihood(base1), probs[i].getLog10Likelihood(base2));
|
||||||
|
}
|
||||||
|
|
||||||
|
return log10L;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue