Performance improvements for pooled caller. Now possible to actually run on real data in a finite amount of time. Minor changes to GL interface (making strandIndex public) to support cached calculations in pooled caller.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2107 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
da7de9960b
commit
27122f7f97
|
|
@ -420,7 +420,7 @@ public class GenotypeLikelihoods implements Cloneable {
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
protected int strandIndex(boolean fwdStrand) {
|
public static int strandIndex(boolean fwdStrand) {
|
||||||
return fwdStrand ? 0 : 1;
|
return fwdStrand ? 0 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,10 +12,8 @@ import net.sf.samtools.SAMRecord;
|
||||||
public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {
|
public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {
|
||||||
private static final String POOL_SAMPLE_NAME = "POOL";
|
private static final String POOL_SAMPLE_NAME = "POOL";
|
||||||
|
|
||||||
private FourBaseProbabilities fourBaseLikelihoods = null;
|
private static FourBaseProbabilities fourBaseLikelihoods = null;
|
||||||
|
private static boolean USE_CACHE = true;
|
||||||
private double[] log10F = null;
|
|
||||||
private double[] log10OneMinusF = null;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
@ -31,21 +29,15 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
||||||
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||||
super.initialize(ref, contexts, contextType);
|
super.initialize(ref, contexts, contextType);
|
||||||
|
|
||||||
|
// todo -- move this code to a static initializer
|
||||||
|
|
||||||
// prepare the four base vector calculator
|
// prepare the four base vector calculator
|
||||||
|
if ( fourBaseLikelihoods == null )
|
||||||
fourBaseLikelihoods = FourBaseProbabilitiesFactory.makeFourBaseLikelihoods(baseModel, defaultPlatform);
|
fourBaseLikelihoods = FourBaseProbabilitiesFactory.makeFourBaseLikelihoods(baseModel, defaultPlatform);
|
||||||
|
|
||||||
// todo - can you add static initialize()
|
// setup the cache
|
||||||
// prepare cached values of the log10 of f and (1-f)
|
if ( CACHE == null )
|
||||||
if ( log10F == null ) {
|
makeCache(POOL_SIZE);
|
||||||
int nChromosomes = 2 * getNSamples(contexts);
|
|
||||||
log10F = new double[nChromosomes+1];
|
|
||||||
log10OneMinusF = new double[nChromosomes+1];
|
|
||||||
for ( int i = 0; i < (nChromosomes+1); i++ ) {
|
|
||||||
double f = (1.0 * i) / nChromosomes;
|
|
||||||
log10F[i] = Math.log10(f);
|
|
||||||
log10OneMinusF[i] = Math.log10(1-f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected int getNSamples(HashMap<String, AlignmentContextBySample> contexts) {
|
protected int getNSamples(HashMap<String, AlignmentContextBySample> contexts) {
|
||||||
|
|
@ -80,12 +72,6 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
||||||
return contexts;
|
return contexts;
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
// todo - create cache indexed by chromosome, tech, qual, observed base, and potential polymorphism (A/C) for all
|
|
||||||
// todo -- 16 pairs. We can change the calculation at a locus to: for each read in pileup, calculate offset into
|
|
||||||
// todo -- this table given observed base, read, and qual. Then, for each i -> 0, 2n and polymorphism, run over
|
|
||||||
// todo -- the table, summing values
|
|
||||||
//
|
|
||||||
protected double computeLog10PofDgivenAFi(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
protected double computeLog10PofDgivenAFi(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||||
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType));
|
ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType));
|
||||||
|
|
@ -95,6 +81,102 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
||||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg);
|
int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg);
|
||||||
int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg);
|
int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg);
|
||||||
|
|
||||||
|
int nChromosomes = 2 * getNSamples(contexts);
|
||||||
|
int nAltAlleles = (int)(f * nChromosomes);
|
||||||
|
|
||||||
|
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||||
|
int offset = pileup.getOffsets().get(i);
|
||||||
|
|
||||||
|
// ignore deletions
|
||||||
|
if ( offset == -1 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
SAMRecord read = pileup.getReads().get(i);
|
||||||
|
char base = (char)read.getReadBases()[offset];
|
||||||
|
int bIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||||
|
byte qual = read.getBaseQualities()[offset];
|
||||||
|
|
||||||
|
if ( qual > 0 && bIndex != -1 ) {
|
||||||
|
log10L += calcPBGivenH(refIndex, altIndex, nAltAlleles, nChromosomes, base, qual, read, offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return log10L;
|
||||||
|
}
|
||||||
|
|
||||||
|
// cache = BMM x PL x REF x ALT x base x QUAL x strand x F as i
|
||||||
|
static double[][][][][][][][] CACHE = null;
|
||||||
|
static int N_CACHED = 0;
|
||||||
|
private static void makeCache(int pool_size) {
|
||||||
|
CACHE = new double[BaseMismatchModel.values().length][EmpiricalSubstitutionProbabilities.SequencerPlatform.values().length][BaseUtils.BASES.length][BaseUtils.BASES.length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][2][2 * pool_size+1];
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void setCache( int refIndex, int altIndex, int nAltAlleles, char base, byte qual, SAMRecord read, double val ) {
|
||||||
|
int m = FourBaseProbabilitiesFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal();
|
||||||
|
int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read);
|
||||||
|
int i = refIndex;
|
||||||
|
int j = altIndex;
|
||||||
|
int k = BaseUtils.simpleBaseToBaseIndex(base);
|
||||||
|
int l = qual;
|
||||||
|
int x = GenotypeLikelihoods.strandIndex(! read.getReadNegativeStrandFlag());
|
||||||
|
int f = nAltAlleles;
|
||||||
|
|
||||||
|
N_CACHED++;
|
||||||
|
System.out.printf("Setting cache value %d %d %d %d %d %d %d %d = %f [count = %d]%n", m, a, i, j, k, l, x, f, val, N_CACHED);
|
||||||
|
CACHE[m][a][i][j][k][l][x][f] = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected double getCache( int refIndex, int altIndex, int nAltAlleles, char base, byte qual, SAMRecord read ) {
|
||||||
|
int m = FourBaseProbabilitiesFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal();
|
||||||
|
int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read);
|
||||||
|
int i = refIndex;
|
||||||
|
int j = altIndex;
|
||||||
|
int k = BaseUtils.simpleBaseToBaseIndex(base);
|
||||||
|
int l = qual;
|
||||||
|
int x = GenotypeLikelihoods.strandIndex(! read.getReadNegativeStrandFlag());
|
||||||
|
int f = nAltAlleles;
|
||||||
|
//System.out.printf("Getting cache value %d %d %d %d %d %d %d %d%n", m, a, i, j, k, l, x, f);
|
||||||
|
|
||||||
|
return CACHE[m][a][i][j][k][l][x][f];
|
||||||
|
}
|
||||||
|
|
||||||
|
private double calcPBGivenH(int refIndex, int altIndex, int nAltAlleles, int nChromosomes, char base, byte qual, SAMRecord read, int offset) {
|
||||||
|
double L = 0.0;
|
||||||
|
|
||||||
|
if ( USE_CACHE ) {
|
||||||
|
L = getCache(refIndex, altIndex, nAltAlleles, base, qual, read);
|
||||||
|
if ( L == 0.0 ) {
|
||||||
|
L = reallyCalcPBGivenH(refIndex, altIndex, nAltAlleles, nChromosomes, base, qual, read, offset);
|
||||||
|
setCache(refIndex, altIndex, nAltAlleles, base, qual, read, L);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
L = reallyCalcPBGivenH(refIndex, altIndex, nAltAlleles, nChromosomes, base, qual, read, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
return L;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double reallyCalcPBGivenH(int refIndex, int altIndex, int nAltAlleles, int nChromosomes, char base, byte qual, SAMRecord read, int offset) {
|
||||||
|
double f = (1.0 * nAltAlleles) / nChromosomes;
|
||||||
|
double POfRef = 1 - f;
|
||||||
|
double POfAlt = f;
|
||||||
|
|
||||||
|
FourBaseProbabilities fbl = fourBaseLikelihoods.computeLog10Likelihoods(base, qual, read, offset);
|
||||||
|
double POfBGivenRef = fbl.getLikelihood(refIndex);
|
||||||
|
double POfBGivenAlt = fbl.getLikelihood(altIndex);
|
||||||
|
double P = POfRef * POfBGivenRef + POfAlt * POfBGivenAlt;
|
||||||
|
return Math.log10(P);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* protected double computeLog10PofDgivenAFi(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||||
|
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
||||||
|
ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType));
|
||||||
|
|
||||||
|
double log10L = 0.0;
|
||||||
|
|
||||||
|
int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg);
|
||||||
|
int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg);
|
||||||
|
|
||||||
int nChromosomes = 2 * getNSamples(contexts);
|
int nChromosomes = 2 * getNSamples(contexts);
|
||||||
int nAltAlleles = (int)(f * nChromosomes);
|
int nAltAlleles = (int)(f * nChromosomes);
|
||||||
int nRefAlleles = nChromosomes - nAltAlleles;
|
int nRefAlleles = nChromosomes - nAltAlleles;
|
||||||
|
|
@ -131,7 +213,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
||||||
// refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
|
// refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
|
||||||
|
|
||||||
return log10L;
|
return log10L;
|
||||||
}
|
}*/
|
||||||
|
|
||||||
/* protected double computeLog10PofDgivenAFi_V2(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
/* protected double computeLog10PofDgivenAFi_V2(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||||
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue