Added previous optimization to diploid (non-pool) model and shaved off 20% of runtime from it. Moved out some common functionality to joint estimate parent class.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2453 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-28 21:20:48 +00:00
parent 92e3682991
commit 893c9c85fa
3 changed files with 25 additions and 4 deletions

View File

@ -61,6 +61,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
// first, calculate for AF=0 (no change to matrix)
log10PofDgivenAFi[baseIndex][0] = matrix.getLikelihoodsOfFrequency();
double maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][0];
// for each minor allele frequency, calculate log10PofDgivenAFi
for (int i = 1; i <= numFrequencies; i++) {
@ -69,6 +70,16 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
// calculate new likelihoods
log10PofDgivenAFi[baseIndex][i] = matrix.getLikelihoodsOfFrequency();
// an optimization to speed up the calculation: if we are beyond the local maximum such
// that subsequent likelihoods won't factor into the confidence score, just quit
if ( maxLikelihoodSeen - log10PofDgivenAFi[baseIndex][i] > LOG10_OPTIMIZATION_EPSILON ) {
ignoreAlleleFrequenciesAboveI(i, numFrequencies, baseIndex);
return;
}
if ( log10PofDgivenAFi[baseIndex][i] > maxLikelihoodSeen )
maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][i];
}
}

View File

@ -13,6 +13,10 @@ import java.io.PrintWriter;
public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {
// for use in optimizing the P(D|AF) calculations:
// how far below the maximum likelihood seen so far (in log10 units) must a likelihood fall before we can stop calculating?
protected static final Double LOG10_OPTIMIZATION_EPSILON = 8.0;
// because the null allele frequencies are constant for a given N,
// we cache the results to avoid having to recompute everything
private HashMap<Integer, double[]> nullAlleleFrequencyCache = new HashMap<Integer, double[]>();
@ -205,6 +209,15 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
/********************************************************************************/
/**
 * Overwrites the log10 P(D|AF) entries of one alternate allele for every allele
 * frequency strictly greater than {@code freqI}, up to and including
 * {@code numFrequencies}, with the most negative finite double value.
 * The entry at {@code freqI} itself and all lower entries are left untouched;
 * nothing is written when {@code freqI >= numFrequencies}.
 *
 * @param freqI           allele frequency I (the last frequency whose value is kept)
 * @param numFrequencies  total number of allele frequencies (highest index written, inclusive)
 * @param altBaseIndex    the index of the alternate allele
 */
protected void ignoreAlleleFrequenciesAboveI(int freqI, int numFrequencies, int altBaseIndex) {
    // start one past freqI: the value already computed at freqI must be preserved
    for ( int frequency = freqI + 1; frequency <= numFrequencies; frequency++ ) {
        log10PofDgivenAFi[altBaseIndex][frequency] = -Double.MAX_VALUE;
    }
}
/**
* @param ref the ref base

View File

@ -13,8 +13,6 @@ import net.sf.samtools.SAMRecord;
public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {
protected static final String POOL_SAMPLE_NAME = "POOL";
private static final Double LOG10_OPTIMIZATION_EPSILON = 8.0;
private static FourBaseProbabilities fourBaseLikelihoods = null;
private static boolean USE_CACHE = true;
@ -75,8 +73,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
// an optimization to speed up the calculation: if we are beyond the local maximum such
// that subsequent likelihoods won't factor into the confidence score, just quit
if ( frequency > 0 && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) {
while ( ++frequency <= nChromosomes )
log10PofDgivenAFi[altIndex][frequency] = -1.0 * Double.MAX_VALUE;
ignoreAlleleFrequenciesAboveI(frequency, nChromosomes, altIndex);
return;
}