Added previous optimization to diploid (non-pool) model and shaved off 20% of runtime from it. Moved out some common functionality to joint estimate parent class.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2453 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
92e3682991
commit
893c9c85fa
|
|
@ -61,6 +61,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
|
||||
// first, calculate for AF=0 (no change to matrix)
|
||||
log10PofDgivenAFi[baseIndex][0] = matrix.getLikelihoodsOfFrequency();
|
||||
double maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][0];
|
||||
|
||||
// for each minor allele frequency, calculate log10PofDgivenAFi
|
||||
for (int i = 1; i <= numFrequencies; i++) {
|
||||
|
|
@ -69,6 +70,16 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
|
||||
// calculate new likelihoods
|
||||
log10PofDgivenAFi[baseIndex][i] = matrix.getLikelihoodsOfFrequency();
|
||||
|
||||
// an optimization to speed up the calculation: if we are beyond the local maximum such
|
||||
// that subsequent likelihoods won't factor into the confidence score, just quit
|
||||
if ( maxLikelihoodSeen - log10PofDgivenAFi[baseIndex][i] > LOG10_OPTIMIZATION_EPSILON ) {
|
||||
ignoreAlleleFrequenciesAboveI(i, numFrequencies, baseIndex);
|
||||
return;
|
||||
}
|
||||
|
||||
if ( log10PofDgivenAFi[baseIndex][i] > maxLikelihoodSeen )
|
||||
maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][i];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,10 @@ import java.io.PrintWriter;
|
|||
|
||||
public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||
|
||||
// for use in optimizing the P(D|AF) calculations:
|
||||
// how much off from the max likelihoods do we need to be before we can quit calculating?
|
||||
protected static final Double LOG10_OPTIMIZATION_EPSILON = 8.0;
|
||||
|
||||
// because the null allele frequencies are constant for a given N,
|
||||
// we cache the results to avoid having to recompute everything
|
||||
private HashMap<Integer, double[]> nullAlleleFrequencyCache = new HashMap<Integer, double[]>();
|
||||
|
|
@ -205,6 +209,15 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
|
||||
/********************************************************************************/
|
||||
|
||||
/**
|
||||
* @param freqI allele frequency I
|
||||
* @param numFrequencies total number of allele frequencies
|
||||
* @param altBaseIndex the index of the alternate allele
|
||||
*/
|
||||
protected void ignoreAlleleFrequenciesAboveI(int freqI, int numFrequencies, int altBaseIndex) {
|
||||
while ( ++freqI <= numFrequencies )
|
||||
log10PofDgivenAFi[altBaseIndex][freqI] = -1.0 * Double.MAX_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ref the ref base
|
||||
|
|
|
|||
|
|
@ -13,8 +13,6 @@ import net.sf.samtools.SAMRecord;
|
|||
public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {
|
||||
protected static final String POOL_SAMPLE_NAME = "POOL";
|
||||
|
||||
private static final Double LOG10_OPTIMIZATION_EPSILON = 8.0;
|
||||
|
||||
private static FourBaseProbabilities fourBaseLikelihoods = null;
|
||||
private static boolean USE_CACHE = true;
|
||||
|
||||
|
|
@ -75,8 +73,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
|||
// an optimization to speed up the calculation: if we are beyond the local maximum such
|
||||
// that subsequent likelihoods won't factor into the confidence score, just quit
|
||||
if ( frequency > 0 && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) {
|
||||
while ( ++frequency <= nChromosomes )
|
||||
log10PofDgivenAFi[altIndex][frequency] = -1.0 * Double.MAX_VALUE;
|
||||
ignoreAlleleFrequenciesAboveI(frequency, nChromosomes, altIndex);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue