Optimization to pooled calculation model: stop calculating P(D|AF) if we are beyond the max likelihood such that subsequent likelihoods won't factor into the confidence score. Also, use new Pileup interface.

Pooled calling now takes less than half the time it used to.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2449 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-28 18:39:55 +00:00
parent b1ac4b81d5
commit c8d0e6e004
1 changed files with 25 additions and 13 deletions

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
@ -12,6 +13,8 @@ import net.sf.samtools.SAMRecord;
public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {
protected static final String POOL_SAMPLE_NAME = "POOL";
private static final Double LOG10_OPTIMIZATION_EPSILON = 8.0;
private static FourBaseProbabilities fourBaseLikelihoods = null;
private static boolean USE_CACHE = true;
@ -52,24 +55,33 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref);
int altIndex = BaseUtils.simpleBaseToBaseIndex(alt);
for (int i = 0; i < pileup.getReads().size(); i++) {
int offset = pileup.getOffsets().get(i);
double maxLikelihoodSeen = -1.0 * Double.MAX_VALUE;
// ignore deletions
if ( offset == -1 )
continue;
// for each minor allele frequency, calculate log10PofDgivenAFi
for (int frequency = 0; frequency <= nChromosomes; frequency++) {
SAMRecord read = pileup.getReads().get(i);
char base = (char)read.getReadBases()[offset];
int bIndex = BaseUtils.simpleBaseToBaseIndex(base);
byte qual = read.getBaseQualities()[offset];
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() )
continue;
if ( qual > 0 && bIndex != -1 ) {
char base = (char)p.getBase();
byte qual = p.getQual();
// for each minor allele frequency, calculate log10PofDgivenAFi
for (int frequency = 0; frequency <= nChromosomes; frequency++)
log10PofDgivenAFi[altIndex][frequency] += calcPBGivenH(refIndex, altIndex, frequency, nChromosomes, base, qual, read, offset);
if ( qual > 0 && BaseUtils.simpleBaseToBaseIndex(base) != -1 )
log10PofDgivenAFi[altIndex][frequency] += calcPBGivenH(refIndex, altIndex, frequency, nChromosomes, base, qual, p.getRead(), p.getOffset());
}
// an optimization to speed up the calculation: if we are beyond the local maximum such
// that subsequent likelihoods won't factor into the confidence score, just quit
if ( frequency > 0 && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) {
while ( ++frequency <= nChromosomes )
log10PofDgivenAFi[altIndex][frequency] = -1.0 * Double.MAX_VALUE;
return;
}
if ( log10PofDgivenAFi[altIndex][frequency] > maxLikelihoodSeen )
maxLikelihoodSeen = log10PofDgivenAFi[altIndex][frequency];
}
}