From c8d0e6e004d1db136ce2b05797549b198f8fdd4c Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 28 Dec 2009 18:39:55 +0000 Subject: [PATCH] Optimization to pooled calculation model: stop calculating P(D|AF) if we are beyond the max likelihood such that subsequent likelihoods won't factor into the confidence score. Also, use new Pileup interface. Pooled calling now takes less than half the time it used to. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2449 348d0f76-0448-11de-a6fe-93d51630548a --- .../genotyper/PooledCalculationModel.java | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index 9cfbb4fad..7509ca273 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; @@ -12,6 +13,8 @@ import net.sf.samtools.SAMRecord; public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel { protected static final String POOL_SAMPLE_NAME = "POOL"; + private static final Double LOG10_OPTIMIZATION_EPSILON = 8.0; + private static FourBaseProbabilities fourBaseLikelihoods = null; private static boolean USE_CACHE = true; @@ -52,24 +55,33 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode int refIndex = BaseUtils.simpleBaseToBaseIndex(ref); int altIndex = BaseUtils.simpleBaseToBaseIndex(alt); - for (int i = 0; i < pileup.getReads().size(); i++) { - int offset = pileup.getOffsets().get(i); + double maxLikelihoodSeen = -1.0 * Double.MAX_VALUE; - // ignore deletions - if ( offset == -1 ) - continue; + // for each minor allele frequency, calculate log10PofDgivenAFi + for (int frequency = 0; frequency <= nChromosomes; frequency++) { - SAMRecord read = pileup.getReads().get(i); - char base = (char)read.getReadBases()[offset]; - int bIndex = BaseUtils.simpleBaseToBaseIndex(base); - byte qual = read.getBaseQualities()[offset]; + for ( PileupElement p : pileup ) { + // ignore deletions + if ( p.isDeletion() ) + continue; - if ( qual > 0 && bIndex != -1 ) { + char base = (char)p.getBase(); + byte qual = p.getQual(); - // for each minor allele frequency, calculate log10PofDgivenAFi - for (int frequency = 0; frequency <= nChromosomes; frequency++) - log10PofDgivenAFi[altIndex][frequency] += calcPBGivenH(refIndex, altIndex, frequency, nChromosomes, base, qual, read, offset); + if ( qual > 0 && BaseUtils.simpleBaseToBaseIndex(base) != -1 ) + log10PofDgivenAFi[altIndex][frequency] += calcPBGivenH(refIndex, altIndex, frequency, nChromosomes, base, qual, p.getRead(), p.getOffset()); } + + // an optimization to speed up the calculation: if we are beyond the local maximum such + // that subsequent likelihoods won't factor into the confidence score, just quit + if ( frequency > 0 && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) { + while ( ++frequency <= nChromosomes ) + log10PofDgivenAFi[altIndex][frequency] = -1.0 * Double.MAX_VALUE; + return; + } + + if ( log10PofDgivenAFi[altIndex][frequency] > maxLikelihoodSeen ) + maxLikelihoodSeen = log10PofDgivenAFi[altIndex][frequency]; } }