GenotypeLikelhoods now support a cache per subclass, avoiding genotyping clashes

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1554 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-09-09 10:39:14 +00:00
parent 0cc219c0df
commit eeb9b6eb13
6 changed files with 84 additions and 35 deletions

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import static java.lang.Math.log10; import static java.lang.Math.log10;
import java.util.TreeMap; import java.util.TreeMap;
@ -215,9 +216,38 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood
return super.clone(); return super.clone();
} }
protected boolean cacheByTech() { // -----------------------------------------------------------------------------------------------------------------
return true; //
//
// caching routines
//
//
// -----------------------------------------------------------------------------------------------------------------
static GenotypeLikelihoods[][][][][] EMPIRICAL_CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
SequencerPlatform pl = getReadSequencerPlatform(read);
int a = pl.ordinal();
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
int j = qualityScore;
int k = ploidy;
int x = strandIndex(! read.getReadNegativeStrandFlag());
if ( val != null )
EMPIRICAL_CACHE[a][i][j][k][x] = val;
return EMPIRICAL_CACHE[a][i][j][k][x];
} }
// -----------------------------------------------------------------------------------------------------------------
//
//
// calculation of p(B|GT)
//
//
// -----------------------------------------------------------------------------------------------------------------
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) { protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
boolean fwdStrand = ! read.getReadNegativeStrandFlag(); boolean fwdStrand = ! read.getReadNegativeStrandFlag();

View File

@ -38,8 +38,8 @@ import static java.lang.Math.pow;
* model. * model.
*/ */
public abstract class GenotypeLikelihoods implements Cloneable { public abstract class GenotypeLikelihoods implements Cloneable {
private final static int FIXED_PLOIDY = 2; protected final static int FIXED_PLOIDY = 2;
private final static int MAX_PLOIDY = FIXED_PLOIDY + 1; protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
protected boolean enableCacheFlag = true; protected boolean enableCacheFlag = true;
@ -185,16 +185,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
enableCacheFlag = enable; enableCacheFlag = enable;
} }
/**
* Does the caller need to know the tech of the read? If true, we will use the declared tech of the
* read for caching, which can be painfully slow. By default we aren't tech aware.
*
* @return true
*/
protected boolean cacheByTech() {
return false;
}
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
// //
// //
@ -303,31 +293,45 @@ public abstract class GenotypeLikelihoods implements Cloneable {
// //
// //
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
static GenotypeLikelihoods[][][][][] CACHE =
new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
static int cacheSize = 0;
public static void clearCache() { /**
CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2]; * Procedure intended for overloading in subclasses. Returns / sets cached GL value given all of the information
cacheSize = 0; * one can reasonably expect to have. The default cache is fairly simple.
} *
* @param observedBase
* @param qualityScore
* @param ploidy
* @param read
* @param offset
* @param val
* @return
*/
protected abstract GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val );
private GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy, protected GenotypeLikelihoods simpleGetSetCache( GenotypeLikelihoods[][][][] cache,
SAMRecord read, int offset, GenotypeLikelihoods val ) { char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform pl = cacheByTech() ? EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read) : EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.UNKNOWN;
int a = pl.ordinal();
int i = BaseUtils.simpleBaseToBaseIndex(observedBase); int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
int j = qualityScore; int j = qualityScore;
int k = ploidy; int k = ploidy;
int x = strandIndex(! read.getReadNegativeStrandFlag()); int x = strandIndex(! read.getReadNegativeStrandFlag() );
if ( val != null ) if ( val != null )
CACHE[a][i][j][k][x] = val; cache[i][j][k][x] = val;
return CACHE[a][i][j][k][x]; return cache[i][j][k][x];
} }
protected int strandIndex(boolean fwdStrand) {
return fwdStrand ? 0 : 1;
}
//
// All oft he following routines are totally generic
//
private GenotypeLikelihoods getCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) { private GenotypeLikelihoods getCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
return getSetCache( observedBase, qualityScore, ploidy, read, offset, null ); return getSetCache( observedBase, qualityScore, ploidy, read, offset, null );
} }
@ -336,10 +340,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
getSetCache( observedBase, qualityScore, ploidy, read, offset, val ); getSetCache( observedBase, qualityScore, ploidy, read, offset, val );
} }
private int strandIndex(boolean fwdStrand) {
return fwdStrand ? 0 : 1;
}
private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) { private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
return getCache(observedBase, qualityScore, ploidy, read, offset) != null; return getCache(observedBase, qualityScore, ploidy, read, offset) != null;
} }
@ -364,7 +364,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
g.reallyAdd(observedBase, qualityScore, read, offset, false); g.reallyAdd(observedBase, qualityScore, read, offset, false);
setCache(observedBase, qualityScore, ploidy, read, offset, g); setCache(observedBase, qualityScore, ploidy, read, offset, g);
cacheSize++;
//System.out.printf("Caching %c %d %d %s %s (%d total entries)%n", observedBase, qualityScore, ploidy, read.getReadName(), EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read), cacheSize); //System.out.printf("Caching %c %d %d %s %s (%d total entries)%n", observedBase, qualityScore, ploidy, read.getReadName(), EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read), cacheSize);
return g; return g;

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import static java.lang.Math.log10; import static java.lang.Math.log10;
import java.util.TreeMap; import java.util.TreeMap;
@ -40,4 +41,10 @@ public class OneStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) { protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
return 0; // equivalent to e model return 0; // equivalent to e model
} }
}
static GenotypeLikelihoods[][][][] ONE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
return simpleGetSetCache(ONE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
}
}

View File

@ -79,7 +79,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
/** Initialize the walker with some sensible defaults */ /** Initialize the walker with some sensible defaults */
public void initialize() { public void initialize() {
GenotypeLikelihoods.clearCache(); //GenotypeLikelihoods.clearCache();
// nothing to do // nothing to do
} }

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import static java.lang.Math.log10; import static java.lang.Math.log10;
import java.util.TreeMap; import java.util.TreeMap;
@ -33,4 +34,10 @@ public class ThreeStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) { protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
return -log103; // equivalent to e / 3 model return -log103; // equivalent to e / 3 model
} }
static GenotypeLikelihoods[][][][] THREE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
return simpleGetSetCache(THREE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
}
} }

View File

@ -26,6 +26,12 @@ public class SSGenotypeCallTest extends BaseTest {
// we need a fake GenotypeLikelihoods class // we need a fake GenotypeLikelihoods class
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods { public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
public boolean cacheIsEnabled() { return false; }
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
return null;
}
/** /**
* Must be overridden by concrete subclasses * Must be overridden by concrete subclasses