GenotypeLikelhoods now support a cache per subclass, avoiding genotyping clashes

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1554 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-09-09 10:39:14 +00:00
parent 0cc219c0df
commit eeb9b6eb13
6 changed files with 84 additions and 35 deletions

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import static java.lang.Math.log10;
import java.util.TreeMap;
@ -215,9 +216,38 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood
return super.clone();
}
protected boolean cacheByTech() {
return true;
// -----------------------------------------------------------------------------------------------------------------
//
//
// caching routines
//
//
// -----------------------------------------------------------------------------------------------------------------
static GenotypeLikelihoods[][][][][] EMPIRICAL_CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
SequencerPlatform pl = getReadSequencerPlatform(read);
int a = pl.ordinal();
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
int j = qualityScore;
int k = ploidy;
int x = strandIndex(! read.getReadNegativeStrandFlag());
if ( val != null )
EMPIRICAL_CACHE[a][i][j][k][x] = val;
return EMPIRICAL_CACHE[a][i][j][k][x];
}
// -----------------------------------------------------------------------------------------------------------------
//
//
// calculation of p(B|GT)
//
//
// -----------------------------------------------------------------------------------------------------------------
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
boolean fwdStrand = ! read.getReadNegativeStrandFlag();

View File

@ -38,8 +38,8 @@ import static java.lang.Math.pow;
* model.
*/
public abstract class GenotypeLikelihoods implements Cloneable {
private final static int FIXED_PLOIDY = 2;
private final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
protected final static int FIXED_PLOIDY = 2;
protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
protected boolean enableCacheFlag = true;
@ -185,16 +185,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
enableCacheFlag = enable;
}
/**
* Does the caller need to know the tech of the read? If true, we will use the declared tech of the
* read for caching, which can be painfully slow. By default we aren't tech aware.
*
* @return true
*/
protected boolean cacheByTech() {
return false;
}
// -----------------------------------------------------------------------------------------------------------------
//
//
@ -303,31 +293,45 @@ public abstract class GenotypeLikelihoods implements Cloneable {
//
//
// -----------------------------------------------------------------------------------------------------------------
static GenotypeLikelihoods[][][][][] CACHE =
new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
static int cacheSize = 0;
public static void clearCache() {
CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
cacheSize = 0;
}
/**
* Procedure intended for overloading in subclasses. Returns / sets cached GL value given all of the information
* one can reasonably expect to have. The default cache is fairly simple.
*
* @param observedBase
* @param qualityScore
* @param ploidy
* @param read
* @param offset
* @param val
* @return
*/
protected abstract GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val );
private GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
protected GenotypeLikelihoods simpleGetSetCache( GenotypeLikelihoods[][][][] cache,
char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform pl = cacheByTech() ? EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read) : EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.UNKNOWN;
int a = pl.ordinal();
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
int j = qualityScore;
int k = ploidy;
int x = strandIndex(! read.getReadNegativeStrandFlag());
int x = strandIndex(! read.getReadNegativeStrandFlag() );
if ( val != null )
CACHE[a][i][j][k][x] = val;
cache[i][j][k][x] = val;
return CACHE[a][i][j][k][x];
return cache[i][j][k][x];
}
protected int strandIndex(boolean fwdStrand) {
return fwdStrand ? 0 : 1;
}
//
// All oft he following routines are totally generic
//
private GenotypeLikelihoods getCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
return getSetCache( observedBase, qualityScore, ploidy, read, offset, null );
}
@ -336,10 +340,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
getSetCache( observedBase, qualityScore, ploidy, read, offset, val );
}
private int strandIndex(boolean fwdStrand) {
return fwdStrand ? 0 : 1;
}
private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
return getCache(observedBase, qualityScore, ploidy, read, offset) != null;
}
@ -364,7 +364,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
g.reallyAdd(observedBase, qualityScore, read, offset, false);
setCache(observedBase, qualityScore, ploidy, read, offset, g);
cacheSize++;
//System.out.printf("Caching %c %d %d %s %s (%d total entries)%n", observedBase, qualityScore, ploidy, read.getReadName(), EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read), cacheSize);
return g;

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import static java.lang.Math.log10;
import java.util.TreeMap;
@ -40,4 +41,10 @@ public class OneStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
return 0; // equivalent to e model
}
}
static GenotypeLikelihoods[][][][] ONE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
return simpleGetSetCache(ONE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
}
}

View File

@ -79,7 +79,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
/** Initialize the walker with some sensible defaults */
public void initialize() {
GenotypeLikelihoods.clearCache();
//GenotypeLikelihoods.clearCache();
// nothing to do
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import static java.lang.Math.log10;
import java.util.TreeMap;
@ -33,4 +34,10 @@ public class ThreeStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
return -log103; // equivalent to e / 3 model
}
static GenotypeLikelihoods[][][][] THREE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
return simpleGetSetCache(THREE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
}
}

View File

@ -26,6 +26,12 @@ public class SSGenotypeCallTest extends BaseTest {
// we need a fake GenotypeLikelihoods class
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
public boolean cacheIsEnabled() { return false; }
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, GenotypeLikelihoods val ) {
return null;
}
/**
* Must be overridden by concrete subclasses