GenotypeLikelihoods now supports a cache per subclass, avoiding genotyping clashes
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1554 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0cc219c0df
commit
eeb9b6eb13
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import java.util.TreeMap;
|
||||
|
|
@ -215,9 +216,38 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood
|
|||
return super.clone();
|
||||
}
|
||||
|
||||
protected boolean cacheByTech() {
|
||||
return true;
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// caching routines
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
static GenotypeLikelihoods[][][][][] EMPIRICAL_CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
|
||||
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
SequencerPlatform pl = getReadSequencerPlatform(read);
|
||||
int a = pl.ordinal();
|
||||
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
||||
int j = qualityScore;
|
||||
int k = ploidy;
|
||||
int x = strandIndex(! read.getReadNegativeStrandFlag());
|
||||
|
||||
if ( val != null )
|
||||
EMPIRICAL_CACHE[a][i][j][k][x] = val;
|
||||
|
||||
return EMPIRICAL_CACHE[a][i][j][k][x];
|
||||
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// calculation of p(B|GT)
|
||||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
|
||||
|
|
|
|||
|
|
@ -38,8 +38,8 @@ import static java.lang.Math.pow;
|
|||
* model.
|
||||
*/
|
||||
public abstract class GenotypeLikelihoods implements Cloneable {
|
||||
private final static int FIXED_PLOIDY = 2;
|
||||
private final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
||||
protected final static int FIXED_PLOIDY = 2;
|
||||
protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
||||
|
||||
protected boolean enableCacheFlag = true;
|
||||
|
||||
|
|
@ -185,16 +185,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
enableCacheFlag = enable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the caller need to know the tech of the read? If true, we will use the declared tech of the
|
||||
* read for caching, which can be painfully slow. By default we aren't tech aware.
|
||||
*
|
||||
* @return true if the cache should be keyed by the read's declared tech; false by default
|
||||
*/
|
||||
protected boolean cacheByTech() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
|
|
@ -303,31 +293,45 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
//
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
static GenotypeLikelihoods[][][][][] CACHE =
|
||||
new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
static int cacheSize = 0;
|
||||
|
||||
public static void clearCache() {
|
||||
CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
cacheSize = 0;
|
||||
}
|
||||
/**
|
||||
* Procedure intended for overriding in subclasses. Returns / sets the cached GL value given all of the information
|
||||
* one can reasonably expect to have. The default cache is fairly simple.
|
||||
*
|
||||
* @param observedBase
|
||||
* @param qualityScore
|
||||
* @param ploidy
|
||||
* @param read
|
||||
* @param offset
|
||||
* @param val
|
||||
* @return
|
||||
*/
|
||||
protected abstract GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val );
|
||||
|
||||
private GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
protected GenotypeLikelihoods simpleGetSetCache( GenotypeLikelihoods[][][][] cache,
|
||||
char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
|
||||
EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform pl = cacheByTech() ? EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read) : EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.UNKNOWN;
|
||||
int a = pl.ordinal();
|
||||
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
||||
int j = qualityScore;
|
||||
int k = ploidy;
|
||||
int x = strandIndex(! read.getReadNegativeStrandFlag());
|
||||
int x = strandIndex(! read.getReadNegativeStrandFlag() );
|
||||
|
||||
if ( val != null )
|
||||
CACHE[a][i][j][k][x] = val;
|
||||
cache[i][j][k][x] = val;
|
||||
|
||||
return CACHE[a][i][j][k][x];
|
||||
return cache[i][j][k][x];
|
||||
}
|
||||
|
||||
protected int strandIndex(boolean fwdStrand) {
|
||||
return fwdStrand ? 0 : 1;
|
||||
}
|
||||
|
||||
//
|
||||
// All of the following routines are totally generic
|
||||
//
|
||||
|
||||
private GenotypeLikelihoods getCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
||||
return getSetCache( observedBase, qualityScore, ploidy, read, offset, null );
|
||||
}
|
||||
|
|
@ -336,10 +340,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
getSetCache( observedBase, qualityScore, ploidy, read, offset, val );
|
||||
}
|
||||
|
||||
private int strandIndex(boolean fwdStrand) {
|
||||
return fwdStrand ? 0 : 1;
|
||||
}
|
||||
|
||||
private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
||||
return getCache(observedBase, qualityScore, ploidy, read, offset) != null;
|
||||
}
|
||||
|
|
@ -364,7 +364,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
g.reallyAdd(observedBase, qualityScore, read, offset, false);
|
||||
|
||||
setCache(observedBase, qualityScore, ploidy, read, offset, g);
|
||||
cacheSize++;
|
||||
|
||||
//System.out.printf("Caching %c %d %d %s %s (%d total entries)%n", observedBase, qualityScore, ploidy, read.getReadName(), EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read), cacheSize);
|
||||
return g;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import java.util.TreeMap;
|
||||
|
|
@ -40,4 +41,10 @@ public class OneStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||
return 0; // equivalent to e model
|
||||
}
|
||||
}
|
||||
|
||||
static GenotypeLikelihoods[][][][] ONE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
return simpleGetSetCache(ONE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
|||
|
||||
/** Initialize the walker with some sensible defaults */
|
||||
public void initialize() {
|
||||
GenotypeLikelihoods.clearCache();
|
||||
//GenotypeLikelihoods.clearCache();
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import java.util.TreeMap;
|
||||
|
|
@ -33,4 +34,10 @@ public class ThreeStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||
return -log103; // equivalent to e / 3 model
|
||||
}
|
||||
|
||||
static GenotypeLikelihoods[][][][] THREE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
return simpleGetSetCache(THREE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,12 @@ public class SSGenotypeCallTest extends BaseTest {
|
|||
|
||||
// we need a fake GenotypeLikelihoods class
|
||||
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
|
||||
public boolean cacheIsEnabled() { return false; }
|
||||
|
||||
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
|
|
|
|||
Loading…
Reference in New Issue