GenotypeLikelhoods now support a cache per subclass, avoiding genotyping clashes
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1554 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0cc219c0df
commit
eeb9b6eb13
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
|
||||||
import static java.lang.Math.log10;
|
import static java.lang.Math.log10;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
@ -215,9 +216,38 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends GenotypeLikelihood
|
||||||
return super.clone();
|
return super.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean cacheByTech() {
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
return true;
|
//
|
||||||
|
//
|
||||||
|
// caching routines
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
static GenotypeLikelihoods[][][][][] EMPIRICAL_CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||||
|
|
||||||
|
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||||
|
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||||
|
SequencerPlatform pl = getReadSequencerPlatform(read);
|
||||||
|
int a = pl.ordinal();
|
||||||
|
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
||||||
|
int j = qualityScore;
|
||||||
|
int k = ploidy;
|
||||||
|
int x = strandIndex(! read.getReadNegativeStrandFlag());
|
||||||
|
|
||||||
|
if ( val != null )
|
||||||
|
EMPIRICAL_CACHE[a][i][j][k][x] = val;
|
||||||
|
|
||||||
|
return EMPIRICAL_CACHE[a][i][j][k][x];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// calculation of p(B|GT)
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||||
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
|
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
|
||||||
|
|
|
||||||
|
|
@ -38,8 +38,8 @@ import static java.lang.Math.pow;
|
||||||
* model.
|
* model.
|
||||||
*/
|
*/
|
||||||
public abstract class GenotypeLikelihoods implements Cloneable {
|
public abstract class GenotypeLikelihoods implements Cloneable {
|
||||||
private final static int FIXED_PLOIDY = 2;
|
protected final static int FIXED_PLOIDY = 2;
|
||||||
private final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
||||||
|
|
||||||
protected boolean enableCacheFlag = true;
|
protected boolean enableCacheFlag = true;
|
||||||
|
|
||||||
|
|
@ -185,16 +185,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
||||||
enableCacheFlag = enable;
|
enableCacheFlag = enable;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Does the caller need to know the tech of the read? If true, we will use the declared tech of the
|
|
||||||
* read for caching, which can be painfully slow. By default we aren't tech aware.
|
|
||||||
*
|
|
||||||
* @return true
|
|
||||||
*/
|
|
||||||
protected boolean cacheByTech() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
|
|
@ -303,31 +293,45 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
static GenotypeLikelihoods[][][][][] CACHE =
|
|
||||||
new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
|
||||||
static int cacheSize = 0;
|
|
||||||
|
|
||||||
public static void clearCache() {
|
/**
|
||||||
CACHE = new GenotypeLikelihoods[EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
* Procedure intended for overloading in subclasses. Returns / sets cached GL value given all of the information
|
||||||
cacheSize = 0;
|
* one can reasonably expect to have. The default cache is fairly simple.
|
||||||
}
|
*
|
||||||
|
* @param observedBase
|
||||||
|
* @param qualityScore
|
||||||
|
* @param ploidy
|
||||||
|
* @param read
|
||||||
|
* @param offset
|
||||||
|
* @param val
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected abstract GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||||
|
SAMRecord read, int offset, GenotypeLikelihoods val );
|
||||||
|
|
||||||
private GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
protected GenotypeLikelihoods simpleGetSetCache( GenotypeLikelihoods[][][][] cache,
|
||||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
char observedBase, byte qualityScore, int ploidy,
|
||||||
|
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||||
|
|
||||||
EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform pl = cacheByTech() ? EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read) : EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform.UNKNOWN;
|
|
||||||
int a = pl.ordinal();
|
|
||||||
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
||||||
int j = qualityScore;
|
int j = qualityScore;
|
||||||
int k = ploidy;
|
int k = ploidy;
|
||||||
int x = strandIndex(! read.getReadNegativeStrandFlag());
|
int x = strandIndex(! read.getReadNegativeStrandFlag() );
|
||||||
|
|
||||||
if ( val != null )
|
if ( val != null )
|
||||||
CACHE[a][i][j][k][x] = val;
|
cache[i][j][k][x] = val;
|
||||||
|
|
||||||
return CACHE[a][i][j][k][x];
|
return cache[i][j][k][x];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected int strandIndex(boolean fwdStrand) {
|
||||||
|
return fwdStrand ? 0 : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// All oft he following routines are totally generic
|
||||||
|
//
|
||||||
|
|
||||||
private GenotypeLikelihoods getCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
private GenotypeLikelihoods getCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
||||||
return getSetCache( observedBase, qualityScore, ploidy, read, offset, null );
|
return getSetCache( observedBase, qualityScore, ploidy, read, offset, null );
|
||||||
}
|
}
|
||||||
|
|
@ -336,10 +340,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
||||||
getSetCache( observedBase, qualityScore, ploidy, read, offset, val );
|
getSetCache( observedBase, qualityScore, ploidy, read, offset, val );
|
||||||
}
|
}
|
||||||
|
|
||||||
private int strandIndex(boolean fwdStrand) {
|
|
||||||
return fwdStrand ? 0 : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
|
||||||
return getCache(observedBase, qualityScore, ploidy, read, offset) != null;
|
return getCache(observedBase, qualityScore, ploidy, read, offset) != null;
|
||||||
}
|
}
|
||||||
|
|
@ -364,7 +364,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
||||||
g.reallyAdd(observedBase, qualityScore, read, offset, false);
|
g.reallyAdd(observedBase, qualityScore, read, offset, false);
|
||||||
|
|
||||||
setCache(observedBase, qualityScore, ploidy, read, offset, g);
|
setCache(observedBase, qualityScore, ploidy, read, offset, g);
|
||||||
cacheSize++;
|
|
||||||
|
|
||||||
//System.out.printf("Caching %c %d %d %s %s (%d total entries)%n", observedBase, qualityScore, ploidy, read.getReadName(), EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read), cacheSize);
|
//System.out.printf("Caching %c %d %d %s %s (%d total entries)%n", observedBase, qualityScore, ploidy, read.getReadName(), EmpiricalSubstitutionGenotypeLikelihoods.getReadSequencerPlatform(read), cacheSize);
|
||||||
return g;
|
return g;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
|
||||||
import static java.lang.Math.log10;
|
import static java.lang.Math.log10;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
@ -40,4 +41,10 @@ public class OneStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||||
return 0; // equivalent to e model
|
return 0; // equivalent to e model
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
static GenotypeLikelihoods[][][][] ONE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||||
|
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||||
|
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||||
|
return simpleGetSetCache(ONE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -79,7 +79,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
|
|
||||||
/** Initialize the walker with some sensible defaults */
|
/** Initialize the walker with some sensible defaults */
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
GenotypeLikelihoods.clearCache();
|
//GenotypeLikelihoods.clearCache();
|
||||||
// nothing to do
|
// nothing to do
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
|
||||||
import static java.lang.Math.log10;
|
import static java.lang.Math.log10;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
@ -33,4 +34,10 @@ public class ThreeStateErrorGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||||
return -log103; // equivalent to e / 3 model
|
return -log103; // equivalent to e / 3 model
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static GenotypeLikelihoods[][][][] THREE_STATE_CACHE = new GenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
|
||||||
|
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||||
|
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||||
|
return simpleGetSetCache(THREE_STATE_CACHE, observedBase, qualityScore, ploidy, read, offset, val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,12 @@ public class SSGenotypeCallTest extends BaseTest {
|
||||||
|
|
||||||
// we need a fake GenotypeLikelihoods class
|
// we need a fake GenotypeLikelihoods class
|
||||||
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
|
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
|
||||||
|
public boolean cacheIsEnabled() { return false; }
|
||||||
|
|
||||||
|
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||||
|
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Must be overridden by concrete subclasses
|
* Must be overridden by concrete subclasses
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue