Intermediate checking before code reorganization. Full blown support for empirical transition probs in SSG for all platforms. Support for defaultPlatform arg in SSG. Renaming classes for final cleanup

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1479 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-08-30 17:34:43 +00:00
parent 6ab9ddf9f5
commit 5af4bb628b
4 changed files with 349 additions and 177 deletions

View File

@ -4,6 +4,11 @@ import org.broadinstitute.sting.utils.BaseUtils;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.TreeMap;
import java.util.EnumMap;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
/**
* Created by IntelliJ IDEA.
@ -13,25 +18,208 @@ import static java.lang.Math.pow;
* To change this template use File | Settings | File Templates.
*/
public class EmpiricalSubstitutionGenotypeLikelihoods extends NewHotnessGenotypeLikelihoods {
private final static double[][] misCallProbs = new double[4][4];
// --------------------------------------------------------------------------------------------------------------
//
// Static methods to manipulate machine platforms
//
// --------------------------------------------------------------------------------------------------------------
public enum SequencerPlatform {
SOLEXA, // Solexa / Illumina
ROCHE454, // 454
SOLID, // SOLiD
UNKNOWN // No idea -- defaulting to 1/3
}
private static void addMisCall(char miscalledBase, char trueBase, double p) {
private final static String SAM_PLATFORM_TAG = "PL";
private static TreeMap<String, SequencerPlatform> PLFieldToSequencerPlatform = new TreeMap<String, SequencerPlatform>();
private static void bind(String s, SequencerPlatform x) {
PLFieldToSequencerPlatform.put(s, x);
PLFieldToSequencerPlatform.put(s.toUpperCase(), x);
PLFieldToSequencerPlatform.put(s.toLowerCase(), x);
}
//
// Static list of platforms supported by this system.
//
static {
bind("LS454", SequencerPlatform.ROCHE454);
bind("454", SequencerPlatform.ROCHE454);
bind("ILLUMINA", SequencerPlatform.SOLEXA);
bind("solid", SequencerPlatform.SOLID);
}
public static SequencerPlatform standardizeSequencerPlatform( final String sequencerString ) {
if ( sequencerString != null && PLFieldToSequencerPlatform.containsKey(sequencerString) ) {
return PLFieldToSequencerPlatform.get(sequencerString);
} else {
return SequencerPlatform.UNKNOWN;
}
}
// --------------------------------------------------------------------------------------------------------------
//
// Static methods to get at the transition tables themselves
//
// --------------------------------------------------------------------------------------------------------------
/**
* A matrix of value i x j -> log10(p) where
*
* i - char of the miscalled base (i.e., A)
* j - char of the presumed true base (i.e., C)
* log10p - empirical probability p that A is actually C
*
* The table is available for each technology
*/
private final static EnumMap<SequencerPlatform, double[][]> log10pTrueGivenMiscall = new EnumMap<SequencerPlatform, double[][]>(SequencerPlatform.class);
private static void addMisCall(final SequencerPlatform pl, char miscalledBase, char trueBase, double p) {
if ( ! log10pTrueGivenMiscall.containsKey(pl) )
log10pTrueGivenMiscall.put(pl, new double[4][4]);
double[][] misCallProbs = log10pTrueGivenMiscall.get(pl);
int i = BaseUtils.simpleBaseToBaseIndex(miscalledBase);
int j = BaseUtils.simpleBaseToBaseIndex(trueBase);
misCallProbs[i][j] = log10(p);
}
private static double getProbMiscallIsBase(char miscalledBase, char trueBase) {
private static double getProbMiscallIsBase(SequencerPlatform pl, char miscalledBase, char trueBase) {
int i = BaseUtils.simpleBaseToBaseIndex(miscalledBase);
int j = BaseUtils.simpleBaseToBaseIndex(trueBase);
double logP = misCallProbs[i][j];
double logP = log10pTrueGivenMiscall.get(pl)[i][j];
if ( logP == 0.0 )
throw new RuntimeException(String.format("Bad miscall base request miscalled=%c true=%b", miscalledBase, trueBase));
else
return logP;
}
private static void addSolexa() {
SequencerPlatform pl = SequencerPlatform.SOLEXA;
addMisCall(pl, 'A', 'C', 57.7/100.0);
addMisCall(pl, 'A', 'G', 17.1/100.0);
addMisCall(pl, 'A', 'T', 25.2/100.0);
addMisCall(pl, 'C', 'A', 34.9/100.0);
addMisCall(pl, 'C', 'G', 11.3/100.0);
addMisCall(pl, 'C', 'T', 53.9/100.0);
addMisCall(pl, 'G', 'A', 31.9/100.0);
addMisCall(pl, 'G', 'C', 5.1/100.0);
addMisCall(pl, 'G', 'T', 63.0/100.0);
addMisCall(pl, 'T', 'A', 45.8/100.0);
addMisCall(pl, 'T', 'C', 22.1/100.0);
addMisCall(pl, 'T', 'G', 32.0/100.0);
}
//
// // TODO -- delete me for testing only
// private static void addSolexa() {
// SequencerPlatform pl = SequencerPlatform.SOLEXA;
// addMisCall(pl, 'A', 'C', 59.2/100.0);
// addMisCall(pl, 'A', 'G', 15.3/100.0);
// addMisCall(pl, 'A', 'T', 25.6/100.0);
//
// addMisCall(pl, 'C', 'A', 54.2/100.0);
// addMisCall(pl, 'C', 'G', 10.3/100.0);
// addMisCall(pl, 'C', 'T', 35.5/100.0);
//
// addMisCall(pl, 'G', 'A', 26.4/100.0);
// addMisCall(pl, 'G', 'C', 5.6/100.0);
// addMisCall(pl, 'G', 'T', 68.0/100.0);
//
// addMisCall(pl, 'T', 'A', 41.8/100.0);
// addMisCall(pl, 'T', 'C', 17.3/100.0);
// addMisCall(pl, 'T', 'G', 40.9/100.0);
//
// }
private static void addSOLiD() {
SequencerPlatform pl = SequencerPlatform.SOLID;
addMisCall(pl, 'A', 'C', 18.7/100.0);
addMisCall(pl, 'A', 'G', 42.5/100.0);
addMisCall(pl, 'A', 'T', 38.7/100.0);
addMisCall(pl, 'C', 'A', 27.0/100.0);
addMisCall(pl, 'C', 'G', 18.9/100.0);
addMisCall(pl, 'C', 'T', 54.1/100.0);
addMisCall(pl, 'G', 'A', 61.0/100.0);
addMisCall(pl, 'G', 'C', 15.7/100.0);
addMisCall(pl, 'G', 'T', 23.2/100.0);
addMisCall(pl, 'T', 'A', 40.5/100.0);
addMisCall(pl, 'T', 'C', 34.3/100.0);
addMisCall(pl, 'T', 'G', 25.2/100.0);
}
private static void add454() {
SequencerPlatform pl = SequencerPlatform.ROCHE454;
addMisCall(pl, 'A', 'C', 23.2/100.0);
addMisCall(pl, 'A', 'G', 42.6/100.0);
addMisCall(pl, 'A', 'T', 34.3/100.0);
addMisCall(pl, 'C', 'A', 19.7/100.0);
addMisCall(pl, 'C', 'G', 8.4/100.0);
addMisCall(pl, 'C', 'T', 71.9/100.0);
addMisCall(pl, 'G', 'A', 71.5/100.0);
addMisCall(pl, 'G', 'C', 6.6/100.0);
addMisCall(pl, 'G', 'T', 21.9/100.0);
addMisCall(pl, 'T', 'A', 43.8/100.0);
addMisCall(pl, 'T', 'C', 37.8/100.0);
addMisCall(pl, 'T', 'G', 18.5/100.0);
}
private static void addUnknown() {
SequencerPlatform pl = SequencerPlatform.UNKNOWN;
for ( char b1 : BaseUtils.BASES ) {
for ( char b2 : BaseUtils.BASES ) {
if ( b1 != b2 )
addMisCall(pl, b1, b2, 1.0/3.0);
}
}
}
static {
addSolexa();
add454();
addSOLiD();
addUnknown();
}
// --------------------------------------------------------------------------------------------------------------
//
// The actual objects themselves
//
// --------------------------------------------------------------------------------------------------------------
private boolean raiseErrorOnUnknownPlatform = true;
private SequencerPlatform defaultPlatform = SequencerPlatform.UNKNOWN;
//
// forwarding constructors -- don't do anything at all
//
public EmpiricalSubstitutionGenotypeLikelihoods() { super(); }
public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); }
public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors, boolean raiseErrorOnUnknownPlatform) {
super(priors);
this.raiseErrorOnUnknownPlatform = raiseErrorOnUnknownPlatform;
}
public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors, SequencerPlatform assumeUnknownPlatformsAreThis) {
super(priors);
if ( assumeUnknownPlatformsAreThis != null ) {
raiseErrorOnUnknownPlatform = false;
defaultPlatform = assumeUnknownPlatformsAreThis;
}
}
/**
* Cloning of the object
@ -40,73 +228,35 @@ public class EmpiricalSubstitutionGenotypeLikelihoods extends NewHotnessGenotype
*/
protected Object clone() throws CloneNotSupportedException {
return super.clone();
}
// NA12878 CHR1 solexa table
//
// True base A C G T Totals
// Error base Counts 1190455 686187 685080 1227860 3789582
// A 938661 0.0% 48.6% 16.3% 35.1% 100.00%
// C 955469 60.6% 0.0% 7.9% 31.5% 100.00%
// G 964246 30.2% 7.8% 0.0% 62.0% 100.00%
// T 931206 34.4% 16.6% 49.0% 0.0% 100.00%
//
// V2 table -- didn't incorporate complements based on read orientation
// static {
// addMisCall('A', 'C', 48.6/100.0);
// addMisCall('A', 'G', 16.3/100.0);
// addMisCall('A', 'T', 35.1/100.0);
//
// addMisCall('C', 'A', 60.6/100.0);
// addMisCall('C', 'G', 7.9/100.0);
// addMisCall('C', 'T', 31.5/100.0);
//
// addMisCall('G', 'A', 30.2/100.0);
// addMisCall('G', 'C', 7.8/100.0);
// addMisCall('G', 'T', 62.0/100.0);
//
// addMisCall('T', 'A', 34.4/100.0);
// addMisCall('T', 'C', 16.6/100.0);
// addMisCall('T', 'G', 49.9/100.0);
// }
//True base A C G T Totals
//Error base Counts 1209203 718053 653214 1209112 3789582
//A 809944 0.0% 59.2% 15.3% 25.6% 100.00%
//C 934612 54.2% 0.0% 10.3% 35.5% 100.00%
//G 985103 26.4% 5.6% 0.0% 68.0% 100.00%
//T 1059923 41.8% 17.3% 40.9% 0.0% 100.00%
static {
addMisCall('A', 'C', 59.2/100.0);
addMisCall('A', 'G', 15.3/100.0);
addMisCall('A', 'T', 25.6/100.0);
addMisCall('C', 'A', 54.2/100.0);
addMisCall('C', 'G', 10.3/100.0);
addMisCall('C', 'T', 35.5/100.0);
addMisCall('G', 'A', 26.4/100.0);
addMisCall('G', 'C', 5.6/100.0);
addMisCall('G', 'T', 68.0/100.0);
addMisCall('T', 'A', 41.8/100.0);
addMisCall('T', 'C', 17.3/100.0);
addMisCall('T', 'G', 40.9/100.0);
}
//
// forwarding constructors -- don't do anything at all
//
public EmpiricalSubstitutionGenotypeLikelihoods() { super(); }
public EmpiricalSubstitutionGenotypeLikelihoods(DiploidGenotypePriors priors) { super(priors); }
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, boolean fwdStrand) {
if ( fwdStrand ) {
return getProbMiscallIsBase(observedBase, chromBase);
} else {
double v = getProbMiscallIsBase(BaseUtils.simpleComplement(observedBase), BaseUtils.simpleComplement(chromBase));
//System.out.printf("R: %c/%c => %f compared to %f%n", observedBase, chromBase, pow(10, getProbMiscallIsBase(observedBase, chromBase)), pow(10, v));
return v;
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
final String readGroupString = ((String)read.getAttribute("RG"));
SAMReadGroupRecord readGroup = readGroupString == null ? null : read.getHeader().getReadGroup(readGroupString);
final String platformName = readGroup == null ? null : (String)readGroup.getAttribute(SAM_PLATFORM_TAG);
SequencerPlatform pl = standardizeSequencerPlatform(platformName);
if ( pl == SequencerPlatform.UNKNOWN ) {
if ( raiseErrorOnUnknownPlatform )
throw new RuntimeException("Unknown Sequencer platform for read " + read.format());
else {
pl = defaultPlatform;
}
}
//System.out.printf("%s for %s%n", pl, read);
double log10p = 0.0;
if ( fwdStrand ) {
log10p = getProbMiscallIsBase(pl, observedBase, chromBase);
} else {
log10p = getProbMiscallIsBase(pl, BaseUtils.simpleComplement(observedBase), BaseUtils.simpleComplement(chromBase));
}
//System.out.printf("p = %f for %s %c %c fwd=%b %d at %s%n", pow(10,log10p), pl, observedBase, chromBase, fwdStrand, offset, read.getReadName() );
return log10p;
}
}

View File

@ -51,6 +51,9 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
private DiploidGenotypePriors priors = null;
private boolean verbose = false;
private int minQScoreToInclude = 0;
/**
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
*/
@ -85,6 +88,23 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
posteriors = priors.getPriors().clone(); // posteriors are all the priors
}
public void setVerbose(boolean v) {
verbose = v;
}
public boolean isVerbose() {
return verbose;
}
public int getMinQScoreToInclude() {
return minQScoreToInclude;
}
public void setMinQScoreToInclude(int minQScoreToInclude) {
this.minQScoreToInclude = minQScoreToInclude;
}
/**
* Returns an array of log10 likelihoods for each genotype, indexed by DiploidGenotype.ordinal values()
* @return
@ -142,23 +162,6 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
return getPriors()[g.ordinal()];
}
/**
* Are we ignoring Q0 bases during calculations?
* @return
*/
public boolean isFilteringQ0Bases() {
return filterQ0Bases;
}
/**
* Enable / disable filtering of Q0 bases. Enabled by default
*
* @param filterQ0Bases
*/
public void filterQ0Bases(boolean filterQ0Bases) {
this.filterQ0Bases = filterQ0Bases;
}
// -----------------------------------------------------------------------------------------------------------------
//
//
@ -175,58 +178,8 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
* @param qualityScore
* @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example)
*/
public int add(char observedBase, byte qualityScore, boolean fwdStrand) {
return add(observedBase, qualityScore, fwdStrand, true, false);
}
public int add(char observedBase, byte qualityScore, boolean fwdStrand, boolean enableCache, boolean verbose) {
if ( badBase(observedBase) ) {
throw new RuntimeException(String.format("BUG: unexpected base %c with Q%d passed to GenotypeLikelihoods", observedBase, qualityScore));
}
// TODO -- we should probably filter Q1 bases too
if (qualityScore <= 0) {
if ( isFilteringQ0Bases() ) {
return 0;
} else {
qualityScore = 1;
}
}
// Handle caching if requested. Just look up the cached result if its available, or compute and store it
NewHotnessGenotypeLikelihoods cached = null;
if ( enableCache ) {
if ( ! inCache(observedBase, qualityScore, FIXED_PLOIDY, fwdStrand) ) {
cached = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, fwdStrand);
} else {
cached = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, fwdStrand);
}
}
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double likelihoodCalc = ! enableCache ? log10PofObservingBaseGivenGenotype(observedBase, g, qualityScore, fwdStrand) : 0.0;
double likelihoodCached = enableCache ? cached.likelihoods[g.ordinal()] : 0.0;
double likelihood = enableCache ? likelihoodCached : likelihoodCalc;
//if ( enableCache && likelihoodCalc != 0.0 && MathUtils.compareDoubles(likelihoodCached, likelihoodCalc) != 0 ) {
// System.out.printf("ERROR: Likelihoods not equal is cache=%f != calc=%f for %c %d %s%n",
// likelihoodCached, likelihoodCalc, observedBase, qualityScore, g.toString());
//}
if ( verbose )
System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f%n", observedBase, g, qualityScore, fwdStrand ? "+" : "-", likelihood);
likelihoods[g.ordinal()] += likelihood;
posteriors[g.ordinal()] += likelihood;
}
if ( verbose ) {
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); }
System.out.println();
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", likelihoods[g.ordinal()]); }
System.out.println();
}
return 1;
public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) {
return reallyAdd(observedBase, qualityScore, read, offset, true);
}
/**
@ -237,7 +190,7 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
* @param pileup
* @return the number of good bases found in the pileup
*/
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean verbose) {
public int add(ReadBackedPileup pileup, boolean ignoreBadBases) {
int n = 0;
for (int i = 0; i < pileup.getReads().size(); i++) {
@ -246,20 +199,67 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
if ( ! ignoreBadBases || ! badBase(base) ) {
n += add(base, qual, ! read.getReadNegativeStrandFlag(), true, verbose);
n += add(base, qual, read, offset);
}
}
return n;
}
public int add(ReadBackedPileup pileup, boolean ignoreBadBases) {
return add(pileup, ignoreBadBases, false);
public int add(ReadBackedPileup pileup) {
return add(pileup, false);
}
public int add(ReadBackedPileup pileup) {
return add(pileup, false);
private int reallyAdd(char observedBase, byte qualityScore, SAMRecord read, int offset, boolean enableCache) {
if ( badBase(observedBase) ) {
throw new RuntimeException(String.format("BUG: unexpected base %c with Q%d passed to GenotypeLikelihoods", observedBase, qualityScore));
}
int nBasesAdded = 0; // the result -- how many bases did we add?
if ( qualityScore > getMinQScoreToInclude() ) {
// Handle caching if requested. Just look up the cached result if its available, or compute and store it
NewHotnessGenotypeLikelihoods cached = null;
if ( enableCache ) {
if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read, offset ) ) {
cached = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset );
} else {
cached = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset );
}
}
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double likelihoodCalc = ! enableCache ? log10PofObservingBaseGivenGenotype(observedBase, g, qualityScore, read, offset) : 0.0;
double likelihoodCached = enableCache ? cached.likelihoods[g.ordinal()] : 0.0;
double likelihood = enableCache ? likelihoodCached : likelihoodCalc;
//if ( enableCache && likelihoodCalc != 0.0 && MathUtils.compareDoubles(likelihoodCached, likelihoodCalc) != 0 ) {
// System.out.printf("ERROR: Likelihoods not equal is cache=%f != calc=%f for %c %d %s%n",
// likelihoodCached, likelihoodCalc, observedBase, qualityScore, g.toString());
//}
if ( isVerbose() ) {
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f / %f%n",
observedBase, g, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood);
}
likelihoods[g.ordinal()] += likelihood;
posteriors[g.ordinal()] += likelihood;
}
if ( isVerbose() ) {
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%s\t", g); }
System.out.println();
for ( DiploidGenotype g : DiploidGenotype.values() ) { System.out.printf("%.2f\t", likelihoods[g.ordinal()]); }
System.out.println();
}
nBasesAdded = 1;
}
return nBasesAdded;
}
// -----------------------------------------------------------------------------------------------------------------
@ -272,34 +272,56 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
final static NewHotnessGenotypeLikelihoods[][][][] cache = new NewHotnessGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE][MAX_PLOIDY][2];
static int cacheSize = 0;
private NewHotnessGenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
SAMRecord read, int offset, NewHotnessGenotypeLikelihoods val ) {
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
int j = qualityScore;
int k = ploidy;
int x = strandIndex(! read.getReadNegativeStrandFlag());
if ( val != null )
cache[i][j][k][x] = val;
return cache[i][j][k][x];
}
private NewHotnessGenotypeLikelihoods getCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
return getSetCache( observedBase, qualityScore, ploidy, read, offset, null );
}
private void setCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset, NewHotnessGenotypeLikelihoods val ) {
getSetCache( observedBase, qualityScore, ploidy, read, offset, val );
}
private int strandIndex(boolean fwdStrand) {
return fwdStrand ? 0 : 1;
}
private boolean inCache( char observedBase, byte qualityScore, int ploidy, boolean fwdStrand ) {
return cache[BaseUtils.simpleBaseToBaseIndex(observedBase)][qualityScore][ploidy][strandIndex(fwdStrand)] != null;
private boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
return getCache(observedBase, qualityScore, ploidy, read, offset) != null;
}
private NewHotnessGenotypeLikelihoods getCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, boolean fwdStrand ) {
if ( ! inCache(observedBase, qualityScore, ploidy, fwdStrand ) )
throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, fwdstrand=%b",
observedBase, qualityScore, ploidy, fwdStrand));
private NewHotnessGenotypeLikelihoods getCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
if ( ! inCache(observedBase, qualityScore, ploidy, read, offset ) )
throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s",
observedBase, qualityScore, ploidy, read));
return cache[BaseUtils.simpleBaseToBaseIndex(observedBase)][qualityScore][ploidy][strandIndex(fwdStrand)];
return getCache(observedBase, qualityScore, ploidy, read, offset);
}
private NewHotnessGenotypeLikelihoods calculateCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, boolean fwdStrand ) {
if ( inCache(observedBase, qualityScore, ploidy, fwdStrand ) )
throw new RuntimeException(String.format("BUG: trying to set an already set cached genotype likelihood at base=%c, qual=%d, ploidy=%d, fwdstrand=%b",
observedBase, qualityScore, ploidy, fwdStrand));
private NewHotnessGenotypeLikelihoods calculateCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset ) {
if ( inCache(observedBase, qualityScore, ploidy, read, offset ) )
throw new RuntimeException(String.format("BUG: trying to set an already set cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s",
observedBase, qualityScore, ploidy, read));
// create a new genotype likelihoods object and add this single base to it -- now we have a cached result
try {
NewHotnessGenotypeLikelihoods g = (NewHotnessGenotypeLikelihoods)this.clone();
g.initialize();
g.add(observedBase, qualityScore, fwdStrand, false, false);
g.reallyAdd(observedBase, qualityScore, read, offset, false);
cache[BaseUtils.simpleBaseToBaseIndex(observedBase)][qualityScore][ploidy][strandIndex(fwdStrand)] = g;
setCache(observedBase, qualityScore, ploidy, read, offset, g);
cacheSize++;
//System.out.printf("Caching %c %d %d %d %b (%d total entries)%n", observedBase, BaseUtils.simpleBaseToBaseIndex(observedBase), qualityScore, ploidy, fwdStrand, cacheSize);
@ -403,19 +425,20 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
* @param qual
* @return
*/
protected double log10PofObservingBaseGivenGenotype(char observedBase, DiploidGenotype g, byte qual, boolean fwdStrand) {
protected double log10PofObservingBaseGivenGenotype(char observedBase, DiploidGenotype g, byte qual, SAMRecord read, int offset ) {
if (qual == 0) { // zero quals are wrong
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %s %d", observedBase, g, qual));
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %s %d at %d in %s",
observedBase, g, qual, offset, read));
}
// todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop
double p_base = 0.0;
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base1, qual, FIXED_PLOIDY, fwdStrand));
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base2, qual, FIXED_PLOIDY, fwdStrand));
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base1, qual, FIXED_PLOIDY, read, offset ));
p_base += pow(10, log10PofObservingBaseGivenChromosome(observedBase, g.base2, qual, FIXED_PLOIDY, read, offset ));
return log10(p_base);
}
protected double log10PofObservingBaseGivenChromosome(char observedBase, char chromBase, byte qual, int ploidy, boolean fwdStrand) {
protected double log10PofObservingBaseGivenChromosome(char observedBase, char chromBase, byte qual, int ploidy, SAMRecord read, int offset) {
double logP = 0.0;
if ( observedBase == chromBase ) {
@ -425,7 +448,7 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
logP = log10(1.0 - e);
} else {
// the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error)
logP = qual / -10.0 + log10PofTrueBaseGivenMiscall(observedBase, chromBase, fwdStrand);
logP = qual / -10.0 + log10PofTrueBaseGivenMiscall(observedBase, chromBase, read, offset);
}
// adjust for ploidy. We take the raw p(obs | chrom) / ploidy, which is -log10(ploidy) in log space
@ -441,7 +464,7 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods implement
*/
protected static final double log103 = log10(3.0);
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, boolean fwdStrand) {
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
return -log103; // currently equivalent to e/3 model
}

View File

@ -46,16 +46,10 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
@Argument(fullName = "verbose", shortName = "v", doc = "EXPERIMENTAL", required = false)
public boolean VERBOSE = false;
// todo - print out priors at the start of SSG with .INFO
@Argument(fullName = "platform", shortName = "pl", doc = "Causes the genotyper to assume that reads without PL header TAG are this platform. Defaults to null, indicating that the system will throw a runtime exception when such reads are detected", required = false)
public EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform = null;
// todo -- remove dbSNP awareness until we understand how it should be used
//@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false)
//public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
@Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false)
public boolean keepQ0Bases = false;
/**
/**
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
*
* @param tracker the meta data tracker
@ -84,11 +78,16 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
char ref = refContext.getBase();
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
NewHotnessGenotypeLikelihoods G = useEmpiricalTransitions ? new EmpiricalSubstitutionGenotypeLikelihoods(priors) : new NewHotnessGenotypeLikelihoods(priors);
// setup GenotypeLike object
NewHotnessGenotypeLikelihoods G;
if ( useEmpiricalTransitions )
G = new EmpiricalSubstitutionGenotypeLikelihoods(priors, defaultPlatform);
else
G = new NewHotnessGenotypeLikelihoods(priors);
G.filterQ0Bases(! keepQ0Bases);
G.setVerbose(VERBOSE);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
G.add(pileup, true, VERBOSE);
G.add(pileup, true);
G.validate();
// lets setup the locus

View File

@ -208,7 +208,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
if (base == 'T'){numTs++;}
if (base == 'G'){numGs++;}
//consider base in likelihood calculations if it looks good and has high mapping score
G.add(base, qual, ! read.getReadNegativeStrandFlag());
G.add(base, qual, read, offset);
}
}