Major change to UG engine to support:
a) Genotyping given alleles (GGA) with indels; b) Genotyping and computing likelihoods of multi-allelic sites. When the GGA option is enabled, indels are called on regular pileups, not on extended pileups (extended pileups will be removed in an upcoming iteration). As a result, the likelihood computation is suboptimal, since we can't see reads that start with an insertion right after a position; hence the quality of some insertions is reduced and we could miss a few marginal calls, but it makes everything else much simpler. We currently can't call multi-allelic sites in discovery mode, but we can genotype them and compute/report full PLs on them (annotation support comes in the next commit). The exact model makes several suboptimal approximations to compute this. Ideally, the joint likelihood Pr(Data | AC1=i, AC2=j, ...) should be computed, but this is hard. Instead, the marginal likelihoods Pr(Data | ACi=k) are computed for all i, k, and QUAL is based on the highest-likelihood allele. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5941 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4c6751ec3c
commit
a8faacda4e
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -66,12 +67,13 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
* @param tracker rod data
|
* @param tracker rod data
|
||||||
* @param ref reference context
|
* @param ref reference context
|
||||||
* @param GLs genotype likelihoods
|
* @param GLs genotype likelihoods
|
||||||
|
* @param Alleles Alleles corresponding to GLs
|
||||||
* @param log10AlleleFrequencyPriors priors
|
* @param log10AlleleFrequencyPriors priors
|
||||||
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
|
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
|
||||||
*/
|
*/
|
||||||
protected abstract void getLog10PNonRef(RefMetaDataTracker tracker,
|
protected abstract void getLog10PNonRef(RefMetaDataTracker tracker,
|
||||||
ReferenceContext ref,
|
ReferenceContext ref,
|
||||||
Map<String, Genotype> GLs,
|
Map<String, Genotype> GLs, Set<Allele> Alleles,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors);
|
double[] log10AlleleFrequencyPosteriors);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
private boolean SIMPLE_GREEDY_GENOTYPER = false;
|
private boolean SIMPLE_GREEDY_GENOTYPER = false;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
final private ExactCalculation calcToUse;
|
final private ExactCalculation calcToUse;
|
||||||
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
|
|
@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
public void getLog10PNonRef(RefMetaDataTracker tracker,
|
public void getLog10PNonRef(RefMetaDataTracker tracker,
|
||||||
ReferenceContext ref,
|
ReferenceContext ref,
|
||||||
Map<String, Genotype> GLs,
|
Map<String, Genotype> GLs, Set<Allele>alleles,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
// todo -- REMOVE ME AFTER TESTING
|
||||||
|
|
@ -78,11 +78,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
|
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
|
||||||
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
|
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
|
||||||
|
|
||||||
|
int idxAA = GenotypeType.AA.ordinal();
|
||||||
|
int idxAB = GenotypeType.AB.ordinal();
|
||||||
|
int idxBB = GenotypeType.BB.ordinal();
|
||||||
|
|
||||||
// todo -- remove me after testing
|
// todo -- remove me after testing
|
||||||
if ( N_CYCLES > 1 ) {
|
if ( N_CYCLES > 1 ) {
|
||||||
for ( int i = 0; i < N_CYCLES; i++) {
|
for ( int i = 0; i < N_CYCLES; i++) {
|
||||||
timerGS.restart();
|
timerGS.restart();
|
||||||
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
|
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB);
|
||||||
timerGS.stop();
|
timerGS.stop();
|
||||||
|
|
||||||
timerExpt.restart();
|
timerExpt.restart();
|
||||||
|
|
@ -95,20 +99,60 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
int lastK = -1;
|
int lastK = -1;
|
||||||
switch ( calcToUse ) {
|
|
||||||
case N2_GOLD_STANDARD:
|
int numAlleles = alleles.size();
|
||||||
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors);
|
|
||||||
break;
|
int idxDiag = numAlleles;
|
||||||
case LINEAR_EXPERIMENTAL:
|
int incr = numAlleles - 1;
|
||||||
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors);
|
|
||||||
break;
|
double[][] posteriorCache = new double[numAlleles-1][];
|
||||||
|
double[] bestAFguess = new double[numAlleles-1];
|
||||||
|
|
||||||
|
for (int k=1; k < numAlleles; k++) {
|
||||||
|
// multi-allelic approximation, part 1: ideally the joint likelihood would be computed; instead,
|
||||||
|
// for each alt allele compute marginal (suboptimal) posteriors -
|
||||||
|
// compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of
|
||||||
|
// as a row-wise upper triangular matrix of likelihoods.
|
||||||
|
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
|
||||||
|
// 3 alt alleles: AA,AB,AC,AD,BB,BC,BD,CC,CD,DD
|
||||||
|
|
||||||
|
idxAA = 0;
|
||||||
|
idxAB = k;
|
||||||
|
// BB (hom-alt genotype of the current alt allele) is always an element on the diagonal.
|
||||||
|
// 2 alleles: BB element 2
|
||||||
|
// 3 alleles: BB element 3. CC element 5
|
||||||
|
// 4 alleles: BB element 4, CC element 7, DD element 9
|
||||||
|
idxBB = idxDiag;
|
||||||
|
idxDiag += incr--;
|
||||||
|
|
||||||
|
// todo - possible cleanup
|
||||||
|
switch ( calcToUse ) {
|
||||||
|
case N2_GOLD_STANDARD:
|
||||||
|
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
||||||
|
break;
|
||||||
|
case LINEAR_EXPERIMENTAL:
|
||||||
|
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (numAlleles > 2) {
|
||||||
|
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
|
||||||
|
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (numAlleles > 2) {
|
||||||
|
// multiallelic approximation, part 2:
|
||||||
|
// report posteriors for allele that has highest estimated AC
|
||||||
|
int mostLikelyAlleleIdx = MathUtils.maxElementIndex(bestAFguess);
|
||||||
|
for (int k=0; k < log10AlleleFrequencyPosteriors.length-1; k++)
|
||||||
|
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
|
||||||
|
|
||||||
|
}
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
// todo -- REMOVE ME AFTER TESTING
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
// todo -- REMOVE ME AFTER TESTING
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
// todo -- REMOVE ME AFTER TESTING
|
||||||
if ( COMPARE_TO_GS ) {
|
if ( COMPARE_TO_GS ) {
|
||||||
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors);
|
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB);
|
||||||
|
|
||||||
double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
|
double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
|
||||||
double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
|
double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
|
||||||
|
|
@ -268,7 +312,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
public int linearExact(Map<String, Genotype> GLs,
|
public int linearExact(Map<String, Genotype> GLs,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
||||||
final int numSamples = GLs.size();
|
final int numSamples = GLs.size();
|
||||||
final int numChr = 2*numSamples;
|
final int numChr = 2*numSamples;
|
||||||
final double[][] genotypeLikelihoods = getGLs(GLs);
|
final double[][] genotypeLikelihoods = getGLs(GLs);
|
||||||
|
|
@ -285,7 +329,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
if ( k == 0 ) { // special case for k = 0
|
if ( k == 0 ) { // special case for k = 0
|
||||||
for ( int j=1; j <= numSamples; j++ ) {
|
for ( int j=1; j <= numSamples; j++ ) {
|
||||||
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
|
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][idxAA];
|
||||||
}
|
}
|
||||||
} else { // k > 0
|
} else { // k > 0
|
||||||
final double[] kMinus1 = logY.getkMinus1();
|
final double[] kMinus1 = logY.getkMinus1();
|
||||||
|
|
@ -298,14 +342,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
double aa = Double.NEGATIVE_INFINITY;
|
double aa = Double.NEGATIVE_INFINITY;
|
||||||
double ab = Double.NEGATIVE_INFINITY;
|
double ab = Double.NEGATIVE_INFINITY;
|
||||||
if (k < 2*j-1)
|
if (k < 2*j-1)
|
||||||
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
|
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[idxAA];
|
||||||
|
|
||||||
if (k < 2*j)
|
if (k < 2*j)
|
||||||
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
|
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[idxAB];
|
||||||
|
|
||||||
double log10Max;
|
double log10Max;
|
||||||
if (k > 1) {
|
if (k > 1) {
|
||||||
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
|
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[idxBB];
|
||||||
log10Max = approximateLog10SumLog10(aa, ab, bb);
|
log10Max = approximateLog10SumLog10(aa, ab, bb);
|
||||||
} else {
|
} else {
|
||||||
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
||||||
|
|
@ -385,8 +429,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
if ( !vc.isVariant() )
|
if ( !vc.isVariant() )
|
||||||
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
|
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
|
||||||
|
|
||||||
Allele refAllele = vc.getReference();
|
boolean multiAllelicRecord = false;
|
||||||
Allele altAllele = vc.getAlternateAllele(0);
|
|
||||||
|
if (vc.getAlternateAlleles().size() > 1)
|
||||||
|
multiAllelicRecord = true;
|
||||||
|
|
||||||
Map<String, Genotype> GLs = vc.getGenotypes();
|
Map<String, Genotype> GLs = vc.getGenotypes();
|
||||||
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
|
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
|
||||||
|
|
@ -402,7 +448,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
pathMetricArray[0][0] = 0.0;
|
pathMetricArray[0][0] = 0.0;
|
||||||
|
|
||||||
if (SIMPLE_GREEDY_GENOTYPER) {
|
// todo - the optimal dynamic programming solution can't deal with multiallelic records yet
|
||||||
|
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord) {
|
||||||
sampleIndices.addAll(GLs.keySet());
|
sampleIndices.addAll(GLs.keySet());
|
||||||
sampleIdx = GLs.size();
|
sampleIdx = GLs.size();
|
||||||
}
|
}
|
||||||
|
|
@ -453,7 +500,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
if ( !g.hasLikelihoods() )
|
if ( !g.hasLikelihoods() )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (SIMPLE_GREEDY_GENOTYPER)
|
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord)
|
||||||
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
|
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
|
||||||
else {
|
else {
|
||||||
int newIdx = tracebackArray[k][startIdx];
|
int newIdx = tracebackArray[k][startIdx];
|
||||||
|
|
@ -463,24 +510,48 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||||
|
|
||||||
double qual;
|
double qual = Double.NEGATIVE_INFINITY;
|
||||||
double[] likelihoods = g.getLikelihoods().getAsVector();
|
double[] likelihoods = g.getLikelihoods().getAsVector();
|
||||||
|
/* System.out.format("Sample: %s GL:",sample);
|
||||||
|
for (int i=0; i < likelihoods.length; i++)
|
||||||
|
System.out.format("%1.4f ",likelihoods[i]);
|
||||||
|
*/
|
||||||
|
|
||||||
if (bestGTguess == 0) {
|
for (int i=0; i < likelihoods.length; i++) {
|
||||||
myAlleles.add(refAllele);
|
if (i==bestGTguess)
|
||||||
myAlleles.add(refAllele);
|
continue;
|
||||||
qual = likelihoods[0] - Math.max(likelihoods[1], likelihoods[2]);
|
if (likelihoods[i] >= qual)
|
||||||
} else if(bestGTguess == 1) {
|
qual = likelihoods[i];
|
||||||
myAlleles.add(refAllele);
|
|
||||||
myAlleles.add(altAllele);
|
|
||||||
qual = likelihoods[1] - Math.max(likelihoods[0], likelihoods[2]);
|
|
||||||
|
|
||||||
} else {
|
|
||||||
myAlleles.add(altAllele);
|
|
||||||
myAlleles.add(altAllele);
|
|
||||||
qual = likelihoods[2] - Math.max(likelihoods[1], likelihoods[0]);
|
|
||||||
}
|
}
|
||||||
|
// qual contains now max(likelihoods[k]) for all k != bestGTguess
|
||||||
|
qual = likelihoods[bestGTguess] - qual;
|
||||||
|
|
||||||
|
// likelihoods are stored row-wise in upper triangular matrix. IE
|
||||||
|
// for 2 alleles they have ordering AA,AB,BB
|
||||||
|
// for 3 alleles they are ordered AA,AB,AC,BB,BC,CC
|
||||||
|
// Get now alleles corresponding to best index
|
||||||
|
int kk=0;
|
||||||
|
boolean done = false;
|
||||||
|
for (int i=0; i < vc.getNAlleles(); i++) {
|
||||||
|
for (int j=i; j < vc.getNAlleles(); j++){
|
||||||
|
if (kk++ == bestGTguess) {
|
||||||
|
if (i==0)
|
||||||
|
myAlleles.add(vc.getReference());
|
||||||
|
else
|
||||||
|
myAlleles.add(vc.getAlternateAllele(i-1));
|
||||||
|
|
||||||
|
if (j==0)
|
||||||
|
myAlleles.add(vc.getReference());
|
||||||
|
else
|
||||||
|
myAlleles.add(vc.getAlternateAllele(j-1));
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if (done)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (qual < 0) {
|
if (qual < 0) {
|
||||||
// QUAL can be negative if the chosen genotype is not the most likely one individually.
|
// QUAL can be negative if the chosen genotype is not the most likely one individually.
|
||||||
|
|
@ -489,7 +560,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
double chosenGenotype = normalized[bestGTguess];
|
double chosenGenotype = normalized[bestGTguess];
|
||||||
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
|
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
|
||||||
}
|
}
|
||||||
|
//System.out.println(myAlleles.toString());
|
||||||
calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -506,7 +577,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
|
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
||||||
int numSamples = GLs.size();
|
int numSamples = GLs.size();
|
||||||
int numChr = 2*numSamples;
|
int numChr = 2*numSamples;
|
||||||
|
|
||||||
|
|
@ -516,7 +587,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
for (int j=0; j <=numChr; j++)
|
for (int j=0; j <=numChr; j++)
|
||||||
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
|
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
//YMatrix[0][0] = 1.0;
|
//YMatrix[0][0] = 1.0;
|
||||||
logYMatrix[0][0] = 0.0;
|
logYMatrix[0][0] = 0.0;
|
||||||
int j=0;
|
int j=0;
|
||||||
|
|
||||||
|
|
@ -533,7 +604,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
// special treatment for k=0: iteration reduces to:
|
// special treatment for k=0: iteration reduces to:
|
||||||
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
||||||
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA];
|
||||||
|
|
||||||
for (int k=1; k <= 2*j; k++ ) {
|
for (int k=1; k <= 2*j; k++ ) {
|
||||||
|
|
||||||
|
|
@ -542,20 +613,20 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
logNumerator = new double[3];
|
logNumerator = new double[3];
|
||||||
if (k < 2*j-1)
|
if (k < 2*j-1)
|
||||||
logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
|
logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
|
||||||
genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
genotypeLikelihoods[idxAA];
|
||||||
else
|
else
|
||||||
logNumerator[0] = Double.NEGATIVE_INFINITY;
|
logNumerator[0] = Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
|
|
||||||
if (k < 2*j)
|
if (k < 2*j)
|
||||||
logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
|
logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
|
||||||
genotypeLikelihoods[GenotypeType.AB.ordinal()];
|
genotypeLikelihoods[idxAB];
|
||||||
else
|
else
|
||||||
logNumerator[1] = Double.NEGATIVE_INFINITY;
|
logNumerator[1] = Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
if (k > 1)
|
if (k > 1)
|
||||||
logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
|
logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
|
||||||
genotypeLikelihoods[GenotypeType.BB.ordinal()];
|
genotypeLikelihoods[idxBB];
|
||||||
else
|
else
|
||||||
logNumerator[2] = Double.NEGATIVE_INFINITY;
|
logNumerator[2] = Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -88,7 +88,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
||||||
Map<String, AlignmentContext> contexts,
|
Map<String, AlignmentContext> contexts,
|
||||||
AlignmentContextUtils.ReadOrientation contextType,
|
AlignmentContextUtils.ReadOrientation contextType,
|
||||||
GenotypePriors priors,
|
GenotypePriors priors,
|
||||||
Map<String, BiallelicGenotypeLikelihoods> GLs,
|
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||||
Allele alternateAlleleToUse, boolean useBAQedPileup);
|
Allele alternateAlleleToUse, boolean useBAQedPileup);
|
||||||
|
|
||||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
protected void getLog10PNonRef(RefMetaDataTracker tracker,
|
protected void getLog10PNonRef(RefMetaDataTracker tracker,
|
||||||
ReferenceContext ref,
|
ReferenceContext ref,
|
||||||
Map<String, Genotype> GLs,
|
Map<String, Genotype> GLs, Set<Allele>alleles,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
initializeAFMatrix(GLs);
|
initializeAFMatrix(GLs);
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,13 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
|
|
||||||
private PairHMMIndelErrorModel pairModel;
|
private PairHMMIndelErrorModel pairModel;
|
||||||
|
|
||||||
private HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap;
|
private static ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>> indelLikelihoodMap =
|
||||||
|
new ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>>() {
|
||||||
|
protected synchronized HashMap<PileupElement,LinkedHashMap<Allele,Double>> initialValue() {
|
||||||
|
return new HashMap<PileupElement,LinkedHashMap<Allele,Double>>();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
private LinkedHashMap<Allele,Haplotype> haplotypeMap;
|
private LinkedHashMap<Allele,Haplotype> haplotypeMap;
|
||||||
|
|
||||||
// gdebug removeme
|
// gdebug removeme
|
||||||
|
|
@ -69,7 +75,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
private boolean useOldWrongHorribleHackedUpLikelihoodModel = false;
|
private boolean useOldWrongHorribleHackedUpLikelihoodModel = false;
|
||||||
//
|
//
|
||||||
private GenomeLoc lastSiteVisited;
|
private GenomeLoc lastSiteVisited;
|
||||||
private List<Allele> alleleList;
|
private ArrayList<Allele> alleleList;
|
||||||
|
|
||||||
|
static {
|
||||||
|
indelLikelihoodMap.set(new HashMap<PileupElement,LinkedHashMap<Allele,Double>>());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||||
super(UAC, logger);
|
super(UAC, logger);
|
||||||
|
|
@ -99,7 +110,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
|
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
|
||||||
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
|
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
|
||||||
|
|
||||||
indelLikelihoodMap = new HashMap<PileupElement,LinkedHashMap<Allele,Double>>();
|
|
||||||
haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -289,7 +299,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
Map<String, AlignmentContext> contexts,
|
Map<String, AlignmentContext> contexts,
|
||||||
AlignmentContextUtils.ReadOrientation contextType,
|
AlignmentContextUtils.ReadOrientation contextType,
|
||||||
GenotypePriors priors,
|
GenotypePriors priors,
|
||||||
Map<String, BiallelicGenotypeLikelihoods> GLs,
|
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||||
Allele alternateAlleleToUse,
|
Allele alternateAlleleToUse,
|
||||||
boolean useBAQedPileup) {
|
boolean useBAQedPileup) {
|
||||||
|
|
||||||
|
|
@ -305,13 +315,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
// starting a new site: clear allele list
|
// starting a new site: clear allele list
|
||||||
alleleList.clear();
|
alleleList.clear();
|
||||||
lastSiteVisited = ref.getLocus();
|
lastSiteVisited = ref.getLocus();
|
||||||
indelLikelihoodMap.clear();
|
indelLikelihoodMap.set(new HashMap<PileupElement,LinkedHashMap<Allele,Double>>());
|
||||||
haplotypeMap.clear();
|
haplotypeMap.clear();
|
||||||
|
|
||||||
if (getAlleleListFromVCF) {
|
if (getAlleleListFromVCF) {
|
||||||
|
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL);
|
||||||
for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", null, ref.getLocus(), false, false) ) {
|
allowableTypes.add(VariantContext.Type.MIXED);
|
||||||
if( vc_input != null && ! vc_input.isFiltered() && vc_input.isIndel() && ref.getLocus().getStart() == vc_input.getStart()) {
|
for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles",
|
||||||
|
allowableTypes, ref.getLocus(), false, false) ) {
|
||||||
|
if( vc_input != null && ref.getLocus().getStart() == vc_input.getStart()) {
|
||||||
vc = vc_input;
|
vc = vc_input;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -320,10 +332,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
if ( vc == null )
|
if ( vc == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
||||||
if (!vc.isIndel())
|
|
||||||
return null;
|
|
||||||
|
|
||||||
alleleList.clear();
|
alleleList.clear();
|
||||||
for (Allele a : vc.getAlleles())
|
for (Allele a : vc.getAlleles())
|
||||||
alleleList.add(a);
|
alleleList.add(a);
|
||||||
|
|
@ -350,11 +358,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
|
|
||||||
refAllele = alleleList.get(0);
|
refAllele = alleleList.get(0);
|
||||||
altAllele = alleleList.get(1);
|
altAllele = alleleList.get(1);
|
||||||
|
|
||||||
|
// look for alt allele that has biggest length distance to ref allele
|
||||||
|
int maxLenDiff = 0;
|
||||||
|
for (Allele a: alleleList) {
|
||||||
|
if(a.isNonReference()) {
|
||||||
|
int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
|
||||||
|
if (lenDiff > maxLenDiff) {
|
||||||
|
maxLenDiff = lenDiff;
|
||||||
|
altAllele = a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
|
int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
|
||||||
// assume only one alt allele for now
|
|
||||||
|
|
||||||
//List<Haplotype> haplotypesInVC;
|
|
||||||
|
|
||||||
int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
|
int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
|
||||||
int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
|
int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
|
||||||
|
|
||||||
|
|
@ -386,27 +402,34 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
if (pileup != null ) {
|
if (pileup != null ) {
|
||||||
double[] genotypeLikelihoods;
|
double[] genotypeLikelihoods;
|
||||||
if (useOldWrongHorribleHackedUpLikelihoodModel)
|
if (useOldWrongHorribleHackedUpLikelihoodModel)
|
||||||
genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
|
genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
|
||||||
else
|
else
|
||||||
genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, HAPLOTYPE_SIZE, eventLength, indelLikelihoodMap);
|
genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(),
|
// which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is lexically ordered, for example
|
||||||
refAllele,
|
// for 3 alleles it's 00 01 02 11 12 22
|
||||||
altAllele,
|
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
||||||
genotypeLikelihoods[0],
|
alleleList,
|
||||||
genotypeLikelihoods[1],
|
genotypeLikelihoods,
|
||||||
genotypeLikelihoods[2],
|
|
||||||
getFilteredDepth(pileup)));
|
getFilteredDepth(pileup)));
|
||||||
if (DEBUG)
|
|
||||||
System.out.format("Sample:%s GL:%4.2f %4.2f %4.2f\n",sample.getKey(), genotypeLikelihoods[0],genotypeLikelihoods[1], genotypeLikelihoods[2]);
|
if (DEBUG) {
|
||||||
|
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
|
||||||
|
for (int k=0; k < genotypeLikelihoods.length; k++)
|
||||||
|
System.out.format("%1.4f ",genotypeLikelihoods[k]);
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return refAllele;
|
return refAllele;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
|
||||||
|
return indelLikelihoodMap.get();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broad.tribble.util.variantcontext.Allele;
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
|
@ -19,10 +20,15 @@ public class MultiallelicGenotypeLikelihoods {
|
||||||
|
|
||||||
public MultiallelicGenotypeLikelihoods(String sample,
|
public MultiallelicGenotypeLikelihoods(String sample,
|
||||||
ArrayList<Allele> A,
|
ArrayList<Allele> A,
|
||||||
double[] log10AALikelihoods, int depth) {
|
double[] log10Likelihoods, int depth) {
|
||||||
|
/* Check for consistency between likelihood vector and number of alleles */
|
||||||
|
int numAlleles = A.size();
|
||||||
|
if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2)
|
||||||
|
throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!"));
|
||||||
|
|
||||||
this.sample = sample;
|
this.sample = sample;
|
||||||
this.alleleList = A;
|
this.alleleList = A;
|
||||||
this.GLs = log10AALikelihoods;
|
this.GLs = log10Likelihoods;
|
||||||
this.depth = depth;
|
this.depth = depth;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -79,7 +79,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
Map<String, AlignmentContext> contexts,
|
Map<String, AlignmentContext> contexts,
|
||||||
AlignmentContextUtils.ReadOrientation contextType,
|
AlignmentContextUtils.ReadOrientation contextType,
|
||||||
GenotypePriors priors,
|
GenotypePriors priors,
|
||||||
Map<String, BiallelicGenotypeLikelihoods> GLs,
|
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||||
Allele alternateAlleleToUse,
|
Allele alternateAlleleToUse,
|
||||||
boolean useBAQedPileup) {
|
boolean useBAQedPileup) {
|
||||||
|
|
||||||
|
|
@ -136,13 +136,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase);
|
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase);
|
||||||
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele);
|
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele);
|
||||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele);
|
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele);
|
||||||
GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(),
|
ArrayList<Allele> aList = new ArrayList<Allele>();
|
||||||
refAllele,
|
aList.add(refAllele);
|
||||||
altAllele,
|
aList.add(altAllele);
|
||||||
likelihoods[refGenotype.ordinal()],
|
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
|
||||||
likelihoods[hetGenotype.ordinal()],
|
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
||||||
likelihoods[homGenotype.ordinal()],
|
aList, dlike, getFilteredDepth(pileup)));
|
||||||
getFilteredDepth(pileup)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return refAllele;
|
return refAllele;
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
||||||
// enable extended events for indels
|
// enable extended events for indels
|
||||||
public boolean generateExtendedEvents() { return UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP; }
|
public boolean generateExtendedEvents() {
|
||||||
|
return (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inner class for collecting output statistics from the UG
|
* Inner class for collecting output statistics from the UG
|
||||||
|
|
|
||||||
|
|
@ -144,7 +144,7 @@ public class UnifiedGenotyperEngine {
|
||||||
if ( UAC.COVERAGE_AT_WHICH_TO_ABORT > 0 && rawContext.size() > UAC.COVERAGE_AT_WHICH_TO_ABORT )
|
if ( UAC.COVERAGE_AT_WHICH_TO_ABORT > 0 && rawContext.size() > UAC.COVERAGE_AT_WHICH_TO_ABORT )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext );
|
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel(tracker, refContext, rawContext );
|
||||||
if( model == null ) {
|
if( model == null ) {
|
||||||
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
|
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
|
||||||
}
|
}
|
||||||
|
|
@ -171,7 +171,7 @@ public class UnifiedGenotyperEngine {
|
||||||
* @return the VariantContext object
|
* @return the VariantContext object
|
||||||
*/
|
*/
|
||||||
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||||
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext );
|
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( tracker, refContext, rawContext );
|
||||||
if( model == null )
|
if( model == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -192,7 +192,7 @@ public class UnifiedGenotyperEngine {
|
||||||
* @return the VariantCallContext object
|
* @return the VariantCallContext object
|
||||||
*/
|
*/
|
||||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) {
|
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) {
|
||||||
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext );
|
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel(tracker, refContext, rawContext );
|
||||||
if( model == null ) {
|
if( model == null ) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -217,7 +217,7 @@ public class UnifiedGenotyperEngine {
|
||||||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, BiallelicGenotypeLikelihoods> GLs = new HashMap<String, BiallelicGenotypeLikelihoods>();
|
Map<String, MultiallelicGenotypeLikelihoods> GLs = new HashMap<String, MultiallelicGenotypeLikelihoods>();
|
||||||
|
|
||||||
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
|
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
|
||||||
|
|
||||||
|
|
@ -259,21 +259,23 @@ public class UnifiedGenotyperEngine {
|
||||||
return new VariantCallContext(vc, ref.getBase(), false);
|
return new VariantCallContext(vc, ref.getBase(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, BiallelicGenotypeLikelihoods> GLs) {
|
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
|
||||||
// no-call everyone for now
|
// no-call everyone for now
|
||||||
List<Allele> noCall = new ArrayList<Allele>();
|
List<Allele> noCall = new ArrayList<Allele>();
|
||||||
noCall.add(Allele.NO_CALL);
|
noCall.add(Allele.NO_CALL);
|
||||||
|
|
||||||
Set<Allele> alleles = new HashSet<Allele>();
|
Set<Allele> alleles = new LinkedHashSet<Allele>();
|
||||||
alleles.add(refAllele);
|
alleles.add(refAllele);
|
||||||
boolean addedAltAllele = false;
|
boolean addedAltAlleles = false;
|
||||||
|
|
||||||
HashMap<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
HashMap<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
||||||
for ( BiallelicGenotypeLikelihoods GL : GLs.values() ) {
|
for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) {
|
||||||
if ( !addedAltAllele ) {
|
if ( !addedAltAlleles ) {
|
||||||
addedAltAllele = true;
|
addedAltAlleles = true;
|
||||||
alleles.add(GL.getAlleleA());
|
// ordering important to maintain consistency
|
||||||
alleles.add(GL.getAlleleB());
|
for (Allele a: GL.getAlleles()) {
|
||||||
|
alleles.add(a);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||||
|
|
@ -316,7 +318,7 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||||
|
|
||||||
// find the most likely frequency
|
// find the most likely frequency
|
||||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
|
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
|
||||||
|
|
@ -374,7 +376,7 @@ public class UnifiedGenotyperEngine {
|
||||||
// the overall lod
|
// the overall lod
|
||||||
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||||
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||||
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||||
|
|
@ -382,7 +384,7 @@ public class UnifiedGenotyperEngine {
|
||||||
// the forward lod
|
// the forward lod
|
||||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||||
|
|
@ -391,7 +393,7 @@ public class UnifiedGenotyperEngine {
|
||||||
// the reverse lod
|
// the reverse lod
|
||||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||||
|
|
@ -465,18 +467,33 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) {
|
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) {
|
||||||
|
|
||||||
ReadBackedExtendedEventPileup rawPileup = rawContext.getExtendedEventPileup();
|
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||||
|
// regular pileup in this case
|
||||||
|
ReadBackedPileup pileup = rawContext.getBasePileup() .getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||||
|
|
||||||
// filter the context based on min mapping quality
|
// don't call when there is no coverage
|
||||||
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
||||||
|
return null;
|
||||||
|
|
||||||
// don't call when there is no coverage
|
// stratify the AlignmentContext and cut by sample
|
||||||
if ( pileup.size() == 0 && !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) )
|
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||||
return null;
|
|
||||||
|
|
||||||
// stratify the AlignmentContext and cut by sample
|
} else {
|
||||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
// todo - temporary: extended events will be removed, so this won't be needed
|
||||||
|
if (!rawContext.hasExtendedEventPileup())
|
||||||
|
return null;
|
||||||
|
ReadBackedExtendedEventPileup rawPileup = rawContext.getExtendedEventPileup();
|
||||||
|
|
||||||
|
// filter the context based on min mapping quality
|
||||||
|
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||||
|
|
||||||
|
// don't call when there is no coverage
|
||||||
|
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
// stratify the AlignmentContext and cut by sample
|
||||||
|
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||||
|
}
|
||||||
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
|
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
|
||||||
|
|
||||||
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
|
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
|
||||||
|
|
@ -583,14 +600,35 @@ public class UnifiedGenotyperEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
// decide whether we are currently processing SNPs, indels, or neither
|
// decide whether we are currently processing SNPs, indels, or neither
|
||||||
private GenotypeLikelihoodsCalculationModel.Model getCurrentGLModel( final AlignmentContext rawContext ) {
|
private GenotypeLikelihoodsCalculationModel.Model getCurrentGLModel(final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||||
if( (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) && rawContext.hasExtendedEventPileup() ) {
|
final AlignmentContext rawContext ) {
|
||||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
if (rawContext.hasExtendedEventPileup() ) {
|
||||||
} else if( (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP) && !rawContext.hasExtendedEventPileup() ) {
|
// todo - remove this code
|
||||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
if ((UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) &&
|
||||||
} else {
|
(UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) )
|
||||||
return null;
|
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
// no extended event pileup
|
||||||
|
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
||||||
|
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||||
|
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, refContext, false, logger);
|
||||||
|
if (vcInput == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
if (vcInput.isSNP() && ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
|
||||||
|
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||||
|
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL))
|
||||||
|
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||||
|
} else {
|
||||||
|
// todo - this assumes SNPs take priority when BOTH is selected; should be done in a smarter way once extended events are removed
|
||||||
|
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)
|
||||||
|
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||||
|
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)
|
||||||
|
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) {
|
protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||||
|
|
|
||||||
|
|
@ -712,7 +712,7 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele,Haplotype> haplotypeMap,
|
public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele,Haplotype> haplotypeMap,
|
||||||
ReferenceContext ref, int haplotypeSize, int eventLength,
|
ReferenceContext ref, int eventLength,
|
||||||
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
|
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
|
||||||
|
|
||||||
int numHaplotypes = haplotypeMap.size();
|
int numHaplotypes = haplotypeMap.size();
|
||||||
|
|
@ -1022,8 +1022,7 @@ public class PairHMMIndelErrorModel {
|
||||||
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
|
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
|
||||||
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
|
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
|
||||||
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
|
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
|
||||||
double[] readLikelihood = new double[2]; // diploid sample
|
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
|
||||||
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
|
|
||||||
|
|
||||||
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
||||||
// First term is approximated by Jacobian log with table lookup.
|
// First term is approximated by Jacobian log with table lookup.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue