Optionally applies secondary base distribution priors to normal single-sample genotyper posteriors.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@581 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
58c80d8d87
commit
e7534b292f
|
|
@ -38,6 +38,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
String rodString = getRodString(tracker);
|
String rodString = getRodString(tracker);
|
||||||
|
|
||||||
|
/*
|
||||||
AlleleFrequencyEstimate freq;
|
AlleleFrequencyEstimate freq;
|
||||||
if (fourBaseMode) {
|
if (fourBaseMode) {
|
||||||
if (retest) {
|
if (retest) {
|
||||||
|
|
@ -55,6 +56,9 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
// Compute single quality score genotype likelihoods
|
// Compute single quality score genotype likelihoods
|
||||||
freq = getOneProbAlleleFrequency(ref, context, rodString);
|
freq = getOneProbAlleleFrequency(ref, context, rodString);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
AlleleFrequencyEstimate freq = getOneProbAlleleFrequency(ref, context, rodString);
|
||||||
|
|
||||||
if (printMetrics) {
|
if (printMetrics) {
|
||||||
if (freq != null) { metrics.nextPosition(freq, tracker); }
|
if (freq != null) { metrics.nextPosition(freq, tracker); }
|
||||||
|
|
@ -80,33 +84,25 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
double[] genotypeBalances = { qHom, qHet, qHomNonRef };
|
double[] genotypeBalances = { qHom, qHet, qHomNonRef };
|
||||||
double[] reportingBalances = { 0.0, 0.5, 1.0 };
|
double[] reportingBalances = { 0.0, 0.5, 1.0 };
|
||||||
|
|
||||||
// Compute the distribution of second-best bases (should be random)
|
// Compute the probability of the distribution of second-best bases (should be random)
|
||||||
/*
|
int[] secondaryBaseCounts = getSecondaryBaseCounts(probs);
|
||||||
double[] secondaryBaseProbs = { 0.333, 0.333, 0.333 };
|
double[] secondaryBaseProbs = new double[4];
|
||||||
int[] allBaseCounts = new int[3];
|
|
||||||
for (int baseIndex = 0; baseIndex < 4; baseIndex++) {
|
for (int baseIndex = 0; baseIndex < 4; baseIndex++) {
|
||||||
|
secondaryBaseProbs[baseIndex] = 0.25;
|
||||||
}
|
}
|
||||||
*/
|
double secondaryBaseDistProb = MathUtils.multinomialProbability(secondaryBaseCounts, secondaryBaseProbs);
|
||||||
|
|
||||||
|
//System.out.printf("%d %d %d %d ", secondaryBaseCounts[0], secondaryBaseCounts[1], secondaryBaseCounts[2], secondaryBaseCounts[3]);
|
||||||
|
//System.out.println(secondaryBaseDistProb);
|
||||||
|
|
||||||
// Determine the probability distribution of non-ref alleles
|
// Determine the probability distribution of non-ref alleles
|
||||||
//double[] obsWeights = getObservationWeights(probs, refBaseIndex, altBaseIndex);
|
double[] obsWeights = getObservationWeights(probs, refBaseIndex, altBaseIndex);
|
||||||
|
|
||||||
// Convolve them with the binomial sampling probability distribution in order to
|
// Convolve them with the binomial sampling probability distribution in order to
|
||||||
// marginalize the non-ref allele balance.
|
// marginalize the non-ref allele balance.
|
||||||
double[] posteriors = new double[3];
|
double[] posteriors = new double[3];
|
||||||
double qhat = 0.0, qstar = 0.0, lodVsRef = 0.0, pBest = Double.MIN_VALUE;
|
double qhat = 0.0, qstar = 0.0, lodVsRef = 0.0, pBest = Double.MIN_VALUE;
|
||||||
for (int hypothesis = 0; hypothesis < 3; hypothesis++) {
|
for (int hypothesis = 0; hypothesis < 3; hypothesis++) {
|
||||||
int base1;
|
|
||||||
int base2;
|
|
||||||
switch (hypothesis) {
|
|
||||||
case 0: base1 = refBaseIndex; base2 = refBaseIndex; break;
|
|
||||||
case 1: base1 = refBaseIndex; base2 = altBaseIndex; break;
|
|
||||||
case 2: base1 = altBaseIndex; base2 = altBaseIndex; break;
|
|
||||||
default: base1 = refBaseIndex; base2 = refBaseIndex; break;
|
|
||||||
}
|
|
||||||
double[] obsWeights = getObservationWeights(probs, refBaseIndex, altBaseIndex);
|
|
||||||
|
|
||||||
posteriors[hypothesis] = 0.0;
|
posteriors[hypothesis] = 0.0;
|
||||||
|
|
||||||
for (int weightIndex = 0; weightIndex < obsWeights.length; weightIndex++) {
|
for (int weightIndex = 0; weightIndex < obsWeights.length; weightIndex++) {
|
||||||
|
|
@ -121,6 +117,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
|
|
||||||
posteriors[hypothesis] += obsWeights[weightIndex]*binomialWeight;
|
posteriors[hypothesis] += obsWeights[weightIndex]*binomialWeight;
|
||||||
}
|
}
|
||||||
|
|
||||||
posteriors[hypothesis] *= genotypePriors[hypothesis];
|
posteriors[hypothesis] *= genotypePriors[hypothesis];
|
||||||
|
|
||||||
double refLikelihood = Double.isInfinite(Math.log10(posteriors[0])) ? -10000.0 : Math.log10(posteriors[0]);
|
double refLikelihood = Double.isInfinite(Math.log10(posteriors[0])) ? -10000.0 : Math.log10(posteriors[0]);
|
||||||
|
|
@ -227,6 +224,31 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
|
|
||||||
return bestAltBaseIndex;
|
return bestAltBaseIndex;
|
||||||
}
|
}
|
||||||
|
private int[] getSecondaryBaseCounts(double[][] probs) {
|
||||||
|
int[] secondaryBaseCounts = new int[4];
|
||||||
|
|
||||||
|
for (int readIndex = 0; readIndex < probs.length; readIndex++) {
|
||||||
|
double bestProb = 0.0;
|
||||||
|
for (int baseIndex = 0; baseIndex < probs[readIndex].length; baseIndex++) {
|
||||||
|
if (probs[readIndex][baseIndex] > bestProb) {
|
||||||
|
bestProb = probs[readIndex][baseIndex];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double secondBestProb = 0.0;
|
||||||
|
int secondBestBaseIndex = 0;
|
||||||
|
for (int baseIndex = 0; baseIndex < 4; baseIndex++) {
|
||||||
|
if (probs[readIndex][baseIndex] > secondBestProb && probs[readIndex][baseIndex] < bestProb) {
|
||||||
|
secondBestProb = probs[readIndex][baseIndex];
|
||||||
|
secondBestBaseIndex = baseIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
secondaryBaseCounts[secondBestBaseIndex]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return secondaryBaseCounts;
|
||||||
|
}
|
||||||
|
|
||||||
private AlleleFrequencyEstimate getOneProbAlleleFrequency(char ref, LocusContext context, String rodString) {
|
private AlleleFrequencyEstimate getOneProbAlleleFrequency(char ref, LocusContext context, String rodString) {
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
|
|
@ -246,6 +268,10 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
}
|
}
|
||||||
G.ApplyPrior(ref, this.allele_frequency_prior);
|
G.ApplyPrior(ref, this.allele_frequency_prior);
|
||||||
|
|
||||||
|
if (fourBaseMode) {
|
||||||
|
G.applyFourBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||||
|
}
|
||||||
|
|
||||||
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
|
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue