Four-base mode now estimates the genotype using the one-base method and retests the site if the one-base method suggests the site is a het.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@503 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bd719f9c06
commit
11e85f1969
|
|
@ -18,17 +18,22 @@ import java.util.*;
|
||||||
// j.maguire 3-7-2009
|
// j.maguire 3-7-2009
|
||||||
|
|
||||||
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer> {
|
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer> {
|
||||||
|
@Argument(fullName="metrics",required=true)
|
||||||
|
public String metricsFileName;
|
||||||
|
|
||||||
|
@Argument(fullName="lodThreshold",shortName="lod",required=false,defaultValue="0.0")
|
||||||
|
public Double lodThreshold;
|
||||||
|
|
||||||
@Argument(fullName="fourBaseMode",required=false,defaultValue="false")
|
@Argument(fullName="fourBaseMode",required=false,defaultValue="false")
|
||||||
public Boolean fourBaseMode;
|
public Boolean fourBaseMode;
|
||||||
|
|
||||||
@Argument(fullName="decideOnBase",required=false,defaultValue="false")
|
public AlleleMetrics metrics;
|
||||||
public Boolean decideOnBase;
|
|
||||||
|
|
||||||
private AlleleMetrics metrics;
|
|
||||||
|
|
||||||
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { return true; } // We are keeping all the reads
|
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { return true; } // We are keeping all the reads
|
||||||
public boolean requiresReads() { return true; }
|
public boolean requiresReads() { return true; }
|
||||||
public void initialize() { metrics = new AlleleMetrics("metrics.out"); }
|
public void initialize() {
|
||||||
|
metrics = new AlleleMetrics(metricsFileName, lodThreshold);
|
||||||
|
}
|
||||||
|
|
||||||
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
String rodString = getRodString(tracker);
|
String rodString = getRodString(tracker);
|
||||||
|
|
@ -36,11 +41,13 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
AlleleFrequencyEstimate freq = null;
|
AlleleFrequencyEstimate freq = null;
|
||||||
if (fourBaseMode)
|
if (fourBaseMode)
|
||||||
{
|
{
|
||||||
// Compute four-base prob genotype likelihoods
|
// Compute single quality score genotype likelihoods
|
||||||
freq = getFourProbAlleleFrequency(ref, context, rodString);
|
freq = getOneProbAlleleFrequency(ref, context, rodString);
|
||||||
}
|
|
||||||
else if (decideOnBase)
|
if (Math.abs(freq.qhat - 0.5) < 0.01) {
|
||||||
{
|
// If we found a putative het, recompute four-base prob genotype likelihoods
|
||||||
|
freq = getFourProbAlleleFrequency(ref, context, rodString, tracker);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -54,7 +61,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
return freq;
|
return freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
private AlleleFrequencyEstimate getFourProbAlleleFrequency(char ref, LocusContext context, String rodString) {
|
private AlleleFrequencyEstimate getFourProbAlleleFrequency(char ref, LocusContext context, String rodString, RefMetaDataTracker tracker) {
|
||||||
/*
|
/*
|
||||||
P(D)*P(G,q|D) = P(D|G,q)*P(G,q)
|
P(D)*P(G,q|D) = P(D|G,q)*P(G,q)
|
||||||
= P(D|q)*P(q|G)*P(G)
|
= P(D|q)*P(q|G)*P(G)
|
||||||
|
|
@ -71,62 +78,97 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
int altBaseIndex = getMostFrequentNonRefBase(getFractionalCounts(probs), refBaseIndex);
|
int altBaseIndex = getMostFrequentNonRefBase(getFractionalCounts(probs), refBaseIndex);
|
||||||
|
|
||||||
if (refBaseIndex >= 0 && altBaseIndex >= 0) {
|
if (refBaseIndex >= 0 && altBaseIndex >= 0) {
|
||||||
System.out.println(context.getLocation().toString() + " " + refBaseIndex + " " + altBaseIndex + " " + probs.length + " " + rodString);
|
/*
|
||||||
for (int i = 0; i < probs.length; i++) {
|
for (int i = 0; i < probs.length; i++) {
|
||||||
System.out.printf(" [ %4.4f %4.4f %4.4f %4.4f ]\n", probs[i][0], probs[i][1], probs[i][2], probs[i][3]);
|
System.out.printf(" [ %4.4f %4.4f %4.4f %4.4f ]\n", probs[i][0], probs[i][1], probs[i][2], probs[i][3]);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
double[] obsWeights = getObservationWeights(probs, refBaseIndex, altBaseIndex);
|
double[] obsWeights = getObservationWeights(probs, refBaseIndex, altBaseIndex);
|
||||||
|
|
||||||
|
/*
|
||||||
System.out.print(" Weights: ");
|
System.out.print(" Weights: ");
|
||||||
for (int i = 0; i < obsWeights.length; i++) {
|
for (int i = 0; i < obsWeights.length; i++) {
|
||||||
System.out.printf("%4.4f ", obsWeights[i]);
|
System.out.print(obsWeights[i] + " ");
|
||||||
}
|
}
|
||||||
System.out.println();
|
System.out.println();
|
||||||
|
*/
|
||||||
|
|
||||||
double[] genotypePriors = { 0.999, 1e-3, 1e-5 };
|
double[] genotypePriors = { 0.999, 1e-3, 1e-5 };
|
||||||
double[] genotypeBalances = { 0.02, 0.5, 0.98 };
|
//double[] genotypeBalances = { 0.02, 0.5, 0.98 };
|
||||||
|
double[] genotypeBalances = { 0.002, 0.5, 1.0 };
|
||||||
|
|
||||||
double[] posteriors = new double[3];
|
double[] posteriors = new double[3];
|
||||||
double qhat = 0.0, qstar = 0.0, lodVsRef = 0.0, lodVsNextBest = 0.0, pBest = Double.MIN_VALUE;
|
double qhat = 0.0, qstar = 0.0, lodVsRef = 0.0, pBest = Double.MIN_VALUE;
|
||||||
|
|
||||||
for (int hypothesis = 0; hypothesis < 3; hypothesis++) {
|
for (int hypothesis = 0; hypothesis < 3; hypothesis++) {
|
||||||
posteriors[hypothesis] = 0.0;
|
posteriors[hypothesis] = 0.0;
|
||||||
|
|
||||||
for (int weightIndex = 0; weightIndex < obsWeights.length; weightIndex++) {
|
for (int weightIndex = 0; weightIndex < obsWeights.length; weightIndex++) {
|
||||||
posteriors[hypothesis] += obsWeights[weightIndex]*binomialProb(weightIndex, probs.length, genotypeBalances[hypothesis]);
|
//if (hypothesis == 0) {
|
||||||
|
// System.out.println("DEBUG1: " + hypothesis + " " + weightIndex + " " + obsWeights[weightIndex] + " " + binomialProb(weightIndex, probs.length, genotypeBalances[hypothesis]));
|
||||||
|
//}
|
||||||
|
if (hypothesis == 0) {
|
||||||
|
//System.out.println(weightIndex + " " + probs.length + " " + genotypeBalances[hypothesis] + " " + binomialProb(weightIndex, probs.length, genotypeBalances[hypothesis]));
|
||||||
|
}
|
||||||
|
double binomialWeight = binomialProb(weightIndex, probs.length, genotypeBalances[hypothesis]);;
|
||||||
|
if (hypothesis == 0 && weightIndex < obsWeights.length/4) {
|
||||||
|
binomialWeight = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
posteriors[hypothesis] += obsWeights[weightIndex]*binomialWeight;
|
||||||
}
|
}
|
||||||
posteriors[hypothesis] *= genotypePriors[hypothesis];
|
posteriors[hypothesis] *= genotypePriors[hypothesis];
|
||||||
|
|
||||||
System.out.printf(" Hypothesis %d %f %f %f\n", hypothesis, genotypeBalances[hypothesis], posteriors[hypothesis], Math.log10(posteriors[hypothesis]/posteriors[0]));
|
double refLikelihood = Double.isInfinite(Math.log10(posteriors[0])) ? -10000.0 : Math.log10(posteriors[0]);
|
||||||
|
|
||||||
|
//System.out.println(" Hypothesis " + hypothesis + " " + genotypeBalances[hypothesis] + " " + posteriors[hypothesis] + " " + (Math.log10(posteriors[hypothesis]) - refLikelihood));
|
||||||
|
|
||||||
if (posteriors[hypothesis] > pBest) {
|
if (posteriors[hypothesis] > pBest) {
|
||||||
qhat = genotypeBalances[hypothesis];
|
qhat = genotypeBalances[hypothesis];
|
||||||
qstar = genotypeBalances[hypothesis];
|
qstar = genotypeBalances[hypothesis];
|
||||||
lodVsRef = Math.log10(posteriors[hypothesis]/posteriors[0]);
|
lodVsRef = Math.log10(posteriors[hypothesis]) - refLikelihood;
|
||||||
pBest = posteriors[hypothesis];
|
pBest = posteriors[hypothesis];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double pRef = posteriors[0];
|
double pRef = posteriors[0];
|
||||||
|
|
||||||
System.out.println("\n");
|
|
||||||
|
|
||||||
return new AlleleFrequencyEstimate(context.getLocation(),
|
double[] sposteriors = Arrays.copyOf(posteriors, posteriors.length);
|
||||||
ref,
|
Arrays.sort(sposteriors);
|
||||||
BaseUtils.baseIndexToSimpleBase(altBaseIndex),
|
double bestLogPosterior = (Double.isInfinite(Math.log10(sposteriors[2]))) ? -10000.0 : Math.log10(sposteriors[2]);
|
||||||
2,
|
double nextBestLogPosterior = (Double.isInfinite(Math.log10(sposteriors[1]))) ? -10000.0 : Math.log10(sposteriors[1]);
|
||||||
qhat,
|
double lodVsNextBest = bestLogPosterior - nextBestLogPosterior;
|
||||||
qstar,
|
|
||||||
lodVsRef,
|
|
||||||
lodVsNextBest,
|
|
||||||
pBest,
|
|
||||||
pRef,
|
|
||||||
probs.length,
|
|
||||||
ReadBackedPileup.basePileupAsString(context.getReads(), context.getOffsets()),
|
|
||||||
probs,
|
|
||||||
posteriors);
|
|
||||||
|
|
||||||
|
//System.out.println("\n");
|
||||||
|
|
||||||
|
AlleleFrequencyEstimate freq = new AlleleFrequencyEstimate(context.getLocation(),
|
||||||
|
ref,
|
||||||
|
BaseUtils.baseIndexToSimpleBase(altBaseIndex),
|
||||||
|
2,
|
||||||
|
qhat,
|
||||||
|
qstar,
|
||||||
|
(Math.abs(qhat - genotypeBalances[0]) < 0.001 ? -lodVsNextBest : lodVsRef),
|
||||||
|
lodVsNextBest,
|
||||||
|
pBest,
|
||||||
|
pRef,
|
||||||
|
probs.length,
|
||||||
|
ReadBackedPileup.basePileupAsString(context.getReads(), context.getOffsets()),
|
||||||
|
probs,
|
||||||
|
posteriors);
|
||||||
|
|
||||||
|
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) {
|
||||||
|
if ( datum != null ) {
|
||||||
|
if ( datum instanceof rodGFF ) {
|
||||||
|
rodGFF hapmap_chip_genotype = (rodGFF) datum;
|
||||||
|
|
||||||
|
//System.out.println(hapmap_chip_genotype.toString());
|
||||||
|
//System.out.println(freq.asTabularString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -136,11 +178,13 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
// k - number of successes
|
// k - number of successes
|
||||||
// n - number of Bernoulli trials
|
// n - number of Bernoulli trials
|
||||||
// p - probability of success
|
// p - probability of success
|
||||||
|
double dbinom = 0.0;
|
||||||
|
|
||||||
if ((n*p < 5) && (n*(1-p) < 5))
|
if ((n*p < 5) && (n*(1-p) < 5))
|
||||||
{
|
{
|
||||||
// For small n and the edges, compute it directly.
|
// For small n and the edges, compute it directly.
|
||||||
return (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k);
|
//return (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k);
|
||||||
|
dbinom = (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -151,8 +195,10 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
double check = (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k);
|
double check = (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k);
|
||||||
double residual = ans - check;
|
double residual = ans - check;
|
||||||
|
|
||||||
return check;
|
dbinom = check;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return (dbinom < 0.0) ? 0.0 : dbinom;
|
||||||
}
|
}
|
||||||
|
|
||||||
long nchoosek(long n, long k) {
|
long nchoosek(long n, long k) {
|
||||||
|
|
@ -169,11 +215,11 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
private double[] getObservationWeights(double[][] probs, int refBaseIndex, int altBaseIndex) {
|
private double[] getObservationWeights(double[][] probs, int refBaseIndex, int altBaseIndex) {
|
||||||
//if (probs.length <= 10)
|
//if (probs.length <= 10)
|
||||||
//{
|
//{
|
||||||
// return getWeightTableTraces(getWeightTable(probs, refBaseIndex, altBaseIndex, probs.length));
|
return getWeightTableTraces(getWeightTable(probs, refBaseIndex, altBaseIndex, probs.length));
|
||||||
//}
|
//}
|
||||||
//else
|
//else
|
||||||
//{
|
//{
|
||||||
return FastObservationWeights(probs, refBaseIndex, altBaseIndex);
|
// return FastObservationWeights(probs, refBaseIndex, altBaseIndex);
|
||||||
//}
|
//}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -235,7 +281,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
paths = Utils.PermuteList(paths, permutation);
|
paths = Utils.PermuteList(paths, permutation);
|
||||||
likelihoods = Utils.PermuteList(likelihoods, permutation);
|
likelihoods = Utils.PermuteList(likelihoods, permutation);
|
||||||
|
|
||||||
while ((output_paths.size() < 10) && (paths.size() > 0))
|
while (/*(output_paths.size() < 10) &&*/ (paths.size() > 0))
|
||||||
{
|
{
|
||||||
// 4. Choose the next best path
|
// 4. Choose the next best path
|
||||||
int[] next_best_path = paths.get(paths.size()-1);
|
int[] next_best_path = paths.get(paths.size()-1);
|
||||||
|
|
@ -291,22 +337,28 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
likelihoods = Utils.PermuteList(likelihoods, permutation);
|
likelihoods = Utils.PermuteList(likelihoods, permutation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double[] ans = new double[probs.length+1];
|
||||||
|
|
||||||
for (int i = 0; i < output_paths.size(); i++)
|
for (int i = 0; i < output_paths.size(); i++)
|
||||||
{
|
{
|
||||||
int[] path = output_paths.get(i);
|
int[] path = output_paths.get(i);
|
||||||
double likelihood = output_likelihoods.get(i);
|
double likelihood = output_likelihoods.get(i);
|
||||||
|
int altCount = 0;
|
||||||
|
|
||||||
System.out.printf("DBG %d ", i);
|
//System.out.printf("DBG %d ", i);
|
||||||
for (int j = 0; j < path.length; j++)
|
for (int j = 0; j < path.length; j++)
|
||||||
{
|
{
|
||||||
System.out.printf("%c", BaseUtils.baseIndexToSimpleBase(path[j]));
|
//System.out.printf("%c", BaseUtils.baseIndexToSimpleBase(path[j]));
|
||||||
}
|
|
||||||
System.out.printf(" %f\n", likelihood);
|
if (path[j] != ref) { altCount++; }
|
||||||
}
|
}
|
||||||
System.out.printf("\n");
|
//System.out.printf(" %f\n", likelihood);
|
||||||
|
|
||||||
|
ans[altCount] += Math.pow(10.0, likelihood);
|
||||||
|
//System.out.println(altCount + " " + likelihood + " " + Math.pow(10.0, likelihood));
|
||||||
|
}
|
||||||
|
//System.out.printf("\n");
|
||||||
|
|
||||||
double[] ans = new double[probs.length+1];
|
|
||||||
for (int i = 0; i < ans.length; i++) { ans[i] = 1.0; }
|
|
||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -389,7 +441,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
}
|
}
|
||||||
G.ApplyPrior(ref, Double.NaN);
|
G.ApplyPrior(ref, Double.NaN);
|
||||||
|
|
||||||
System.out.printf("%s %s %s %s\n", context.getLocation(), ref, bases, G.toString(ref), rodString);
|
//System.out.printf("%s %s %s %s\n", context.getLocation(), ref, bases, G.toString(ref), rodString);
|
||||||
|
|
||||||
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
|
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue