added k-best quality path enumeration.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@497 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b8a6f6e830
commit
6cef8bd76c
|
|
@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.*;
|
||||||
|
|
||||||
// Draft single sample genotyper
|
// Draft single sample genotyper
|
||||||
// j.maguire 3-7-2009
|
// j.maguire 3-7-2009
|
||||||
|
|
@ -34,11 +34,16 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
String rodString = getRodString(tracker);
|
String rodString = getRodString(tracker);
|
||||||
|
|
||||||
AlleleFrequencyEstimate freq = null;
|
AlleleFrequencyEstimate freq = null;
|
||||||
if (fourBaseMode) {
|
if (fourBaseMode)
|
||||||
|
{
|
||||||
// Compute four-base prob genotype likelihoods
|
// Compute four-base prob genotype likelihoods
|
||||||
freq = getFourProbAlleleFrequency(ref, context, rodString);
|
freq = getFourProbAlleleFrequency(ref, context, rodString);
|
||||||
} else if (decideOnBase) {
|
}
|
||||||
} else {
|
else if (decideOnBase)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
// Compute single quality score genotype likelihoods
|
// Compute single quality score genotype likelihoods
|
||||||
freq = getOneProbAlleleFrequency(ref, context, rodString);
|
freq = getOneProbAlleleFrequency(ref, context, rodString);
|
||||||
}
|
}
|
||||||
|
|
@ -162,11 +167,134 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
}
|
}
|
||||||
|
|
||||||
private double[] getObservationWeights(double[][] probs, int refBaseIndex, int altBaseIndex) {
|
private double[] getObservationWeights(double[][] probs, int refBaseIndex, int altBaseIndex) {
|
||||||
if (probs.length <= 10) {
|
//if (probs.length <= 10)
|
||||||
return getWeightTableTraces(getWeightTable(probs, refBaseIndex, altBaseIndex, probs.length));
|
//{
|
||||||
|
// return getWeightTableTraces(getWeightTable(probs, refBaseIndex, altBaseIndex, probs.length));
|
||||||
|
//}
|
||||||
|
//else
|
||||||
|
//{
|
||||||
|
return FastObservationWeights(probs, refBaseIndex, altBaseIndex);
|
||||||
|
//}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double[] FastObservationWeights(double[][] probs, int ref, int alt)
|
||||||
|
{
|
||||||
|
List<int[]> paths = new ArrayList<int[]>();
|
||||||
|
List<Double> likelihoods = new ArrayList<Double>();
|
||||||
|
|
||||||
|
List<int[]> output_paths = new ArrayList<int[]>();
|
||||||
|
List<Double> output_likelihoods = new ArrayList<Double>();
|
||||||
|
|
||||||
|
HashMap<String,Boolean> done_paths = new HashMap<String,Boolean>();
|
||||||
|
|
||||||
|
// 1. Find the best path.
|
||||||
|
int[] best_path = new int[probs.length];
|
||||||
|
double best_likelihood = 0;
|
||||||
|
for (int i = 0; i < probs.length; i++)
|
||||||
|
{
|
||||||
|
int max;
|
||||||
|
double max_p;
|
||||||
|
if (probs[i][ref] >= probs[i][alt]) { max = ref; max_p = probs[i][ref]; }
|
||||||
|
else { max = alt; max_p = probs[i][alt]; }
|
||||||
|
best_path[i] = max;
|
||||||
|
best_likelihood += Math.log10(max_p);
|
||||||
|
}
|
||||||
|
output_paths.add(best_path);
|
||||||
|
output_likelihoods.add(best_likelihood);
|
||||||
|
String s = ""; for (int j = 0; j < best_path.length; j++) { s += best_path[j]; }
|
||||||
|
done_paths.put(s,true);
|
||||||
|
|
||||||
|
// 2. Enumerate all paths one-away from the best path
|
||||||
|
for (int i = 0; i < best_path.length; i++)
|
||||||
|
{
|
||||||
|
int[] path = Arrays.copyOf(best_path, best_path.length);
|
||||||
|
double likelihood;
|
||||||
|
if (path[i] == ref)
|
||||||
|
{
|
||||||
|
path[i] = alt;
|
||||||
|
likelihood = best_likelihood - Math.log10(probs[i][ref]) + Math.log10(probs[i][alt]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
path[i] = ref;
|
||||||
|
likelihood = best_likelihood - Math.log10(probs[i][alt]) + Math.log10(probs[i][ref]);
|
||||||
|
}
|
||||||
|
paths.add(path);
|
||||||
|
likelihoods.add(likelihood);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new double[probs.length + 1];
|
// 3. Sort paths by likelihood
|
||||||
|
Integer[] permutation = Utils.SortPermutation(likelihoods);
|
||||||
|
paths = Utils.PermuteList(paths, permutation);
|
||||||
|
likelihoods = Utils.PermuteList(likelihoods, permutation);
|
||||||
|
|
||||||
|
while ((output_paths.size() < 10) && (paths.size() > 0))
|
||||||
|
{
|
||||||
|
// 4. Choose the next best path
|
||||||
|
int[] next_best_path = paths.get(paths.size()-1);
|
||||||
|
double next_best_likelihood = likelihoods.get(likelihoods.size()-1);
|
||||||
|
output_paths.add(next_best_path);
|
||||||
|
output_likelihoods.add(next_best_likelihood);
|
||||||
|
paths.remove(paths.size()-1);
|
||||||
|
likelihoods.remove(likelihoods.size()-1);
|
||||||
|
s = ""; for (int j = 0; j < next_best_path.length; j++) { s += next_best_path[j]; }
|
||||||
|
done_paths.put(s,true);
|
||||||
|
|
||||||
|
/*
|
||||||
|
if (likelihoods.get(likelihoods.size()-1) < probs.length*Math.log10(0.5))
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
// 5. Enumerate all paths one-away from next best
|
||||||
|
for (int i = 0; i < best_path.length; i++)
|
||||||
|
{
|
||||||
|
int[] path = Arrays.copyOf(next_best_path, next_best_path.length);
|
||||||
|
double likelihood;
|
||||||
|
if (path[i] == ref)
|
||||||
|
{
|
||||||
|
path[i] = alt;
|
||||||
|
likelihood = next_best_likelihood - Math.log10(probs[i][ref]) + Math.log10(probs[i][alt]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
path[i] = ref;
|
||||||
|
likelihood = next_best_likelihood - Math.log10(probs[i][alt]) + Math.log10(probs[i][ref]);
|
||||||
|
}
|
||||||
|
|
||||||
|
s = "";
|
||||||
|
for (int j = 0; j < path.length; j++) { s += path[j]; }
|
||||||
|
if (done_paths.get(s) == null)
|
||||||
|
{
|
||||||
|
paths.add(path);
|
||||||
|
likelihoods.add(likelihood);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. Re-sort
|
||||||
|
permutation = Utils.SortPermutation(likelihoods);
|
||||||
|
paths = Utils.PermuteList(paths, permutation);
|
||||||
|
likelihoods = Utils.PermuteList(likelihoods, permutation);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < output_paths.size(); i++)
|
||||||
|
{
|
||||||
|
int[] path = output_paths.get(i);
|
||||||
|
double likelihood = output_likelihoods.get(i);
|
||||||
|
|
||||||
|
System.out.printf("DBG %d ", i);
|
||||||
|
for (int j = 0; j < path.length; j++)
|
||||||
|
{
|
||||||
|
System.out.printf("%c", BaseUtils.baseIndexToSimpleBase(path[j]));
|
||||||
|
}
|
||||||
|
System.out.printf(" %f\n", likelihood);
|
||||||
|
}
|
||||||
|
System.out.printf("\n");
|
||||||
|
|
||||||
|
double[] ans = new double[probs.length+1];
|
||||||
|
for (int i = 0; i < ans.length; i++) { ans[i] = 1.0; }
|
||||||
|
return ans;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double[][] getWeightTable(double[][] probs, int refBaseIndex, int altBaseIndex, int numReadsToConsider) {
|
private double[][] getWeightTable(double[][] probs, int refBaseIndex, int altBaseIndex, int numReadsToConsider) {
|
||||||
|
|
@ -271,6 +399,11 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
return rodString;
|
return rodString;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Setter whose body is only assert(false): with assertions enabled (-ea) any call throws
// AssertionError; with assertions disabled the call does nothing and freq is ignored.
// NOTE(review): looks like a placeholder marking the setter as unsupported here -- confirm.
public void setAlleleFrequencyPrior(double freq)
|
||||||
|
{
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
|
||||||
// Given result of map function
|
// Given result of map function
|
||||||
public Integer reduceInit() { return 0; }
|
public Integer reduceInit() { return 0; }
|
||||||
public Integer reduce(AlleleFrequencyEstimate value, Integer sum) { return 0; }
|
public Integer reduce(AlleleFrequencyEstimate value, Integer sum) { return 0; }
|
||||||
|
|
|
||||||
|
|
@ -183,4 +183,4 @@ public class GenotypeLikelihoods {
|
||||||
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods);
|
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -331,6 +331,15 @@ public class Utils {
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static <T> List<T> PermuteList(List<T> list, Integer[] permutation)
|
||||||
|
{
|
||||||
|
List<T> output = new ArrayList<T>();
|
||||||
|
for (int i = 0; i < permutation.length; i++) {
|
||||||
|
output.add(list.get(permutation[i]));
|
||||||
|
}
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Draw N random elements from list. */
|
/** Draw N random elements from list. */
|
||||||
public static <T> List<T> RandomSubset(List<T> list, int N)
|
public static <T> List<T> RandomSubset(List<T> list, int N)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue