From 6cef8bd76c6fea0959991187a2bfe6060a792545 Mon Sep 17 00:00:00 2001 From: jmaguire Date: Wed, 22 Apr 2009 20:26:51 +0000 Subject: [PATCH] added k-best quality path enumeration. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@497 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/SingleSampleGenotyper.java | 147 +++++++++++++++++- .../playground/utils/GenotypeLikelihoods.java | 2 +- .../org/broadinstitute/sting/utils/Utils.java | 9 ++ 3 files changed, 150 insertions(+), 8 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java index 7421b2894..2b450bcfe 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java @@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.Utils; import net.sf.samtools.SAMRecord; -import java.util.List; +import java.util.*; // Draft single sample genotyper // j.maguire 3-7-2009 @@ -34,11 +34,16 @@ public class SingleSampleGenotyper extends LocusWalker paths = new ArrayList(); + List likelihoods = new ArrayList(); + + List output_paths = new ArrayList(); + List output_likelihoods = new ArrayList(); + + HashMap done_paths = new HashMap(); + + // 1. Find the best path. + int[] best_path = new int[probs.length]; + double best_likelihood = 0; + for (int i = 0; i < probs.length; i++) + { + int max; + double max_p; + if (probs[i][ref] >= probs[i][alt]) { max = ref; max_p = probs[i][ref]; } + else { max = alt; max_p = probs[i][alt]; } + best_path[i] = max; + best_likelihood += Math.log10(max_p); + } + output_paths.add(best_path); + output_likelihoods.add(best_likelihood); + String s = ""; for (int j = 0; j < best_path.length; j++) { s += best_path[j]; } + done_paths.put(s,true); + + // 2. Enumerate all paths one-away from the best path + for (int i = 0; i < best_path.length; i++) + { + int[] path = Arrays.copyOf(best_path, best_path.length); + double likelihood; + if (path[i] == ref) + { + path[i] = alt; + likelihood = best_likelihood - Math.log10(probs[i][ref]) + Math.log10(probs[i][alt]); + } + else + { + path[i] = ref; + likelihood = best_likelihood - Math.log10(probs[i][alt]) + Math.log10(probs[i][ref]); + } + paths.add(path); + likelihoods.add(likelihood); } - return new double[probs.length + 1]; + // 3. Sort paths by likelihood + Integer[] permutation = Utils.SortPermutation(likelihoods); + paths = Utils.PermuteList(paths, permutation); + likelihoods = Utils.PermuteList(likelihoods, permutation); + + while ((output_paths.size() < 10) && (paths.size() > 0)) + { + // 4. Choose the next best path + int[] next_best_path = paths.get(paths.size()-1); + double next_best_likelihood = likelihoods.get(likelihoods.size()-1); + output_paths.add(next_best_path); + output_likelihoods.add(next_best_likelihood); + paths.remove(paths.size()-1); + likelihoods.remove(likelihoods.size()-1); + s = ""; for (int j = 0; j < next_best_path.length; j++) { s += next_best_path[j]; } + done_paths.put(s,true); + + /* + if (likelihoods.get(likelihoods.size()-1) < probs.length*Math.log10(0.5)) + { + break; + } + */ + + // 5. Enumerate all paths one-away from next best + for (int i = 0; i < best_path.length; i++) + { + int[] path = Arrays.copyOf(next_best_path, next_best_path.length); + double likelihood; + if (path[i] == ref) + { + path[i] = alt; + likelihood = next_best_likelihood - Math.log10(probs[i][ref]) + Math.log10(probs[i][alt]); + } + else + { + path[i] = ref; + likelihood = next_best_likelihood - Math.log10(probs[i][alt]) + Math.log10(probs[i][ref]); + } + + s = ""; + for (int j = 0; j < path.length; j++) { s += path[j]; } + if (done_paths.get(s) == null) + { + paths.add(path); + likelihoods.add(likelihood); + } + } + + // 6. Re-sort + permutation = Utils.SortPermutation(likelihoods); + paths = Utils.PermuteList(paths, permutation); + likelihoods = Utils.PermuteList(likelihoods, permutation); + } + + for (int i = 0; i < output_paths.size(); i++) + { + int[] path = output_paths.get(i); + double likelihood = output_likelihoods.get(i); + + System.out.printf("DBG %d ", i); + for (int j = 0; j < path.length; j++) + { + System.out.printf("%c", BaseUtils.baseIndexToSimpleBase(path[j])); + } + System.out.printf(" %f\n", likelihood); + } + System.out.printf("\n"); + + double[] ans = new double[probs.length+1]; + for (int i = 0; i < ans.length; i++) { ans[i] = 1.0; } + return ans; } private double[][] getWeightTable(double[][] probs, int refBaseIndex, int altBaseIndex, int numReadsToConsider) { @@ -271,6 +399,11 @@ public class SingleSampleGenotyper extends LocusWalker List PermuteList(List list, Integer[] permutation) + { + List output = new ArrayList(); + for (int i = 0; i < permutation.length; i++) { + output.add(list.get(permutation[i])); + } + return output; + } + /** Draw N random elements from list. */ public static List RandomSubset(List list, int N)