diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java index f2df6f908..a21606519 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java @@ -9,6 +9,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.MathUtils; import net.sf.samtools.SAMRecord; @@ -27,9 +28,15 @@ public class SingleSampleGenotyper extends LocusWalker= 0 && altBaseIndex >= 0) { - /* - for (int i = 0; i < probs.length; i++) { - System.out.printf(" [ %4.4f %4.4f %4.4f %4.4f ]\n", probs[i][0], probs[i][1], probs[i][2], probs[i][3]); - } - */ - double[] obsWeights = getObservationWeights(probs, refBaseIndex, altBaseIndex); - /* - System.out.print(" Weights: "); - for (int i = 0; i < obsWeights.length; i++) { - System.out.print(obsWeights[i] + " "); - } - System.out.println(); - */ - double[] genotypePriors = { 0.999, 1e-3, 1e-5 }; - //double[] genotypeBalances = { 0.02, 0.5, 0.98 }; - double[] genotypeBalances = { 0.002, 0.5, 1.0 }; + double[] genotypeBalances = { 0.02, 0.5, 0.98 }; + double[] reportingBalances = { 0.0, 0.5, 1.0 }; // the metrics class doesn't like it if you don't tell it these exact values double[] posteriors = new double[3]; double qhat = 0.0, qstar = 0.0, lodVsRef = 0.0, pBest = Double.MIN_VALUE; @@ -105,28 +104,21 @@ public class SingleSampleGenotyper extends LocusWalker pBest) { - qhat = genotypeBalances[hypothesis]; - qstar = genotypeBalances[hypothesis]; + qhat = reportingBalances[hypothesis]; + qstar = reportingBalances[hypothesis]; lodVsRef = Math.log10(posteriors[hypothesis]) - refLikelihood; pBest = posteriors[hypothesis]; } @@ -140,15 +132,13 @@ public class SingleSampleGenotyper extends LocusWalker k; i--, j++) - t = t * i / j; - return t; - } - private double[] getObservationWeights(double[][] probs, int refBaseIndex, int altBaseIndex) { - //if (probs.length <= 10) - //{ - return getWeightTableTraces(getWeightTable(probs, refBaseIndex, altBaseIndex, probs.length)); - //} - //else - //{ - // return FastObservationWeights(probs, refBaseIndex, altBaseIndex); - //} - } - - private double[] FastObservationWeights(double[][] probs, int ref, int alt) - { - List paths = new ArrayList(); - List likelihoods = new ArrayList(); - - List output_paths = new ArrayList(); - List output_likelihoods = new ArrayList(); - - HashMap done_paths = new HashMap(); - - // 1. Find the best path. - int[] best_path = new int[probs.length]; - double best_likelihood = 0; - for (int i = 0; i < probs.length; i++) - { - int max; - double max_p; - if (probs[i][ref] >= probs[i][alt]) { max = ref; max_p = probs[i][ref]; } - else { max = alt; max_p = probs[i][alt]; } - best_path[i] = max; - best_likelihood += Math.log10(max_p); - } - output_paths.add(best_path); - output_likelihoods.add(best_likelihood); - //String s = ""; for (int j = 0; j < best_path.length; j++) { s += best_path[j]; } - String s = ""; for (int j = 0; j < best_path.length; j++) { s += BaseUtils.baseIndexToSimpleBase(best_path[j]); } - done_paths.put(s,true); - - // 2. Enumerate all paths one-away from the best path - for (int i = 0; i < best_path.length; i++) - { - int[] path = Arrays.copyOf(best_path, best_path.length); - double likelihood; - if (path[i] == ref) - { - path[i] = alt; - likelihood = best_likelihood - Math.log10(probs[i][ref]) + Math.log10(probs[i][alt]); - } - else - { - path[i] = ref; - likelihood = best_likelihood - Math.log10(probs[i][alt]) + Math.log10(probs[i][ref]); - } - - //s = ""; for (int j = 0; j < path.length; j++) { s += path[j]; } - s = ""; for (int j = 0; j < path.length; j++) { s += BaseUtils.baseIndexToSimpleBase(path[j]); } - - //if (!done_paths.containsKey(s)) { - paths.add(path); - likelihoods.add(likelihood); - //} - } - - // 3. Sort paths by likelihood - Integer[] permutation = Utils.SortPermutation(likelihoods); - paths = Utils.PermuteList(paths, permutation); - likelihoods = Utils.PermuteList(likelihoods, permutation); - - while (/*(output_paths.size() < 10) &&*/ (paths.size() > 0)) - { - // 4. Choose the next best path - int[] next_best_path = paths.get(paths.size()-1); - double next_best_likelihood = likelihoods.get(likelihoods.size()-1); - - s = ""; for (int j = 0; j < next_best_path.length; j++) { s += BaseUtils.baseIndexToSimpleBase(next_best_path[j]); } - if (done_paths.containsKey(s)) { break; } - - output_paths.add(next_best_path); - output_likelihoods.add(next_best_likelihood); - paths.remove(paths.size()-1); - likelihoods.remove(likelihoods.size()-1); - - //s = ""; for (int j = 0; j < next_best_path.length; j++) { s += next_best_path[j]; } - - done_paths.put(s,true); - - /* - if (likelihoods.get(likelihoods.size()-1) < probs.length*Math.log10(0.5)) - { - break; - } - */ - - // 5. Enumerate all paths one-away from next best - for (int i = 0; i < best_path.length; i++) - { - int[] path = Arrays.copyOf(next_best_path, next_best_path.length); - double likelihood; - if (path[i] == ref) - { - path[i] = alt; - likelihood = next_best_likelihood - Math.log10(probs[i][ref]) + Math.log10(probs[i][alt]); - } - else - { - path[i] = ref; - likelihood = next_best_likelihood - Math.log10(probs[i][alt]) + Math.log10(probs[i][ref]); - } - - //s = ""; for (int j = 0; j < path.length; j++) { s += path[j]; } - s = ""; for (int j = 0; j < path.length; j++) { s += BaseUtils.baseIndexToSimpleBase(path[j]); } - if (!done_paths.containsKey(s)) - { - paths.add(path); - likelihoods.add(likelihood); - } - } - - // 6. Re-sort - permutation = Utils.SortPermutation(likelihoods); - paths = Utils.PermuteList(paths, permutation); - likelihoods = Utils.PermuteList(likelihoods, permutation); - } - - double[] ans = new double[probs.length+1]; - - for (int i = 0; i < output_paths.size(); i++) - { - int[] path = output_paths.get(i); - double likelihood = output_likelihoods.get(i); - int altCount = 0; - - //System.out.printf("DBG %d ", i); - for (int j = 0; j < path.length; j++) - { - //System.out.printf("%c", BaseUtils.baseIndexToSimpleBase(path[j])); - - if (path[j] != ref) { altCount++; } - } - //System.out.printf(" %f\n", likelihood); - - ans[altCount] += Math.pow(10.0, likelihood); - //System.out.println(altCount + " " + likelihood + " " + Math.pow(10.0, likelihood)); - } - //System.out.printf("\n"); - - return ans; + return getWeightTableTraces(getWeightTable(probs, refBaseIndex, altBaseIndex, probs.length)); } private double[][] getWeightTable(double[][] probs, int refBaseIndex, int altBaseIndex, int numReadsToConsider) { @@ -441,8 +236,6 @@ public class SingleSampleGenotyper extends LocusWalker