1. Newest version of the joint estimation model. Faster than previous version and now qscores can get to be > 39.8 for hets.

2. More sanity checks in annotations git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2119 348d0f76-0448-11de-a6fe-93d51630548a
2009-11-23 17:05:50 +00:00 · 2009-11-23 17:05:50 +00:00 · 14bf6ce83c
parent ee2abd30c4
commit 14bf6ce83c
3 changed files with 121 additions and 23 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
@ -99,7 +99,13 @@ public class AlleleBalance implements VariantAnnotation {
            double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
            posteriors = MathUtils.normalizeFromLog10(posteriors);
-            weights.add(posteriors[bestGenotype.ordinal()]);
+            double weight = posteriors[bestGenotype.ordinal()];
            // sanity check
            if ( MathUtils.compareDoubles(weight, 0.0) == 0 )
                continue;
            weights.add(weight);
            refBalances.add((double)refCount / (double)(refCount + altCount));
        }
--- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/OnOffGenotype.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/OnOffGenotype.java
@ -103,7 +103,13 @@ public class OnOffGenotype implements VariantAnnotation {
            double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
            posteriors = MathUtils.normalizeFromLog10(posteriors);
-            weights.add(posteriors[bestGenotype.ordinal()]);
+            double weight = posteriors[bestGenotype.ordinal()];
            // sanity check
            if ( MathUtils.compareDoubles(weight, 0.0) == 0 )
                continue;
            weights.add(weight);
            onOffBalances.add((double)onCount / (double)totalCount);
        }
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java
@ -12,6 +12,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
    // the GenotypeLikelihoods map
    private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
    private HashMap<Character, AlleleFrequencyMatrix> AFMatrixMap = new HashMap<Character, AlleleFrequencyMatrix>();
    private enum GenotypeType { REF, HET, HOM }
@ -26,10 +27,18 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
    protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
        // initialize the GenotypeLikelihoods
        GLs.clear();
        AFMatrixMap.clear();
        // for each alternate allele, create a new matrix
        for ( char alt : BaseUtils.BASES ) {
            if ( alt != ref )
                AFMatrixMap.put(alt, new AlleleFrequencyMatrix(contexts.size()));
        }
        // use flat priors for GLs
        DiploidGenotypePriors priors = new DiploidGenotypePriors();
        int index = 0;
        for ( String sample : contexts.keySet() ) {
            AlignmentContextBySample context = contexts.get(sample);
            ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
@ -38,34 +47,33 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
            GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
            GL.add(pileup, true);
            GLs.put(sample, GL);
            double[] posteriors = GL.getPosteriors();
            // for each alternate allele, fill the matrix
            for ( char alt : BaseUtils.BASES ) {
                if ( alt != ref ) {
                    DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
                    DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
                    DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
                    AFMatrixMap.get(alt).setLikelihoods(posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()], index);
                }
            }
            index++;
        }
    }
    protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
        DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
        DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
        DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
-        double PofDgivenAFi = 0.0;
+        // *** note that this code assumes that allele frequencies are passed in IN ORDER from 0 to 2N
-        // for each sample
+        AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
        for ( GenotypeLikelihoods GL : GLs.values() ) {
-            double[] posteriors = GL.getPosteriors();
+        // for any frequency other than zero, calculate the next greedy entry
        if ( MathUtils.compareDoubles(f, 0.0) != 0 )
            matrix.incrementFrequency();
-            double[] allelePosteriors = new double[] { posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] };
+        return matrix.getLikelihoodsOfFrequency();
            allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors);
            // calculate the posterior weighted frequencies
            double[] HWvalues = getHardyWeinbergValues(f);
            double samplePofDgivenAFi = 0.0;
            samplePofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()];
            samplePofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()];
            samplePofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()];
            PofDgivenAFi += Math.log10(samplePofDgivenAFi);
        }
        return PofDgivenAFi;
    }
    protected List<Genotype> makeGenotypeCalls(char ref, char alt, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
@ -98,4 +106,82 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
        return calls;
    }
    protected class AlleleFrequencyMatrix {
        private double[][] matrix;
        private int[] indexes;
        private int N;
        private int frequency;
        public AlleleFrequencyMatrix(int N) {
            this.N = N;
            frequency = 0;
            matrix = new double[N][3];
            indexes = new int[N];
            for (int i = 0; i < N; i++)
                indexes[i] = 0;
        }
        public void setLikelihoods(double AA, double AB, double BB, int index) {
            matrix[index][GenotypeType.REF.ordinal()] = AA;
            matrix[index][GenotypeType.HET.ordinal()] = AB;
            matrix[index][GenotypeType.HOM.ordinal()] = BB;
        }
        public void incrementFrequency() {
            if ( frequency == 2 * N )
                throw new StingException("Frequency was incremented past N; how is this possible?");
            frequency++;
            double greedy = -1.0 * Double.MAX_VALUE;
            int greedyIndex = -1;
            for (int i = 0; i < N; i++) {
                if ( indexes[i] == GenotypeType.HET.ordinal() ) {
                    if ( matrix[i][GenotypeType.HOM.ordinal()] - matrix[i][GenotypeType.HET.ordinal()] > greedy ) {
                        greedy = matrix[i][GenotypeType.HOM.ordinal()] - matrix[i][GenotypeType.HET.ordinal()];
                        greedyIndex = i;
                    }
                }
                else if ( indexes[i] == GenotypeType.REF.ordinal() ) {
                    if ( matrix[i][GenotypeType.HET.ordinal()] - matrix[i][GenotypeType.REF.ordinal()] > greedy ) {
                        greedy = matrix[i][GenotypeType.HET.ordinal()] - matrix[i][GenotypeType.REF.ordinal()];
                        greedyIndex = i;
                    }
                    // note that we currently don't bother with breaking ties between samples
                    // (which would be done by looking at the HOM_VAR value) because it's highly
                    // unlikely that a collision will both occur and that the difference will
                    // be significant at HOM_VAR...
                }
                // if this person is already hom var, he can't add another alternate allele
                // so we can ignore that case
            }
            if ( greedyIndex == -1 )
                throw new StingException("There is no best choice for a new alternate allele; how is this possible?");
            if ( indexes[greedyIndex] == GenotypeType.HET.ordinal() )
                indexes[greedyIndex] = GenotypeType.HOM.ordinal();
            else
                indexes[greedyIndex] = GenotypeType.HET.ordinal();
        }
        public double getLikelihoodsOfFrequency() {
            double likelihoods = 0.0;
            for (int i = 0; i < N; i++)
                likelihoods += matrix[i][indexes[i]];
            //verboseWriter.write(frequency + "\n");
            //for (int i = 0; i < N; i++) {
            //    for (int j=0; j < 3; j++) {
            //        verboseWriter.write(String.valueOf(matrix[i][j]));
            //        verboseWriter.write(indexes[i] == j ? "* " : " ");
            //    }
            //    verboseWriter.write("\n");
            //}
            //verboseWriter.write(likelihoods + "\n\n");
            return likelihoods;
        }
    }
 }