diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 4ce32d971..63c0cf447 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -29,15 +29,9 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.collections.Pair; import org.broad.tribble.util.variantcontext.Genotype; -import org.broad.tribble.util.variantcontext.Allele; -import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; -import org.broad.tribble.vcf.VCFConstants; import java.io.PrintStream; import java.util.*; @@ -54,154 +48,14 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { GRID_SEARCH } - private static final double LOG10_REFERENCE_CALL_EPSILON = 1.0; - protected int N; - protected AlleleFrequencyMatrix AFMatrix; - protected Set refCalls; - + protected Logger logger; protected PrintStream verboseWriter; - protected boolean useReferenceSampleOptimization; + protected enum GenotypeType { AA, AB, BB } - protected AlleleFrequencyCalculationModel(int N, Logger logger, PrintStream verboseWriter, boolean useReferenceSampleOptimization) { - this.N = N; - this.logger = logger; - this.verboseWriter = verboseWriter; - this.useReferenceSampleOptimization = useReferenceSampleOptimization; - AFMatrix = new AlleleFrequencyMatrix(N); - refCalls = new HashSet(); - } - - /** - * Must be overridden by concrete subclasses - * @param tracker rod data - * @param ref reference context - * @param GLs genotype likelihoods - * @param log10AlleleFrequencyPriors priors - * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results - * @param minFrequencyToCalculate the minimum frequency which needs to be calculated - */ - public abstract void getLog10PNonRef(RefMetaDataTracker tracker, - ReferenceContext ref, - Map GLs, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors, - int minFrequencyToCalculate); - - /** - * Can be overridden by concrete subclasses - * @param contexts alignment contexts - * @param GLs genotype likelihoods - * @param log10AlleleFrequencyPosteriors allele frequency results - * @param AFofMaxLikelihood allele frequency of max likelihood - * - * @return calls - */ - public Map assignGenotypes(Map contexts, - Map GLs, - double[] log10AlleleFrequencyPosteriors, - int AFofMaxLikelihood) { - initializeAFMatrix(GLs); - - // increment the grid - for (int i = 1; i <= AFofMaxLikelihood; i++) { - // add one more alternate allele - AFMatrix.incrementFrequency(); - } - - return generateCalls(contexts, GLs, AFofMaxLikelihood); - } - - protected Map generateCalls(Map contexts, - Map GLs, - int frequency) { - HashMap calls = new HashMap(); - - // first, the potential alt calls - for ( String sample : AFMatrix.getSamples() ) { - BiallelicGenotypeLikelihoods GL = GLs.get(sample); - Allele alleleA = GL.getAlleleA(); - Allele alleleB = GL.getAlleleB(); - - // set the genotype and confidence - Pair AFbasedGenotype = AFMatrix.getGenotype(frequency, sample); - ArrayList myAlleles = new ArrayList(); - if ( AFbasedGenotype.first == GenotypeType.AA.ordinal() ) { - myAlleles.add(alleleA); - myAlleles.add(alleleA); - } else if ( AFbasedGenotype.first == GenotypeType.AB.ordinal() ) { - myAlleles.add(alleleA); - myAlleles.add(alleleB); - } else { // ( AFbasedGenotype.first == GenotypeType.BB.ordinal() ) - myAlleles.add(alleleB); - myAlleles.add(alleleB); - } - - HashMap attributes = new HashMap(); - attributes.put(VCFConstants.DEPTH_KEY, getUnfilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup())); - - GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods()); - attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString()); - - calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false)); - } - - // now, the clearly ref calls - for ( BiallelicGenotypeLikelihoods GL : refCalls ) { - String sample = GL.getSample(); - - Allele ref = GL.getAlleleA(); - ArrayList myAlleles = new ArrayList(); - myAlleles.add(ref); - myAlleles.add(ref); - - HashMap attributes = new HashMap(); - attributes.put(VCFConstants.DEPTH_KEY, getUnfilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup())); - - GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods()); - attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString()); - - double GQ = GL.getAALikelihoods() - Math.max(GL.getABLikelihoods(), GL.getBBLikelihoods()); - - calls.put(sample, new Genotype(sample, myAlleles, GQ, null, attributes, false)); - } - - return calls; - } - - protected void initializeAFMatrix(Map GLs) { - refCalls.clear(); - AFMatrix.clear(); - - for ( BiallelicGenotypeLikelihoods GL : GLs.values() ) { - if ( useReferenceSampleOptimization && isClearRefCall(GL) ) { - refCalls.add(GL); - } else { - AFMatrix.setLikelihoods(GL.getPosteriors(), GL.getSample()); - } - } - } - - private boolean isClearRefCall(BiallelicGenotypeLikelihoods GL) { - if ( GL.getAlleleA().isNonReference() ) - return false; - - double[] likelihoods = GL.getLikelihoods(); - double refLikelihoodMinusEpsilon = likelihoods[0] - LOG10_REFERENCE_CALL_EPSILON; - return ( refLikelihoodMinusEpsilon > likelihoods[1] && refLikelihoodMinusEpsilon > likelihoods[2]); - } - - private int getUnfilteredDepth(ReadBackedPileup pileup) { - int count = 0; - for ( PileupElement p : pileup ) { - if ( DiploidSNPGenotypeLikelihoods.usableBase(p, true) ) - count++; - } - - return count; - } + protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; protected class CalculatedAlleleFrequency { @@ -214,157 +68,49 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { } } - protected enum GenotypeType { AA, AB, BB } - protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; + protected AlleleFrequencyCalculationModel(int N, Logger logger, PrintStream verboseWriter) { + this.N = N; + this.logger = logger; + this.verboseWriter = verboseWriter; + } - protected static class AlleleFrequencyMatrix { + /** + * Must be overridden by concrete subclasses + * @param tracker rod data + * @param ref reference context + * @param GLs genotype likelihoods + * @param log10AlleleFrequencyPriors priors + * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results + * @param minFrequencyToCalculate the minimum frequency which needs to be calculated + */ + protected abstract void getLog10PNonRef(RefMetaDataTracker tracker, + ReferenceContext ref, + Map GLs, + double[] log10AlleleFrequencyPriors, + double[] log10AlleleFrequencyPosteriors, + int minFrequencyToCalculate); - private double[][] matrix; // allele frequency matrix - private int[] indexes; // matrix to maintain which genotype is active - private int maxN; // total possible frequencies in data - private int frequency; // current frequency + /** + * Can be overridden by concrete subclasses + * @param contexts alignment contexts + * @param GLs genotype likelihoods + * @param log10AlleleFrequencyPosteriors allele frequency results + * @param AFofMaxLikelihood allele frequency of max likelihood + * + * @return calls + */ + protected abstract Map assignGenotypes(Map contexts, + Map GLs, + double[] log10AlleleFrequencyPosteriors, + int AFofMaxLikelihood); - // data structures necessary to maintain a list of the best genotypes and their scores - private ArrayList samples = new ArrayList(); - private HashMap>> samplesToGenotypesPerAF = new HashMap>>(); - - public AlleleFrequencyMatrix(int N) { - maxN = N; - matrix = new double[N][3]; - indexes = new int[N]; - clear(); + protected int getUnfilteredDepth(ReadBackedPileup pileup) { + int count = 0; + for ( PileupElement p : pileup ) { + if ( DiploidSNPGenotypeLikelihoods.usableBase(p, true) ) + count++; } - public List getSamples() { return samples; } - - public void clear() { - frequency = 0; - for (int i = 0; i < maxN; i++) - indexes[i] = 0; - samples.clear(); - samplesToGenotypesPerAF.clear(); - } - - public void setLikelihoods(double AA, double AB, double BB, String sample) { - int index = samples.size(); - samples.add(sample); - matrix[index][GenotypeType.AA.ordinal()] = AA; - matrix[index][GenotypeType.AB.ordinal()] = AB; - matrix[index][GenotypeType.BB.ordinal()] = BB; - } - - public void setLikelihoods(double[] GLs, String sample) { - int index = samples.size(); - samples.add(sample); - matrix[index][GenotypeType.AA.ordinal()] = GLs[0]; - matrix[index][GenotypeType.AB.ordinal()] = GLs[1]; - matrix[index][GenotypeType.BB.ordinal()] = GLs[2]; - } - - public void incrementFrequency() { - int N = samples.size(); - if ( frequency == 2 * N ) - throw new ReviewedStingException("Frequency was incremented past N; how is this possible?"); - frequency++; - - double greedy = VALUE_NOT_CALCULATED; - int greedyIndex = -1; - for (int i = 0; i < N; i++) { - - if ( indexes[i] == GenotypeType.AB.ordinal() ) { - if ( matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()] > greedy ) { - greedy = matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()]; - greedyIndex = i; - } - } - else if ( indexes[i] == GenotypeType.AA.ordinal() ) { - if ( matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()] > greedy ) { - greedy = matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()]; - greedyIndex = i; - } - // note that we currently don't bother with breaking ties between samples - // (which would be done by looking at the HOM_VAR value) because it's highly - // unlikely that a collision will both occur and that the difference will - // be significant at HOM_VAR... - } - // if this person is already hom var, he can't add another alternate allele - // so we can ignore that case - } - if ( greedyIndex == -1 ) - throw new ReviewedStingException("There is no best choice for a new alternate allele; how is this possible?"); - - if ( indexes[greedyIndex] == GenotypeType.AB.ordinal() ) - indexes[greedyIndex] = GenotypeType.BB.ordinal(); - else - indexes[greedyIndex] = GenotypeType.AB.ordinal(); - } - - public double getLikelihoodsOfFrequency() { - double likelihoods = 0.0; - int N = samples.size(); - for (int i = 0; i < N; i++) - likelihoods += matrix[i][indexes[i]]; - - /* - System.out.println(frequency); - for (int i = 0; i < N; i++) { - for (int j=0; j < 3; j++) { - System.out.print(String.valueOf(matrix[i][j])); - System.out.print(indexes[i] == j ? "* " : " "); - } - System.out.println(); - } - System.out.println(likelihoods); - System.out.println(); - */ - - recordGenotypes(); - - return likelihoods; - } - - public Pair getGenotype(int frequency, String sample) { - // it's possible that the genotype conformation hash wasn't initialized - if ( samplesToGenotypesPerAF.get(frequency) == null ) { - // confirm that the current frequency is the target one - if ( frequency != this.frequency ) - throw new IllegalStateException("Attempting to retrieve genotypes from an uninitialized hash for frequency " + frequency + " (the hash is current standing at frequency " + this.frequency + ")"); - recordGenotypes(); - } - return samplesToGenotypesPerAF.get(frequency).get(sample); - } - - private void recordGenotypes() { - HashMap> samplesToGenotypes = new HashMap>(); - - int index = 0; - for ( String sample : samples ) { - int genotype = indexes[index]; - - double score; - - int maxEntry = MathUtils.maxElementIndex(matrix[index]); - // if the max value is for the most likely genotype, we can compute next vs. next best - if ( genotype == maxEntry ) { - if ( genotype == GenotypeType.AA.ordinal() ) - score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AB.ordinal()], matrix[index][GenotypeType.BB.ordinal()]); - else if ( genotype == GenotypeType.AB.ordinal() ) - score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.BB.ordinal()]); - else // ( genotype == GenotypeType.HOM.ordinal() ) - score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.AB.ordinal()]); - } - // otherwise, we need to calculate the probability of the genotype - else { - double[] normalized = MathUtils.normalizeFromLog10(matrix[index]); - double chosenGenotype = normalized[genotype]; - score = -1.0 * Math.log10(1.0 - chosenGenotype); - } - - samplesToGenotypes.put(sample, new Pair(genotype, Math.abs(score))); - index++; - } - - samplesToGenotypesPerAF.put(frequency, samplesToGenotypes); - } + return count; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java index c6c8938ee..78f99ddf8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -46,7 +46,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static final double LOGEPS = -300; protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) { - super(N, logger, verboseWriter, true); + super(N, logger, verboseWriter); } public void getLog10PNonRef(RefMetaDataTracker tracker, @@ -151,25 +151,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { Map GLs, double[] log10AlleleFrequencyPosteriors, int AFofMaxLikelihood) { - - // overriding implementation in AlleleFrequencyCalculationModel to avoid filling out AF matrix which is not used. - return generateCalls(contexts, GLs, AFofMaxLikelihood); - } - - protected Map generateCalls(Map contexts, - Map GLs, - int bestAFguess) { - HashMap calls = new HashMap(); + HashMap calls = new HashMap(); - double[][] pathMetricArray = new double[GLs.size()+1][bestAFguess+1]; - int[][] tracebackArray = new int[GLs.size()+1][bestAFguess+1]; + double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1]; + int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1]; ArrayList sampleIndices = new ArrayList(); int sampleIdx = 0; // todo - optimize initialization - for (int k=0; k <= bestAFguess; k++) + for (int k=0; k <= AFofMaxLikelihood; k++) for (int j=0; j <= GLs.size(); j++) pathMetricArray[j][k] = -1e30; @@ -186,7 +178,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { double[] likelihoods = GLs.get(sample).getLikelihoods(); - for (int k=0; k <= bestAFguess; k++) { + for (int k=0; k <= AFofMaxLikelihood; k++) { double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0]; int bestIndex = k; @@ -214,7 +206,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - int startIdx = bestAFguess; + int startIdx = AFofMaxLikelihood; for (int k = sampleIdx; k > 0; k--) { int bestGTguess; String sample = sampleIndices.get(k-1); @@ -263,7 +255,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } - return calls; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java index 00adb3757..22929526f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java @@ -27,9 +27,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.MathUtils; import java.util.*; import java.io.PrintStream; @@ -40,17 +46,19 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { // how much off from the max likelihoods do we need to be before we can quit calculating? protected static final double LOG10_OPTIMIZATION_EPSILON = 8.0; + private AlleleFrequencyMatrix AFMatrix; + protected GridSearchAFEstimation(int N, Logger logger, PrintStream verboseWriter) { - super(N, logger, verboseWriter, false); + super(N, logger, verboseWriter); + AFMatrix = new AlleleFrequencyMatrix(N); } - public void getLog10PNonRef(RefMetaDataTracker tracker, - ReferenceContext ref, - Map GLs, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors, - int minFrequencyToCalculate) { - + protected void getLog10PNonRef(RefMetaDataTracker tracker, + ReferenceContext ref, + Map GLs, + double[] log10AlleleFrequencyPriors, + double[] log10AlleleFrequencyPosteriors, + int minFrequencyToCalculate) { initializeAFMatrix(GLs); // first, calculate for AF=0 (no change to matrix) @@ -90,10 +98,192 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { * * @return calls */ - public Map assignGenotypes(Map contexts, - Map GLs, - double[] log10AlleleFrequencyPosteriors, - int AFofMaxLikelihood) { - return generateCalls(contexts, GLs, AFofMaxLikelihood); + protected Map assignGenotypes(Map contexts, + Map GLs, + double[] log10AlleleFrequencyPosteriors, + int AFofMaxLikelihood) { + HashMap calls = new HashMap(); + + // first, the potential alt calls + for ( String sample : AFMatrix.getSamples() ) { + BiallelicGenotypeLikelihoods GL = GLs.get(sample); + Allele alleleA = GL.getAlleleA(); + Allele alleleB = GL.getAlleleB(); + + // set the genotype and confidence + Pair AFbasedGenotype = AFMatrix.getGenotype(AFofMaxLikelihood, sample); + ArrayList myAlleles = new ArrayList(); + if ( AFbasedGenotype.first == GenotypeType.AA.ordinal() ) { + myAlleles.add(alleleA); + myAlleles.add(alleleA); + } else if ( AFbasedGenotype.first == GenotypeType.AB.ordinal() ) { + myAlleles.add(alleleA); + myAlleles.add(alleleB); + } else { // ( AFbasedGenotype.first == GenotypeType.BB.ordinal() ) + myAlleles.add(alleleB); + myAlleles.add(alleleB); + } + + HashMap attributes = new HashMap(); + attributes.put(VCFConstants.DEPTH_KEY, getUnfilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup())); + + GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods()); + attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString()); + + calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false)); + } + + return calls; + } + + private void initializeAFMatrix(Map GLs) { + AFMatrix.clear(); + + for ( BiallelicGenotypeLikelihoods GL : GLs.values() ) + AFMatrix.setLikelihoods(GL.getPosteriors(), GL.getSample()); + } + + protected static class AlleleFrequencyMatrix { + + private double[][] matrix; // allele frequency matrix + private int[] indexes; // matrix to maintain which genotype is active + private int maxN; // total possible frequencies in data + private int frequency; // current frequency + + // data structures necessary to maintain a list of the best genotypes and their scores + private ArrayList samples = new ArrayList(); + private HashMap>> samplesToGenotypesPerAF = new HashMap>>(); + + public AlleleFrequencyMatrix(int N) { + maxN = N; + matrix = new double[N][3]; + indexes = new int[N]; + clear(); + } + + public List getSamples() { return samples; } + + public void clear() { + frequency = 0; + for (int i = 0; i < maxN; i++) + indexes[i] = 0; + samples.clear(); + samplesToGenotypesPerAF.clear(); + } + + public void setLikelihoods(double AA, double AB, double BB, String sample) { + int index = samples.size(); + samples.add(sample); + matrix[index][GenotypeType.AA.ordinal()] = AA; + matrix[index][GenotypeType.AB.ordinal()] = AB; + matrix[index][GenotypeType.BB.ordinal()] = BB; + } + + public void setLikelihoods(double[] GLs, String sample) { + int index = samples.size(); + samples.add(sample); + matrix[index][GenotypeType.AA.ordinal()] = GLs[0]; + matrix[index][GenotypeType.AB.ordinal()] = GLs[1]; + matrix[index][GenotypeType.BB.ordinal()] = GLs[2]; + } + + public void incrementFrequency() { + int N = samples.size(); + if ( frequency == 2 * N ) + throw new ReviewedStingException("Frequency was incremented past N; how is this possible?"); + frequency++; + + double greedy = VALUE_NOT_CALCULATED; + int greedyIndex = -1; + for (int i = 0; i < N; i++) { + + if ( indexes[i] == GenotypeType.AB.ordinal() ) { + if ( matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()] > greedy ) { + greedy = matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()]; + greedyIndex = i; + } + } + else if ( indexes[i] == GenotypeType.AA.ordinal() ) { + if ( matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()] > greedy ) { + greedy = matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()]; + greedyIndex = i; + } + // note that we currently don't bother with breaking ties between samples + // (which would be done by looking at the HOM_VAR value) because it's highly + // unlikely that a collision will both occur and that the difference will + // be significant at HOM_VAR... + } + // if this person is already hom var, he can't add another alternate allele + // so we can ignore that case + } + if ( greedyIndex == -1 ) + throw new ReviewedStingException("There is no best choice for a new alternate allele; how is this possible?"); + + if ( indexes[greedyIndex] == GenotypeType.AB.ordinal() ) + indexes[greedyIndex] = GenotypeType.BB.ordinal(); + else + indexes[greedyIndex] = GenotypeType.AB.ordinal(); + } + + public double getLikelihoodsOfFrequency() { + double likelihoods = 0.0; + int N = samples.size(); + for (int i = 0; i < N; i++) + likelihoods += matrix[i][indexes[i]]; + + /* + System.out.println(frequency); + for (int i = 0; i < N; i++) { + for (int j=0; j < 3; j++) { + System.out.print(String.valueOf(matrix[i][j])); + System.out.print(indexes[i] == j ? "* " : " "); + } + System.out.println(); + } + System.out.println(likelihoods); + System.out.println(); + */ + + recordGenotypes(); + + return likelihoods; + } + + public Pair getGenotype(int frequency, String sample) { + return samplesToGenotypesPerAF.get(frequency).get(sample); + } + + private void recordGenotypes() { + HashMap> samplesToGenotypes = new HashMap>(); + + int index = 0; + for ( String sample : samples ) { + int genotype = indexes[index]; + + double score; + + int maxEntry = MathUtils.maxElementIndex(matrix[index]); + // if the max value is for the most likely genotype, we can compute next vs. next best + if ( genotype == maxEntry ) { + if ( genotype == GenotypeType.AA.ordinal() ) + score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AB.ordinal()], matrix[index][GenotypeType.BB.ordinal()]); + else if ( genotype == GenotypeType.AB.ordinal() ) + score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.BB.ordinal()]); + else // ( genotype == GenotypeType.HOM.ordinal() ) + score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.AB.ordinal()]); + } + // otherwise, we need to calculate the probability of the genotype + else { + double[] normalized = MathUtils.normalizeFromLog10(matrix[index]); + double chosenGenotype = normalized[genotype]; + score = -1.0 * Math.log10(1.0 - chosenGenotype); + } + + samplesToGenotypes.put(sample, new Pair(genotype, Math.abs(score))); + index++; + } + + samplesToGenotypesPerAF.put(frequency, samplesToGenotypes); + } } } \ No newline at end of file