diff --git a/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java index 2c0b21589..a5ac63319 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java @@ -87,7 +87,10 @@ public class GenotypeLikelihoods { } } - private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) { + private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) + { + if (qual == 0) { qual = 1; } // zero quals are wrong. + char h1 = genotype.charAt(0); char h2 = genotype.charAt(1); @@ -137,7 +140,7 @@ public class GenotypeLikelihoods { public void ApplyPrior(char ref, double p_alt) { for (int i = 0; i < genotypes.length; i++) { - if (p_alt == -1) + if ((p_alt == -1) || (p_alt <= 1e-6)) { if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { // hom-ref @@ -218,7 +221,7 @@ public class GenotypeLikelihoods { public double LodVsRef(char ref) { if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) { ref_likelihood = sorted_likelihoods[0]; - return this.LodVsNextBest(); + return (-1.0 * this.LodVsNextBest()); } else { for (int i = 0; i < genotypes.length; i++) { if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { @@ -239,7 +242,7 @@ public class GenotypeLikelihoods { return this.sorted_likelihoods[0]; } - public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors) { + public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors, String sample_name) { this.sort(); double qhat = Double.NaN; double qstar = Double.NaN; @@ -252,13 +255,11 @@ public class GenotypeLikelihoods { alt = 'N'; } else if ((sorted_genotypes[0].charAt(0) != ref) && (sorted_genotypes[0].charAt(1) != ref)) { // hom-nonref - likelihoods[0] += Math.log10(1e-5); qhat = 1.0; qstar = 1.0; alt = sorted_genotypes[0].charAt(0); } else { // het - likelihoods[0] += Math.log10(1e-3); qhat = 0.5; qstar = 0.5; @@ -270,7 +271,108 @@ public class GenotypeLikelihoods { } } - return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods); + this.LodVsRef(ref); //HACK + //System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood); + + return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods, sample_name); } + public static class IndelCall + { + public String type; + public String[] alleles; + public double p; + public double lod; + + public IndelCall(String type, String[] alleles, double p, double lod) + { + this.type = type; + this.alleles = alleles; + this.p = p; + this.lod = lod; + } + + public String toString() + { + return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod); + } + } + + public static IndelCall callIndel(String[] indels) + { + HashMap indel_allele_counts = new HashMap(); + for (int i = 0; i < indels.length; i++) + { + if (! indel_allele_counts.containsKey(indels[i])) + { + indel_allele_counts.put(indels[i], 1); + } + else + { + indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1); + } + } + + Object[] keys = indel_allele_counts.keySet().toArray(); + String[] alleles = new String[keys.length]; + int[] counts = new int[keys.length]; + double likelihoods[] = new double[keys.length]; + int null_count = 0; + String max_allele = null; + int max_count = -1; + if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null"))))) + { + for (int i = 0; i < keys.length; i++) + { + Integer count = (Integer)indel_allele_counts.get(keys[i]); + alleles[i] = (String)keys[i]; + counts[i] = count; + if (alleles[i].equals("null")) { null_count = counts[i]; } + else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; } + //System.out.printf("%s[%d] ", keys[i], count); + } + //System.out.printf("\n"); + + double eps = 1e-3; + double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999); + double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3); + double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5); + + double lodRef = pRef - Math.max(pHet, pHom); + double lodHet = pHet - pRef; + double lodHom = pHom - pRef; + + //System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef); + //System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet); + //System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom); + //System.out.printf("\n"); + + if (lodRef > 0) + { + // reference call + String[] genotype = new String[2]; + genotype[0] = "null"; + genotype[1] = "null"; + return new IndelCall("ref", genotype, pRef, lodRef); + } + else if (lodHet > lodHom) + { + // het call + String[] genotype = new String[2]; + genotype[0] = "null"; + genotype[1] = max_allele; + return new IndelCall("het", genotype, pHet, lodHet); + } + else + { + // hom call + String[] genotype = new String[2]; + genotype[0] = max_allele; + genotype[1] = max_allele; + return new IndelCall("hom", genotype, pHom, lodHom); + } + } + return null; + } + }