From e2780c17afed8518c556afdf3b450b2cdc4b6d86 Mon Sep 17 00:00:00 2001 From: jmaguire Date: Thu, 3 Sep 2009 00:23:28 +0000 Subject: [PATCH] Checkin of the Multi-Sample SNP caller. Doesn't work yet; same command I used to use now causes GATK to throw an exception. Will check with Matt & Aaron tomorrow, then do a regression test. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1509 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/MultiSampleCaller.java | 436 +++++++++++++--- .../utils/ClassicGenotypeLikelihoods.java | 469 ++++++++++++++++++ .../playground/utils/ConfusionMatrix.java | 65 +++ 3 files changed, 902 insertions(+), 68 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/playground/utils/ClassicGenotypeLikelihoods.java create mode 100644 java/src/org/broadinstitute/sting/playground/utils/ConfusionMatrix.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MultiSampleCaller.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MultiSampleCaller.java index 10090a384..6b7b3cc9d 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MultiSampleCaller.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MultiSampleCaller.java @@ -9,8 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.genotyper.OldAndBustedGenotypeLikelihoods; -import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotypePriors; +import org.broadinstitute.sting.playground.utils.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -28,17 +27,26 @@ public class MultiSampleCaller extends LocusWalker sample_names; protected SAMFileHeader header; protected PrintStream individual_output_file; protected PrintStream discovery_output_file; + protected PrintStream stats_output_file; class MultiSampleCallResult { + GenomeLoc location; char ref; char alt; EM_Result em_result; @@ -52,8 +60,10 @@ public class MultiSampleCaller extends LocusWalker reads = context.getReads(); + List offsets = context.getOffsets(); + Pileup pileup = new ReadBackedPileup(ref, context); + + ans += String.format("%s ", context.getLocation()); + ans += String.format("%c ", ref); + ans += String.format("%d ", reads.size()); + ans += String.format("%d ", countBase(context, 'A', "+")); + ans += String.format("%d ", countBase(context, 'C', "+")); + ans += String.format("%d ", countBase(context, 'G', "+")); + ans += String.format("%d ", countBase(context, 'T', "+")); + ans += String.format("%d ", countBase(context, 'A', "-")); + ans += String.format("%d ", countBase(context, 'C', "-")); + ans += String.format("%d ", countBase(context, 'G', "-")); + ans += String.format("%d ", countBase(context, 'T', "-")); + + ans += String.format("%s ", Stats(BasicPileup.mappingQualPileup(reads))); + + return ans; + } + + static int countBase(AlignmentContext context, char base, String strand) + { + int count = 0; + List reads = context.getReads(); + List offsets = context.getOffsets(); + for (int i = 0; i < reads.size(); i++) + { + if (reads.get(i).getReadString().charAt(offsets.get(i)) == base) + { + if (strand.equals("+") && (reads.get(i).getReadNegativeStrandFlag()==false)) { count += 1; } + else if (strand.equals("-") && (reads.get(i).getReadNegativeStrandFlag()==true)) { count += 1; } + else if (! (strand.equals("+") || strand.equals("-"))) { count += 1; } + } + } + return count; + } + + public static String Stats(ArrayList X) + { + Collections.sort(X); + + long count = 0; + long sum = 0; + long min = X.get(0); + long max = X.get(0); + long median = X.get(0); + for (int i = 0; i < X.size(); i++) + { + int x = X.get(i); + if (x < min) { min = x; } + if (x > max) { max = x; } + sum += x; + count += 1; + if (i == X.size()/2) { median = x; } + } + + double mean = sum/count; + for (int i = 0; i < X.size(); i++) + { + int x = X.get(i); + sum += (x-mean)*(x-mean); + count += 1; + } + double sd = Math.sqrt(sum/count); + + return String.format("%d %f %d %d %f", min, mean, median, max, sd); + } } @@ -75,13 +181,20 @@ public class MultiSampleCaller extends LocusWalker read_groups = header.getReadGroups(); + GenomeAnalysisEngine toolkit = this.getToolkit(); + this.header = toolkit.getSAMFileHeader(); + List read_groups = header.getReadGroups(); sample_names = new ArrayList(); @@ -101,27 +213,49 @@ public class MultiSampleCaller extends LocusWalker indel_allele_likelihoods = new HashMap(); @@ -258,10 +454,10 @@ public class MultiSampleCaller extends LocusWalker sample_names, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods) + public EM_Result(List sample_names, ClassicGenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods, double[] sample_weights) { this.sample_names = new String[1]; this.sample_names = sample_names.toArray(this.sample_names); this.genotype_likelihoods = genotype_likelihoods; this.allele_likelihoods = allele_likelihoods; + this.sample_weights = sample_weights; EM_N = 0; for (int i = 0; i < genotype_likelihoods.length; i++) { if (genotype_likelihoods[i].coverage > 0) { EM_N += 1; } } + } + } EM_Result EM(AlignmentContext[] contexts) @@ -338,43 +584,70 @@ public class MultiSampleCaller extends LocusWalker sample_names) + MultiSampleCallResult MultiSampleCall(RefMetaDataTracker tracker, char ref, AlignmentContext context, List sample_names) { String in_dbsnp; if (tracker.lookup("DBSNP", null) != null) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; } - AlignmentContext[] contexts = filterAlignmentContextBySample(context, sample_names, 0); + AlignmentContext[] contexts = filterAlignmentContext(context, sample_names, 0); double lod = LOD(contexts); double strand_score = StrandScore(context); //EM_Result em_result = EM(contexts); - OldAndBustedGenotypeLikelihoods population_genotype_likelihoods = HardyWeinberg(em_result.allele_likelihoods); + ClassicGenotypeLikelihoods population_genotype_likelihoods = HardyWeinberg(em_result.allele_likelihoods); //double pD = Compute_pD(em_result.genotype_likelihoods); //double pNull = Compute_pNull(contexts); @@ -483,16 +756,13 @@ public class MultiSampleCaller extends LocusWalker 0.0) { alt = PickAlt(ref, em_result.allele_likelihoods); } - int n_ref = Compute_n_ref(ref, em_result.genotype_likelihoods); int n_het = Compute_n_het(ref, em_result.genotype_likelihoods); int n_hom = Compute_n_hom(ref, em_result.genotype_likelihoods); - discovery_output_file.printf("%s %c %c %f %f %f %f %f %s ", context.getLocation(), ref, alt, lod, strand_score, pD, pNull, discovery_lod, in_dbsnp); - for (int i = 0; i < 4; i++) { discovery_output_file.printf("%f ", em_result.allele_likelihoods[i]); } - discovery_output_file.printf("%f %d %d %d %d\n", alt_freq, em_result.EM_N, n_ref, n_het, n_hom); + char alt = 'N'; + //if (lod > 0.0) { alt = PickAlt(ref, em_result.allele_likelihoods); } + if ((n_het > 0) || (n_hom > 0)) { alt = PickAlt(ref, em_result.allele_likelihoods); } for (int i = 0; i < em_result.genotype_likelihoods.length; i++) { @@ -512,7 +782,7 @@ public class MultiSampleCaller extends LocusWalker reads = new ArrayList(); ArrayList offsets = new ArrayList(); @@ -542,7 +812,7 @@ public class MultiSampleCaller extends LocusWalker sample_names, int downsample) + protected AlignmentContext[] filterAlignmentContext(AlignmentContext context, List sample_names, int downsample) { HashMap index = new HashMap(); for (int i = 0; i < sample_names.size(); i++) @@ -608,6 +878,36 @@ public class MultiSampleCaller extends LocusWalker reads = new ArrayList(); + ArrayList offsets = new ArrayList(); + + for (int i = 0; i < L.getReads().size(); i++) + { + SAMRecord read = L.getReads().get(i); + Integer offset = L.getOffsets().get(i); + String RG = (String)(read.getAttribute("RG")); + + assert(this.header != null); + //assert(this.header.getReadGroup(RG) != null); + if (this.header.getReadGroup(RG) == null) { continue; } + + // skip bogus data + if (read.getMappingQuality() == 0) { continue; } + + String sample = this.header.getReadGroup(RG).getSample(); + //if (SAMPLE_NAME_REGEX != null) { sample = sample.replaceAll(SAMPLE_NAME_REGEX, "$1"); } + + reads.add(read); + offsets.add(offset); + } + + AlignmentContext ans = new AlignmentContext(L.getLocation(), reads, offsets); + + return ans; + } + // END Utility functions ///////// diff --git a/java/src/org/broadinstitute/sting/playground/utils/ClassicGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/playground/utils/ClassicGenotypeLikelihoods.java new file mode 100644 index 000000000..382e894f9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/utils/ClassicGenotypeLikelihoods.java @@ -0,0 +1,469 @@ +package org.broadinstitute.sting.playground.utils; + +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.QualityUtils; + +import static java.lang.Math.log10; +import static java.lang.Math.pow; +import java.util.HashMap; + +public class ClassicGenotypeLikelihoods { + // precalculate these for performance (pow/log10 is expensive!) + private static final double[] oneMinusData = new double[Byte.MAX_VALUE]; + + static { + for (int qual = 0; qual < Byte.MAX_VALUE; qual++) { + oneMinusData[qual] = log10(1.0 - pow(10, (qual / -10.0))); + //oneMinusData[qual] = log10(1.0 - QualityUtils.qualToProb(qual)); + } + } + + private static double getOneMinusQual(final byte qual) { + return oneMinusData[qual]; + } + + private static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE]; + + static { + for (int qual = 0; qual < Byte.MAX_VALUE; qual++) { + oneHalfMinusData[qual] = log10(0.5 - pow(10, (qual / -10.0)) / 2.0); + //oneHalfMinusData[qual] = log10(0.5 - QualityUtils.qualToProb(qual) / 2.0); + } + } + + private static double getOneHalfMinusQual(final byte qual) { + return oneHalfMinusData[qual]; + } + + public double[] likelihoods; + public String[] genotypes; + public int coverage; + + // The genotype priors; + private double priorHomRef; + private double priorHet; + private double priorHomVar; + + // Store the 2nd-best base priors for on-genotype primary bases + private HashMap onNextBestBasePriors = new HashMap(); + + // Store the 2nd-best base priors for off-genotype primary bases + private HashMap offNextBestBasePriors = new HashMap(); + + public ClassicGenotypeLikelihoods() { + double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000}; + double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505}; + + initialize(1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff); + } + + public ClassicGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) { + double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000}; + double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505}; + + initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff); + } + + public ClassicGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) { + initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff); + } + + private void initialize(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) { + this.priorHomRef = priorHomRef; + this.priorHet = priorHet; + this.priorHomVar = priorHomVar; + + likelihoods = new double[10]; + genotypes = new String[10]; + coverage = 0; + + for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); } + + genotypes[0] = "AA"; + genotypes[1] = "AC"; + genotypes[2] = "AG"; + genotypes[3] = "AT"; + genotypes[4] = "CC"; + genotypes[5] = "CG"; + genotypes[6] = "CT"; + genotypes[7] = "GG"; + genotypes[8] = "GT"; + genotypes[9] = "TT"; + + for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) { + onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]); + offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]); + } + } + + public double getHomRefPrior() { + return priorHomRef; + } + + public void setHomRefPrior(double priorHomRef) { + this.priorHomRef = priorHomRef; + } + + public double getHetPrior() { + return priorHet; + } + + public void setHetPrior(double priorHet) { + this.priorHet = priorHet; + } + + public double getHomVarPrior() { + return priorHomVar; + } + + public void setHomVarPrior(double priorHomVar) { + this.priorHomVar = priorHomVar; + } + + public double[] getOnGenotypeSecondaryPriors() { + double[] p2ndon = new double[10]; + + for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) { + p2ndon[genotypeIndex] = onNextBestBasePriors.get(genotypes[genotypeIndex]); + } + + return p2ndon; + } + + public void setOnGenotypeSecondaryPriors(double[] p2ndon) { + for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) { + onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]); + } + } + + public double[] getOffGenotypeSecondaryPriors() { + double[] p2ndoff = new double[10]; + + for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) { + p2ndoff[genotypeIndex] = offNextBestBasePriors.get(genotypes[genotypeIndex]); + } + + return p2ndoff; + } + + public void setOffGenotypeSecondaryPriors(double[] p2ndoff) { + for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) { + offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]); + } + } + + public void add(char ref, char read, byte qual) + { + if (qual <= 0) { qual = 1; } + + if (coverage == 0) + { + for (int i = 0; i < likelihoods.length; i++) + { + likelihoods[i] = 0; + } + } + double sum = 0.0; + for (int i = 0; i < genotypes.length; i++) + { + double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual); + likelihoods[i] += likelihood; + } + coverage += 1; + } + + public void add(char ref, char read, byte qual, ConfusionMatrix matrix, String platform) + { + if (qual <= 0) { qual = 1; } + if (platform == null) { platform = "ILLUMINA"; } + if (read == 'N') { return; } + + if (coverage == 0) + { + for (int i = 0; i < likelihoods.length; i++) + { + likelihoods[i] = 0; + } + } + double sum = 0.0; + for (int i = 0; i < genotypes.length; i++) + { + char h1 = genotypes[i].charAt(0); + char h2 = genotypes[i].charAt(1); + + double p1 = matrix.lookup(platform, read, h1); + double p2 = matrix.lookup(platform, read, h2); + + double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual, p1, p2); + + //System.out.printf("DBG: %c %c %s %d %f %f %f\n", ref, read, genotypes[i], qual, p1, p2, likelihood); + + likelihoods[i] += likelihood; + } + coverage += 1; + } + + private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) { + if (qual == 0) { + qual = 1; + } // zero quals are wrong. + + char h1 = genotype.charAt(0); + char h2 = genotype.charAt(1); + + double p_base; + + if ((h1 == h2) && (h1 == read)) { + // hom + p_base = getOneMinusQual(qual); + } else if ((h1 != h2) && ((h1 == read) || (h2 == read))) { + // het + p_base = getOneHalfMinusQual(qual); + } else { + // error + p_base = qual / -10.0; + } + + return p_base; + } + + public void TEST() + { + double p_A2A = 1.00; + double p_T2T = 1.00; + + double p_A2T = 0.75; + double p_T2A = 0.25; + + char ref = 'A'; + + System.out.printf("\tA\tT\n"); + System.out.printf("A\t%.02f\t%.02f\n", p_A2A, p_A2T); + System.out.printf("T\t%.02f\t%.02f\n", p_T2A, p_T2T); + System.out.printf("\n"); + + System.out.printf("P(A,Q20|AA) = %f\n", calculateAlleleLikelihood(ref, 'A', "AA", (byte)20, p_A2A, p_A2A)); + System.out.printf("P(A,Q20|AT) = %f\n", calculateAlleleLikelihood(ref, 'A', "AT", (byte)20, p_A2A, p_A2T)); + System.out.printf("P(A,Q20|TT) = %f\n", calculateAlleleLikelihood(ref, 'A', "TT", (byte)20, p_A2T, p_A2T)); + + System.out.printf("P(T,Q20|AA) = %f\n", calculateAlleleLikelihood(ref, 'T', "AA", (byte)20, p_T2A, p_T2A)); + System.out.printf("P(T,Q20|AT) = %f\n", calculateAlleleLikelihood(ref, 'T', "AT", (byte)20, p_T2A, p_T2T)); + System.out.printf("P(T,Q20|TT) = %f\n", calculateAlleleLikelihood(ref, 'T', "TT", (byte)20, p_T2T, p_T2T)); + + //System.exit(0); + } + + private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual, double p1, double p2) { + if (qual == 0) { + qual = 1; + } // zero quals are wrong. + + char h1 = genotype.charAt(0); + char h2 = genotype.charAt(1); + + double perr = Math.pow(10.0,qual/-10.0); + + double p_base = 0; + + if (read == h1) + { + p_base += (1.0 - perr); + } + else + { + p_base += perr * p1; + } + + if (read == h2) + { + p_base += (1.0 - perr); + } + else + { + p_base += perr * p2; + } + + p_base = Math.log10(p_base/2.0); + + return p_base; + } + + public String[] sorted_genotypes; + public double[] sorted_likelihoods; + + public void sort() { + Integer[] permutation = Utils.SortPermutation(likelihoods); + + Integer[] reverse_permutation = new Integer[permutation.length]; + for (int i = 0; i < reverse_permutation.length; i++) { + reverse_permutation[i] = permutation[(permutation.length - 1) - i]; + } + + sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation); + sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation); + } + + public String toString(char ref) { + this.sort(); + double sum = 0; + String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref)); + for (int i = 0; i < sorted_genotypes.length; i++) { + if (i != 0) { + s = s + " "; + } + s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]); + sum += Math.pow(10,sorted_likelihoods[i]); + } + s = s + String.format(" %f", sum); + return s; + } + + public void ApplyPrior(char ref, double[] allele_likelihoods) + { + int k = 0; + for (int i = 0; i < 4; i++) + { + for (int j = i; j < 4; j++) + { + if (i == j) + { + this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]); + } + else + { + this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2); + } + k++; + } + } + this.sort(); + } + + public void ApplyPrior(char ref, char alt, double p_alt) { + for (int i = 0; i < genotypes.length; i++) { + if ((p_alt == -1) || (p_alt <= 1e-6)) { + if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { + // hom-ref + likelihoods[i] += Math.log10(priorHomRef); + } else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) { + // hom-nonref + likelihoods[i] += Math.log10(priorHomVar); + } else { + // het + likelihoods[i] += Math.log10(priorHet); + } + if (Double.isInfinite(likelihoods[i])) { + likelihoods[i] = -1000; + } + } else { + if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { + // hom-ref + likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt); + } else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt)) { + // hom-nonref + likelihoods[i] += 2.0 * Math.log10(p_alt); + } else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) || + ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt))) { + // het + likelihoods[i] += Math.log10((1.0 - p_alt) * p_alt * 2.0); + } else { + // something else (noise!) + likelihoods[i] += Math.log10(1e-5); + } + + if (Double.isInfinite(likelihoods[i])) { + likelihoods[i] = -1000; + } + } + } + this.sort(); + } + + public void ApplyWeight(double weight) + { + for (int i = 0; i < genotypes.length; i++) { likelihoods[i] += Math.log10(weight); } + this.sort(); + } + + public void applySecondBaseDistributionPrior(String primaryBases, String secondaryBases) { + for (int genotypeIndex = 0; genotypeIndex < genotypes.length; genotypeIndex++) { + char firstAllele = genotypes[genotypeIndex].charAt(0); + char secondAllele = genotypes[genotypeIndex].charAt(1); + + int offIsGenotypic = 0; + int offTotal = 0; + + int onIsGenotypic = 0; + int onTotal = 0; + + for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) { + char primaryBase = primaryBases.charAt(pileupIndex); + + if (secondaryBases != null) { + char secondaryBase = secondaryBases.charAt(pileupIndex); + + if (primaryBase != firstAllele && primaryBase != secondAllele) { + if (secondaryBase == firstAllele || secondaryBase == secondAllele) { + offIsGenotypic++; + } + offTotal++; + } else { + if (secondaryBase == firstAllele || secondaryBase == secondAllele) { + onIsGenotypic++; + } + onTotal++; + } + } + } + + double offPrior = MathUtils.binomialProbability(offIsGenotypic, offTotal, offNextBestBasePriors.get(genotypes[genotypeIndex])); + double onPrior = MathUtils.binomialProbability(onIsGenotypic, onTotal, onNextBestBasePriors.get(genotypes[genotypeIndex])); + + likelihoods[genotypeIndex] += Math.log10(offPrior) + Math.log10(onPrior); + } + this.sort(); + } + + public double LodVsNextBest() { + this.sort(); + return sorted_likelihoods[0] - sorted_likelihoods[1]; + } + + double ref_likelihood = Double.NaN; + + public double LodVsRef(char ref) { + if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) { + ref_likelihood = sorted_likelihoods[0]; + return (-1.0 * this.LodVsNextBest()); + } else { + for (int i = 0; i < genotypes.length; i++) { + if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { + ref_likelihood = likelihoods[i]; + } + } + } + return sorted_likelihoods[0] - ref_likelihood; + } + + public String BestGenotype() { + this.sort(); + return this.sorted_genotypes[0]; + } + + public double BestPosterior() { + this.sort(); + return this.sorted_likelihoods[0]; + } + + public double RefPosterior(char ref) + { + this.LodVsRef(ref); + return this.ref_likelihood; + } + + private IndelLikelihood indel_likelihood; + public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; } + public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; } + +} diff --git a/java/src/org/broadinstitute/sting/playground/utils/ConfusionMatrix.java b/java/src/org/broadinstitute/sting/playground/utils/ConfusionMatrix.java new file mode 100644 index 000000000..19c06b433 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/utils/ConfusionMatrix.java @@ -0,0 +1,65 @@ +package org.broadinstitute.sting.playground.utils; + +import java.lang.*; +import java.util.*; +import java.io.*; + +import org.broadinstitute.sting.utils.*; + +public class ConfusionMatrix +{ + private double[][] ILLUMINA; + private double[][] solid; + private double[][] LS454; + + public ConfusionMatrix(String file_name) + { + //System.out.println("DBG: ConfusionMatrix constructor! (" + file_name + ")"); + + ILLUMINA = new double[4][4]; + solid = new double[4][4]; + LS454 = new double[4][4]; + + try + { + Scanner sc = new Scanner(new File(file_name)); + while (sc.hasNext()) + { + String platform = sc.next(); + char read = sc.next().charAt(0); + char ref = sc.next().charAt(0); + double p = sc.nextDouble(); + + int read_i = BaseUtils.simpleBaseToBaseIndex(read); + int ref_i = BaseUtils.simpleBaseToBaseIndex(ref); + + if (platform.equals("ILLUMINA")) { ILLUMINA[read_i][ref_i] = p; } + if (platform.equals("solid")) { solid[read_i][ref_i] = p; } + if (platform.equals("LS454")) { LS454[read_i][ref_i] = p; } + + //System.out.println("DBG: " + key + " " + p); + } + } + catch (Exception e) + { + e.printStackTrace(); + System.exit(-1); + } + + } + + double lookup(String platform, char read, char truth) + { + int read_i = BaseUtils.simpleBaseToBaseIndex(read); + int truth_i = BaseUtils.simpleBaseToBaseIndex(truth); + + double d = 0; + + if (platform.equals("ILLUMINA")) { d = ILLUMINA[read_i][truth_i]; } + else if (platform.equals("solid")) { d = solid[read_i][truth_i]; } + else if (platform.equals("LS454")) { d = LS454[read_i][truth_i]; } + else { assert(false); } + + return d; + } +}