For on and off-genotype primary bases, optionally compute the concordance of the secondary bases to their expected distributions. Each genotype has slightly different profiles.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@580 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
16467ae7cf
commit
58c80d8d87
|
|
@ -2,13 +2,16 @@ package org.broadinstitute.sting.playground.utils;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class GenotypeLikelihoods {
|
||||
// precalculate these for performance (pow/log10 is expensive!)
|
||||
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
||||
|
||||
static {
|
||||
for(int qual=0; qual < Byte.MAX_VALUE; qual++) {
|
||||
oneMinusData[qual] = log10(1.0 - pow(10,(qual/-10.0)));
|
||||
|
|
@ -34,6 +37,12 @@ public class GenotypeLikelihoods {
|
|||
public double[] likelihoods;
|
||||
public String[] genotypes;
|
||||
|
||||
// Store the 2nd-best base priors for on-genotype primary bases
|
||||
HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
|
||||
|
||||
// Store the 2nd-best base priors for off-genotype primary bases
|
||||
HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
|
||||
|
||||
public GenotypeLikelihoods() {
|
||||
likelihoods = new double[10];
|
||||
genotypes = new String[10];
|
||||
|
|
@ -48,6 +57,28 @@ public class GenotypeLikelihoods {
|
|||
genotypes[7] = "GG";
|
||||
genotypes[8] = "GT";
|
||||
genotypes[9] = "TT";
|
||||
|
||||
onNextBestBasePriors.put("AA", 0.000);
|
||||
onNextBestBasePriors.put("AC", 0.302);
|
||||
onNextBestBasePriors.put("AG", 0.366);
|
||||
onNextBestBasePriors.put("AT", 0.142);
|
||||
onNextBestBasePriors.put("CC", 0.000);
|
||||
onNextBestBasePriors.put("CG", 0.548);
|
||||
onNextBestBasePriors.put("CT", 0.370);
|
||||
onNextBestBasePriors.put("GG", 0.000);
|
||||
onNextBestBasePriors.put("GT", 0.319);
|
||||
onNextBestBasePriors.put("TT", 0.000);
|
||||
|
||||
offNextBestBasePriors.put("AA", 0.480);
|
||||
offNextBestBasePriors.put("AC", 0.769);
|
||||
offNextBestBasePriors.put("AG", 0.744);
|
||||
offNextBestBasePriors.put("AT", 0.538);
|
||||
offNextBestBasePriors.put("CC", 0.575);
|
||||
offNextBestBasePriors.put("CG", 0.727);
|
||||
offNextBestBasePriors.put("CT", 0.768);
|
||||
offNextBestBasePriors.put("GG", 0.589);
|
||||
offNextBestBasePriors.put("GT", 0.762);
|
||||
offNextBestBasePriors.put("TT", 0.505);
|
||||
}
|
||||
|
||||
public void add(char ref, char read, byte qual) {
|
||||
|
|
@ -139,6 +170,44 @@ public class GenotypeLikelihoods {
|
|||
this.sort();
|
||||
}
|
||||
|
||||
public void applyFourBaseDistributionPrior(String primaryBases, String secondaryBases) {
|
||||
for (int genotypeIndex = 0; genotypeIndex < genotypes.length; genotypeIndex++) {
|
||||
char firstAllele = genotypes[genotypeIndex].charAt(0);
|
||||
char secondAllele = genotypes[genotypeIndex].charAt(1);
|
||||
|
||||
int offIsGenotypic = 0;
|
||||
int offTotal = 0;
|
||||
|
||||
int onIsGenotypic = 0;
|
||||
int onTotal = 0;
|
||||
|
||||
for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) {
|
||||
char primaryBase = primaryBases.charAt(pileupIndex);
|
||||
char secondaryBase = secondaryBases.charAt(pileupIndex);
|
||||
|
||||
if (primaryBase != firstAllele && primaryBase != secondAllele) {
|
||||
if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
|
||||
offIsGenotypic++;
|
||||
}
|
||||
offTotal++;
|
||||
} else {
|
||||
if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
|
||||
onIsGenotypic++;
|
||||
}
|
||||
onTotal++;
|
||||
}
|
||||
}
|
||||
|
||||
double offPrior = MathUtils.binomialProbability(offIsGenotypic, offTotal, offNextBestBasePriors.get(genotypes[genotypeIndex]));
|
||||
double onPrior = MathUtils.binomialProbability(onIsGenotypic, onTotal, onNextBestBasePriors.get(genotypes[genotypeIndex]));
|
||||
|
||||
likelihoods[genotypeIndex] += Math.log10(offPrior) + Math.log10(onPrior);
|
||||
|
||||
//System.out.println(genotypes[genotypeIndex] + " " + offNextBestBasePriors.get(genotypes[genotypeIndex]) + " " + offIsGenotypic + " " + offTotal + " " + (((double) offIsGenotypic)/((double) offTotal)) + " " + offPrior);
|
||||
}
|
||||
this.sort();
|
||||
}
|
||||
|
||||
public double LodVsNextBest() {
|
||||
this.sort();
|
||||
return sorted_likelihoods[0] - sorted_likelihoods[1];
|
||||
|
|
|
|||
Loading…
Reference in New Issue