add code for computing indel genotype likelihoods

make reference lods negative


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@664 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
jmaguire 2009-05-12 15:09:29 +00:00
parent 11723fbcc2
commit 0267ccae7f
1 changed files with 109 additions and 7 deletions

View File

@ -87,7 +87,10 @@ public class GenotypeLikelihoods {
}
}
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual)
{
if (qual == 0) { qual = 1; } // zero quals are wrong.
char h1 = genotype.charAt(0);
char h2 = genotype.charAt(1);
@ -137,7 +140,7 @@ public class GenotypeLikelihoods {
public void ApplyPrior(char ref, double p_alt)
{
for (int i = 0; i < genotypes.length; i++) {
if (p_alt == -1)
if ((p_alt == -1) || (p_alt <= 1e-6))
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
@ -218,7 +221,7 @@ public class GenotypeLikelihoods {
public double LodVsRef(char ref) {
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
ref_likelihood = sorted_likelihoods[0];
return this.LodVsNextBest();
return (-1.0 * this.LodVsNextBest());
} else {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
@ -239,7 +242,7 @@ public class GenotypeLikelihoods {
return this.sorted_likelihoods[0];
}
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors) {
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors, String sample_name) {
this.sort();
double qhat = Double.NaN;
double qstar = Double.NaN;
@ -252,13 +255,11 @@ public class GenotypeLikelihoods {
alt = 'N';
} else if ((sorted_genotypes[0].charAt(0) != ref) && (sorted_genotypes[0].charAt(1) != ref)) {
// hom-nonref
likelihoods[0] += Math.log10(1e-5);
qhat = 1.0;
qstar = 1.0;
alt = sorted_genotypes[0].charAt(0);
} else {
// het
likelihoods[0] += Math.log10(1e-3);
qhat = 0.5;
qstar = 0.5;
@ -270,7 +271,108 @@ public class GenotypeLikelihoods {
}
}
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods);
this.LodVsRef(ref); //HACK
//System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood);
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods, sample_name);
}
public static class IndelCall
{
public String type;
public String[] alleles;
public double p;
public double lod;
public IndelCall(String type, String[] alleles, double p, double lod)
{
this.type = type;
this.alleles = alleles;
this.p = p;
this.lod = lod;
}
public String toString()
{
return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
}
}
public static IndelCall callIndel(String[] indels)
{
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
for (int i = 0; i < indels.length; i++)
{
if (! indel_allele_counts.containsKey(indels[i]))
{
indel_allele_counts.put(indels[i], 1);
}
else
{
indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1);
}
}
Object[] keys = indel_allele_counts.keySet().toArray();
String[] alleles = new String[keys.length];
int[] counts = new int[keys.length];
double likelihoods[] = new double[keys.length];
int null_count = 0;
String max_allele = null;
int max_count = -1;
if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null")))))
{
for (int i = 0; i < keys.length; i++)
{
Integer count = (Integer)indel_allele_counts.get(keys[i]);
alleles[i] = (String)keys[i];
counts[i] = count;
if (alleles[i].equals("null")) { null_count = counts[i]; }
else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; }
//System.out.printf("%s[%d] ", keys[i], count);
}
//System.out.printf("\n");
double eps = 1e-3;
double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999);
double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3);
double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5);
double lodRef = pRef - Math.max(pHet, pHom);
double lodHet = pHet - pRef;
double lodHom = pHom - pRef;
//System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef);
//System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet);
//System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom);
//System.out.printf("\n");
if (lodRef > 0)
{
// reference call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = "null";
return new IndelCall("ref", genotype, pRef, lodRef);
}
else if (lodHet > lodHom)
{
// het call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = max_allele;
return new IndelCall("het", genotype, pHet, lodHet);
}
else
{
// hom call
String[] genotype = new String[2];
genotype[0] = max_allele;
genotype[1] = max_allele;
return new IndelCall("hom", genotype, pHom, lodHom);
}
}
return null;
}
}