add code for computing indel genotype likelihoods
make reference lods negative git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@664 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
11723fbcc2
commit
0267ccae7f
|
|
@ -87,7 +87,10 @@ public class GenotypeLikelihoods {
|
|||
}
|
||||
}
|
||||
|
||||
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
|
||||
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual)
|
||||
{
|
||||
if (qual == 0) { qual = 1; } // zero quals are wrong.
|
||||
|
||||
char h1 = genotype.charAt(0);
|
||||
char h2 = genotype.charAt(1);
|
||||
|
||||
|
|
@ -137,7 +140,7 @@ public class GenotypeLikelihoods {
|
|||
public void ApplyPrior(char ref, double p_alt)
|
||||
{
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
if (p_alt == -1)
|
||||
if ((p_alt == -1) || (p_alt <= 1e-6))
|
||||
{
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
|
|
@ -218,7 +221,7 @@ public class GenotypeLikelihoods {
|
|||
public double LodVsRef(char ref) {
|
||||
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
|
||||
ref_likelihood = sorted_likelihoods[0];
|
||||
return this.LodVsNextBest();
|
||||
return (-1.0 * this.LodVsNextBest());
|
||||
} else {
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
|
|
@ -239,7 +242,7 @@ public class GenotypeLikelihoods {
|
|||
return this.sorted_likelihoods[0];
|
||||
}
|
||||
|
||||
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors) {
|
||||
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors, String sample_name) {
|
||||
this.sort();
|
||||
double qhat = Double.NaN;
|
||||
double qstar = Double.NaN;
|
||||
|
|
@ -252,13 +255,11 @@ public class GenotypeLikelihoods {
|
|||
alt = 'N';
|
||||
} else if ((sorted_genotypes[0].charAt(0) != ref) && (sorted_genotypes[0].charAt(1) != ref)) {
|
||||
// hom-nonref
|
||||
likelihoods[0] += Math.log10(1e-5);
|
||||
qhat = 1.0;
|
||||
qstar = 1.0;
|
||||
alt = sorted_genotypes[0].charAt(0);
|
||||
} else {
|
||||
// het
|
||||
likelihoods[0] += Math.log10(1e-3);
|
||||
qhat = 0.5;
|
||||
qstar = 0.5;
|
||||
|
||||
|
|
@ -270,7 +271,108 @@ public class GenotypeLikelihoods {
|
|||
}
|
||||
}
|
||||
|
||||
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods);
|
||||
this.LodVsRef(ref); //HACK
|
||||
//System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood);
|
||||
|
||||
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods, sample_name);
|
||||
}
|
||||
|
||||
public static class IndelCall
|
||||
{
|
||||
public String type;
|
||||
public String[] alleles;
|
||||
public double p;
|
||||
public double lod;
|
||||
|
||||
public IndelCall(String type, String[] alleles, double p, double lod)
|
||||
{
|
||||
this.type = type;
|
||||
this.alleles = alleles;
|
||||
this.p = p;
|
||||
this.lod = lod;
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
|
||||
}
|
||||
}
|
||||
|
||||
public static IndelCall callIndel(String[] indels)
|
||||
{
|
||||
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
|
||||
for (int i = 0; i < indels.length; i++)
|
||||
{
|
||||
if (! indel_allele_counts.containsKey(indels[i]))
|
||||
{
|
||||
indel_allele_counts.put(indels[i], 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1);
|
||||
}
|
||||
}
|
||||
|
||||
Object[] keys = indel_allele_counts.keySet().toArray();
|
||||
String[] alleles = new String[keys.length];
|
||||
int[] counts = new int[keys.length];
|
||||
double likelihoods[] = new double[keys.length];
|
||||
int null_count = 0;
|
||||
String max_allele = null;
|
||||
int max_count = -1;
|
||||
if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null")))))
|
||||
{
|
||||
for (int i = 0; i < keys.length; i++)
|
||||
{
|
||||
Integer count = (Integer)indel_allele_counts.get(keys[i]);
|
||||
alleles[i] = (String)keys[i];
|
||||
counts[i] = count;
|
||||
if (alleles[i].equals("null")) { null_count = counts[i]; }
|
||||
else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; }
|
||||
//System.out.printf("%s[%d] ", keys[i], count);
|
||||
}
|
||||
//System.out.printf("\n");
|
||||
|
||||
double eps = 1e-3;
|
||||
double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999);
|
||||
double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3);
|
||||
double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5);
|
||||
|
||||
double lodRef = pRef - Math.max(pHet, pHom);
|
||||
double lodHet = pHet - pRef;
|
||||
double lodHom = pHom - pRef;
|
||||
|
||||
//System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef);
|
||||
//System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet);
|
||||
//System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom);
|
||||
//System.out.printf("\n");
|
||||
|
||||
if (lodRef > 0)
|
||||
{
|
||||
// reference call
|
||||
String[] genotype = new String[2];
|
||||
genotype[0] = "null";
|
||||
genotype[1] = "null";
|
||||
return new IndelCall("ref", genotype, pRef, lodRef);
|
||||
}
|
||||
else if (lodHet > lodHom)
|
||||
{
|
||||
// het call
|
||||
String[] genotype = new String[2];
|
||||
genotype[0] = "null";
|
||||
genotype[1] = max_allele;
|
||||
return new IndelCall("het", genotype, pHet, lodHet);
|
||||
}
|
||||
else
|
||||
{
|
||||
// hom call
|
||||
String[] genotype = new String[2];
|
||||
genotype[0] = max_allele;
|
||||
genotype[1] = max_allele;
|
||||
return new IndelCall("hom", genotype, pHom, lodHom);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue