better handling of edge cases (zero coverage, reference mistakes, etc.)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@747 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
jmaguire 2009-05-18 18:04:37 +00:00
parent 7c615c8fb0
commit 3441795d9c
3 changed files with 63 additions and 14 deletions

View File

@@ -98,6 +98,8 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
{
if (ref == 'N') { return null; }
// 1. separate each context.
LocusContext[] contexts = filterLocusContext(context, sample_names, 0);
@@ -114,7 +116,31 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
double[] trajectory = new double[MAX_ITERATIONS + 1]; trajectory[0] = EM_alt_freq;
double[] likelihood_trajectory = new double[MAX_ITERATIONS + 1]; likelihood_trajectory[0] = 0.0;
boolean is_a_snp = false;
// Pick the alternate base
char alt = 'N';
{
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
String bases = pileup.getBases();
int A = 0;
int C = 0;
int G = 0;
int T = 0;
int max_count = -1;
for (int i = 0; i < bases.length(); i++)
{
char b = bases.charAt(i);
if (b == ref) { continue; }
switch (b)
{
case 'A' : A += 1; if (A > max_count) { max_count = A; alt = 'A'; } break;
case 'C' : C += 1; if (C > max_count) { max_count = C; alt = 'C'; } break;
case 'G' : G += 1; if (G > max_count) { max_count = G; alt = 'G'; } break;
case 'T' : T += 1; if (T > max_count) { max_count = T; alt = 'T'; } break;
}
}
}
for (int iterations = 0; iterations < MAX_ITERATIONS; iterations++)
{
// 6. Re-call from shallow coverage using the estimated frequency as a prior,
@@ -128,12 +154,10 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
for (int i = 0; i < sample_names.size(); i++)
{
callers.get(i).setAlleleFrequencyPrior(EM_alt_freq);
callers.get(i).setAlleleFrequencyPrior(EM_alt_freq, alt);
calls[i] = callers.get(i).map(tracker, ref, contexts[i]);
String genotype = calls[i].genotype();
if (calls[i].alt != 'N') { alt = calls[i].alt; }
likelihood += calls[i].pBest;
if (! FRACTIONAL_COUNTS)
@@ -157,7 +181,7 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
}
}
EM_alt_freq = EM_sum / EM_N;
EM_alt_freq = Math.min(EM_sum / EM_N, 0.99999);
trajectory[iterations+1] = EM_alt_freq;
likelihood_trajectory[iterations+1] = likelihood;

View File

@@ -278,6 +278,12 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
String bases = pileup.getBases();
if (bases.length() == 0)
{
GenotypeLikelihoods G = new GenotypeLikelihoods();
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods, sample_name);
}
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
ref = Character.toUpperCase(ref);
@@ -305,7 +311,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
}
G.ApplyPrior(ref, this.allele_frequency_prior);
G.ApplyPrior(ref, this.alt_allele, this.allele_frequency_prior);
if (fourBaseMode) {
G.applyFourBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
@@ -333,9 +339,11 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
}
double allele_frequency_prior = -1;
public void setAlleleFrequencyPrior(double freq)
char alt_allele;
/**
 * Records the allele-frequency prior and the alternate base it applies to,
 * for use when computing genotype likelihoods in a later map() call.
 *
 * @param freq prior probability of the alternate allele (fraction in [0,1],
 *             or -1 to indicate "no prior set" — TODO confirm against ApplyPrior)
 * @param alt  the alternate base ('A','C','G','T', or 'N' when none was chosen)
 */
public void setAlleleFrequencyPrior(double freq, char alt)
{
    this.alt_allele = alt;
    this.allele_frequency_prior = freq;
}
// Given result of map function

View File

@@ -137,18 +137,24 @@ public class GenotypeLikelihoods {
return s;
}
public void ApplyPrior(char ref, double p_alt)
public void ApplyPrior(char ref, char alt, double p_alt)
{
for (int i = 0; i < genotypes.length; i++) {
for (int i = 0; i < genotypes.length; i++)
{
if ((p_alt == -1) || (p_alt <= 1e-6))
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
{
// hom-ref
likelihoods[i] += Math.log10(1.0 - 1e-3);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
}
else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref))
{
// hom-nonref
likelihoods[i] += Math.log10(1e-5);
} else {
}
else
{
// het
likelihoods[i] += Math.log10(1e-3);
}
@@ -156,16 +162,27 @@ public class GenotypeLikelihoods {
}
else
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
{
// hom-ref
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
}
else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt))
{
// hom-nonref
likelihoods[i] += 2.0 * Math.log10(p_alt);
} else {
}
else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt)))
{
// het
likelihoods[i] += Math.log10((1.0-p_alt) * p_alt * 2.0);
}
else
{
// something else (noise!)
likelihoods[i] += Math.log10(1e-5);
}
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
}