Better handling of edge cases (zero coverage, reference mistakes, etc.)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@747 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7c615c8fb0
commit
3441795d9c
|
|
@ -98,6 +98,8 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
|
||||
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||
{
|
||||
if (ref == 'N') { return null; }
|
||||
|
||||
// 1. separate each context.
|
||||
LocusContext[] contexts = filterLocusContext(context, sample_names, 0);
|
||||
|
||||
|
|
@ -114,7 +116,31 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
double[] trajectory = new double[MAX_ITERATIONS + 1]; trajectory[0] = EM_alt_freq;
|
||||
double[] likelihood_trajectory = new double[MAX_ITERATIONS + 1]; likelihood_trajectory[0] = 0.0;
|
||||
boolean is_a_snp = false;
|
||||
|
||||
// Pick the alternate base
|
||||
char alt = 'N';
|
||||
{
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
String bases = pileup.getBases();
|
||||
int A = 0;
|
||||
int C = 0;
|
||||
int G = 0;
|
||||
int T = 0;
|
||||
int max_count = -1;
|
||||
for (int i = 0; i < bases.length(); i++)
|
||||
{
|
||||
char b = bases.charAt(i);
|
||||
if (b == ref) { continue; }
|
||||
switch (b)
|
||||
{
|
||||
case 'A' : A += 1; if (A > max_count) { max_count = A; alt = 'A'; } break;
|
||||
case 'C' : C += 1; if (C > max_count) { max_count = C; alt = 'C'; } break;
|
||||
case 'G' : G += 1; if (G > max_count) { max_count = G; alt = 'G'; } break;
|
||||
case 'T' : T += 1; if (T > max_count) { max_count = T; alt = 'T'; } break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int iterations = 0; iterations < MAX_ITERATIONS; iterations++)
|
||||
{
|
||||
// 6. Re-call from shallow coverage using the estimated frequency as a prior,
|
||||
|
|
@ -128,12 +154,10 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
|
||||
for (int i = 0; i < sample_names.size(); i++)
|
||||
{
|
||||
callers.get(i).setAlleleFrequencyPrior(EM_alt_freq);
|
||||
callers.get(i).setAlleleFrequencyPrior(EM_alt_freq, alt);
|
||||
calls[i] = callers.get(i).map(tracker, ref, contexts[i]);
|
||||
String genotype = calls[i].genotype();
|
||||
|
||||
if (calls[i].alt != 'N') { alt = calls[i].alt; }
|
||||
|
||||
likelihood += calls[i].pBest;
|
||||
|
||||
if (! FRACTIONAL_COUNTS)
|
||||
|
|
@ -157,7 +181,7 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
}
|
||||
|
||||
}
|
||||
EM_alt_freq = EM_sum / EM_N;
|
||||
EM_alt_freq = Math.min(EM_sum / EM_N, 0.99999);
|
||||
trajectory[iterations+1] = EM_alt_freq;
|
||||
likelihood_trajectory[iterations+1] = likelihood;
|
||||
|
||||
|
|
|
|||
|
|
@ -278,6 +278,12 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
String bases = pileup.getBases();
|
||||
|
||||
if (bases.length() == 0)
|
||||
{
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods, sample_name);
|
||||
}
|
||||
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
ref = Character.toUpperCase(ref);
|
||||
|
|
@ -305,7 +311,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
||||
}
|
||||
G.ApplyPrior(ref, this.allele_frequency_prior);
|
||||
G.ApplyPrior(ref, this.alt_allele, this.allele_frequency_prior);
|
||||
|
||||
if (fourBaseMode) {
|
||||
G.applyFourBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||
|
|
@ -333,9 +339,11 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
}
|
||||
|
||||
double allele_frequency_prior = -1;
|
||||
public void setAlleleFrequencyPrior(double freq)
|
||||
char alt_allele;
|
||||
public void setAlleleFrequencyPrior(double freq, char alt)
|
||||
{
|
||||
this.allele_frequency_prior = freq;
|
||||
this.alt_allele = alt;
|
||||
}
|
||||
|
||||
// Given result of map function
|
||||
|
|
|
|||
|
|
@ -137,18 +137,24 @@ public class GenotypeLikelihoods {
|
|||
return s;
|
||||
}
|
||||
|
||||
public void ApplyPrior(char ref, double p_alt)
|
||||
public void ApplyPrior(char ref, char alt, double p_alt)
|
||||
{
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
for (int i = 0; i < genotypes.length; i++)
|
||||
{
|
||||
if ((p_alt == -1) || (p_alt <= 1e-6))
|
||||
{
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
|
||||
{
|
||||
// hom-ref
|
||||
likelihoods[i] += Math.log10(1.0 - 1e-3);
|
||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||
}
|
||||
else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref))
|
||||
{
|
||||
// hom-nonref
|
||||
likelihoods[i] += Math.log10(1e-5);
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
// het
|
||||
likelihoods[i] += Math.log10(1e-3);
|
||||
}
|
||||
|
|
@ -156,16 +162,27 @@ public class GenotypeLikelihoods {
|
|||
}
|
||||
else
|
||||
{
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
|
||||
{
|
||||
// hom-ref
|
||||
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
|
||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||
}
|
||||
else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt))
|
||||
{
|
||||
// hom-nonref
|
||||
likelihoods[i] += 2.0 * Math.log10(p_alt);
|
||||
} else {
|
||||
}
|
||||
else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
|
||||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt)))
|
||||
{
|
||||
// het
|
||||
likelihoods[i] += Math.log10((1.0-p_alt) * p_alt * 2.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// something else (noise!)
|
||||
likelihoods[i] += Math.log10(1e-5);
|
||||
}
|
||||
|
||||
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue