gatk-3.8/java/lib/edu/mit/broad/sam/apps/allelecaller/FlatQualityAlleleCaller.java

75 lines
2.7 KiB
Java

package edu.mit.broad.sam.apps.allelecaller;
import java.io.IOException;
import java.io.BufferedWriter;
import java.io.File;
import java.util.*;
import static java.lang.Math.*;
/**
* Bayesian-based allele caller using flat qualities and a 1e-3 error rate, based on CRD algorithm
*/
public class FlatQualityAlleleCaller extends AbstractAlleleCaller {
public FlatQualityAlleleCaller(final File fastbReference, final BufferedWriter writer) throws IOException {
super(fastbReference, writer);
}
protected SortedSet<GenotypeTheory> call(final char ref, final String bases, final List<Byte> quals) {
final float eps = 1e-3f;
// count up the base by nucleotide and put them into a map
final int depth = bases.length();
int a = 0,c = 0,g = 0,t = 0;
for(int i=0; i< bases.length(); i++) {
if (bases.charAt(i) == 'A') { a++; }
else if (bases.charAt(i) == 'C') { c++; }
else if (bases.charAt(i) == 'G') { g++; }
else if (bases.charAt(i) == 'T') { t++; }
else { throw new RuntimeException("Unknown Base " + bases.charAt(i)); }
}
final Map<Character, Integer> counts = new HashMap<Character, Integer>();
counts.put('A', a);
counts.put('C', c);
counts.put('G', g);
counts.put('T', t);
// for each of the 10 theories, calculate the likelihood
final SortedSet<GenotypeTheory> results = new TreeSet<GenotypeTheory>();
for(final DiploidGenotype theory : DiploidGenotype.values()) {
final double likelihood;
final char allele1 = theory.getAllele1();
final char allele2 = theory.getAllele2();
if (!theory.isHet()) {
likelihood = log10(1-eps)*counts.get(allele1) + log10(eps)*(depth - counts.get(allele1));
} else {
final int major_allele_counts;
final int minor_allele_counts;
if (counts.get(allele1) > counts.get(allele2)) {
major_allele_counts = counts.get(allele1);
minor_allele_counts = counts.get(allele2);
} else {
major_allele_counts = counts.get(allele2);
minor_allele_counts = counts.get(allele1);
}
likelihood = log10(0.5 - (eps/2.0) )*major_allele_counts +
log10(0.5 - (eps/2.0) )*minor_allele_counts +
log10(eps)*(depth - major_allele_counts - minor_allele_counts);
}
final double prior = getPrior(ref, theory);
results.add(new GenotypeTheory(theory, likelihood + log10(prior)));
}
return results;
}
}