First implementation of a primary base filter. Assumes distribution of on/off bases is distributed according to a binomial.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1278 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
mmelgar 2009-07-17 18:43:35 +00:00
parent 24ebfee604
commit 8da754eb4e
1 changed files with 68 additions and 0 deletions

View File

@ -0,0 +1,68 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
public class IVFPrimaryBases implements IndependentVariantFeature {
private double[] p = { 0.933, 0.972, 0.970, 0.960, 0.945, 0.990, 0.971, 0.943, 0.978, 0.928 };
/**
* Method so that features can initialize themselves based on a short argument string. At the moment, each feature is
* responsible for interpreting their own argument string.
*
* @param arguments the arguments!
*/
public void initialize(String arguments) {
if (arguments != null && !arguments.isEmpty()) {
String[] argPieces = arguments.split(",");
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
p[genotypeIndex] = Double.valueOf(argPieces[genotypeIndex]);
}
}
}
/**
* Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array
* of length 10 (one for each genotype) and the value must be in log10-space.
*
* @param ref the reference base
* @param context the context for the given locus
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
*/
public double[] compute(char ref, LocusContext context) {
double[] likelihoods = new double[10];
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
String primaryBases = pileup.getBases();
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0);
char secondAllele = Genotype.values()[genotypeIndex].toString().charAt(1);
int offTotal = 0;
int onTotal = 0;
for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) {
char primaryBase = primaryBases.charAt(pileupIndex);
if (primaryBase != firstAllele && primaryBase != secondAllele) {
offTotal++;
} else {
onTotal++;
}
}
int Total = onTotal + offTotal;
double Prior = MathUtils.binomialProbability(offTotal, Total, p[genotypeIndex]);
likelihoods[genotypeIndex] = Math.log10(Prior);
}
return likelihoods;
}
}