Various refactoring to achieve hapmap and dbsnp awareness, the ability to set pop-gen and secondary base priors from the command-line, and general code cleanup.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1007 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-06-15 07:20:22 +00:00
parent f6af190b74
commit f2946fa3e8
6 changed files with 296 additions and 277 deletions

View File

@ -10,11 +10,9 @@ import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.io.PrintStream;
import net.sf.samtools.SAMRecord;
import java.io.File;
public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
private SingleSampleGenotyper caller_1b;
@ -26,28 +24,26 @@ public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
public void initialize() {
caller_1b = new SingleSampleGenotyper();
caller_1b.metricsFileName = "/dev/stdout";
caller_1b.metricsInterval = 6000;
caller_1b.printMetrics = false;
caller_1b.fourBaseMode = false;
caller_1b.METRICS_FILE = new File("/dev/stdout");
caller_1b.METRICS_INTERVAL = 6000;
caller_1b.NO_SECONDARY_BASES = false;
//caller_1b.retest = false;
//caller_1b.qHom = 0.04;
//caller_1b.qHet = 0.49;
//caller_1b.qHomNonRef = 0.97;
caller_1b.lodThreshold = 5.0;
caller_1b.LOD_THRESHOLD = 5.0;
caller_1b.initialize();
caller_1b.reduceInit();
caller_4b = new SingleSampleGenotyper();
caller_4b.metricsFileName = "/dev/stdout";
caller_4b.metricsInterval = caller_1b.metricsInterval;
caller_4b.printMetrics = false;
caller_4b.fourBaseMode = true;
caller_4b.METRICS_FILE = new File("/dev/stdout");
caller_4b.METRICS_INTERVAL = caller_1b.METRICS_INTERVAL;
caller_4b.NO_SECONDARY_BASES = true;
//caller_4b.retest = true;
//caller_4b.qHom = 0.04;
//caller_4b.qHet = 0.49;
//caller_4b.qHomNonRef = 0.97;
caller_4b.lodThreshold = 5.0;
caller_4b.LOD_THRESHOLD = 5.0;
caller_4b.initialize();
caller_4b.reduceInit();
}
@ -119,16 +115,18 @@ public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
bases2
);
caller_1b.metrics.nextPosition(call_1b, tracker);
caller_4b.metrics.nextPosition(call_4b, tracker);
caller_1b.metricsOut.nextPosition(call_1b, tracker);
caller_4b.metricsOut.nextPosition(call_4b, tracker);
if (caller_1b.metrics.num_loci_total % caller_1b.metricsInterval == 0) {
/*
if (caller_1b.metricsOut.num_loci_total % caller_1b.METRICS_INTERVAL == 0) {
System.out.print("1-Base Metrics");
caller_1b.metrics.printMetricsAtLocusIntervals(caller_1b.metricsInterval);
caller_1b.metrics.printMetricsAtLocusIntervals(caller_1b.METRICS_INTERVAL);
System.out.print("4-Base Metrics");
caller_4b.metrics.printMetricsAtLocusIntervals(caller_1b.metricsInterval);
caller_4b.metrics.printMetricsAtLocusIntervals(caller_1b.METRICS_INTERVAL);
System.out.println("--------------");
}
*/
return null;
}
@ -269,9 +267,9 @@ public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
}
System.out.print("1-Base Metrics");
caller_1b.metrics.printMetricsAtLocusIntervals(1);
caller_1b.metricsOut.printMetricsAtLocusIntervals(1);
System.out.print("4-Base Metrics");
caller_4b.metrics.printMetricsAtLocusIntervals(1);
caller_4b.metricsOut.printMetricsAtLocusIntervals(1);
System.out.println("--------------");
}
}

View File

@ -85,14 +85,14 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
System.out.println("SAMPLE: " + sample_name);
SingleSampleGenotyper caller = new SingleSampleGenotyper();
caller.callsFileName = null;
caller.metricsFileName = null;
caller.lodThreshold = lodThreshold;
caller.fourBaseMode = false;
caller.printMetrics = false;
caller.VARIANTS_FILE = null;
caller.METRICS_FILE = null;
caller.LOD_THRESHOLD = lodThreshold;
caller.NO_SECONDARY_BASES = false;
caller.PRINT_METRICS = false;
caller.SAMPLE_NAME_REGEX = SAMPLE_NAME_REGEX;
caller.initialize();
caller.calls_file = individual_output_file;
caller.variantsOut = individual_output_file;
caller_sums.add(caller.reduceInit());
callers.add(caller);
}

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
public class AlleleFrequencyEstimate {
/*
static
{
boolean assertsEnabled = false;
@ -17,6 +18,7 @@ public class AlleleFrequencyEstimate {
throw new RuntimeException("Asserts must be enabled!");
}
}
*/
//AlleleFrequencyEstimate();
public GenomeLoc location;
@ -44,6 +46,7 @@ public class AlleleFrequencyEstimate {
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors, String sample_name)
{
/*
if( Double.isNaN(lodVsRef)) { System.out.printf("%s: lodVsRef is NaN\n", location.toString()); }
if( Double.isNaN(lodVsNextBest)) { System.out.printf("%s lodVsNextBest is NaN\n", location.toString()); }
if( Double.isNaN(qhat)) { System.out.printf("%s qhat is NaN\n", location.toString()); }
@ -78,6 +81,7 @@ public class AlleleFrequencyEstimate {
assert(! Double.isInfinite(qstar));
assert(! Double.isInfinite(pBest));
assert(! Double.isInfinite(pRef));
*/
this.location = location;
this.ref = ref;

View File

@ -8,60 +8,50 @@ import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
import java.util.List;
import java.io.PrintStream;
import java.io.File;
import java.io.FileNotFoundException;
/**
* Created by IntelliJ IDEA.
* User: andrewk
* Date: Apr 1, 2009
* Time: 5:53:21 PM
* To change this template use File | Settings | File Templates.
*/
public class AlleleMetrics {
long dbsnp_hits=0;
long num_variants=0;
public long num_loci_total=0;
long num_loci_confident=0;
double LOD_cutoff = 5;
long hapmap_genotype_correct = 0;
long hapmap_genotype_incorrect = 0;
long hapmap_refvar_correct = 0;
long hapmap_refvar_incorrect = 0;
private double LOD_cutoff = 5;
private long dbsnp_hits = 0;
private long num_variants = 0;
private long num_loci_total = 0;
private long num_loci_confident = 0;
private long hapmap_genotype_correct = 0;
private long hapmap_genotype_incorrect = 0;
private long hapmap_refvar_correct = 0;
private long hapmap_refvar_incorrect = 0;
private final double dbl_cmp_precision = 0.0001;
protected PrintStream out;
public AlleleMetrics(String MetricsOutputFile) {
try
{
/*if ( MetricsOutputFile.equals("-") )
this.out = out;
else*/
this.out = new PrintStream(MetricsOutputFile);
}
catch (Exception e)
{
e.printStackTrace();
System.exit(-1);
}
public AlleleMetrics(String metricsOutputPath) {
initialize(new File(metricsOutputPath), LOD_cutoff);
}
public AlleleMetrics(String MetricsOutputFile, double lodThreshold) {
LOD_cutoff = lodThreshold;
public AlleleMetrics(String metricsOutputPath, double lodThreshold) {
initialize(new File(metricsOutputPath), lodThreshold);
}
try
{
/*if ( MetricsOutputFile.equals("-") )
this.out = out;
else*/
this.out = new PrintStream(MetricsOutputFile);
}
catch (Exception e)
{
e.printStackTrace();
public AlleleMetrics(File metricsOutputFile) {
initialize(metricsOutputFile, LOD_cutoff);
}
public AlleleMetrics(File metricsOutputFile, double lodThreshold) {
initialize(metricsOutputFile, lodThreshold);
}
private void initialize(File metricsOutputFile, double lodThresold) {
try {
this.out = new PrintStream(metricsOutputFile);
} catch (FileNotFoundException e) {
System.err.format("Unable to open file '%s'. Perhaps the parent directory does not exist or is read-only.", metricsOutputFile.getAbsolutePath());
System.exit(-1);
}
this.LOD_cutoff = lodThresold;
}
public void nextPosition(AlleleFrequencyEstimate alleleFreq, RefMetaDataTracker tracker) {

View File

@ -1,8 +1,9 @@
package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.QualityUtils;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
@ -13,18 +14,22 @@ public class GenotypeLikelihoods {
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
static {
for(int qual=0; qual < Byte.MAX_VALUE; qual++) {
oneMinusData[qual] = log10(1.0 - pow(10,(qual/-10.0)));
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
oneMinusData[qual] = log10(1.0 - pow(10, (qual / -10.0)));
//oneMinusData[qual] = log10(1.0 - QualityUtils.qualToProb(qual));
}
}
private static double getOneMinusQual(final byte qual) {
return oneMinusData[qual];
}
private static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE];
static {
for(int qual=0; qual < Byte.MAX_VALUE; qual++) {
oneHalfMinusData[qual] = log10(0.5-pow(10,(qual/-10.0))/2.0);
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
oneHalfMinusData[qual] = log10(0.5 - pow(10, (qual / -10.0)) / 2.0);
//oneHalfMinusData[qual] = log10(0.5 - QualityUtils.qualToProb(qual) / 2.0);
}
}
@ -32,8 +37,6 @@ public class GenotypeLikelihoods {
return oneHalfMinusData[qual];
}
public double[] likelihoods;
public String[] genotypes;
@ -43,20 +46,30 @@ public class GenotypeLikelihoods {
private double priorHomVar;
// Store the 2nd-best base priors for on-genotype primary bases
HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
// Store the 2nd-best base priors for off-genotype primary bases
HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
public GenotypeLikelihoods() {
initialize(1.0 - 1e-3, 1e-3, 1e-5);
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
initialize(1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff);
}
public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
initialize(priorHomRef, priorHet, priorHomVar);
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
}
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
}
private void initialize(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
this.priorHomRef = priorHomRef;
this.priorHet = priorHet;
this.priorHomVar = priorHomVar;
@ -75,69 +88,93 @@ public class GenotypeLikelihoods {
genotypes[8] = "GT";
genotypes[9] = "TT";
onNextBestBasePriors.put("AA", 0.000);
onNextBestBasePriors.put("AC", 0.302);
onNextBestBasePriors.put("AG", 0.366);
onNextBestBasePriors.put("AT", 0.142);
onNextBestBasePriors.put("CC", 0.000);
onNextBestBasePriors.put("CG", 0.548);
onNextBestBasePriors.put("CT", 0.370);
onNextBestBasePriors.put("GG", 0.000);
onNextBestBasePriors.put("GT", 0.319);
onNextBestBasePriors.put("TT", 0.000);
offNextBestBasePriors.put("AA", 0.480);
offNextBestBasePriors.put("AC", 0.769);
offNextBestBasePriors.put("AG", 0.744);
offNextBestBasePriors.put("AT", 0.538);
offNextBestBasePriors.put("CC", 0.575);
offNextBestBasePriors.put("CG", 0.727);
offNextBestBasePriors.put("CT", 0.768);
offNextBestBasePriors.put("GG", 0.589);
offNextBestBasePriors.put("GT", 0.762);
offNextBestBasePriors.put("TT", 0.505);
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
}
}
public double getHomRefPrior() { return priorHomRef; }
public void setHomRefPrior(double priorHomRef) { this.priorHomRef = priorHomRef; }
public double getHomRefPrior() {
return priorHomRef;
}
public double getHetPrior() { return priorHet; }
public void setHetPrior(double priorHet) { this.priorHet = priorHet; }
public void setHomRefPrior(double priorHomRef) {
this.priorHomRef = priorHomRef;
}
public double getHomVarPrior() { return priorHomVar; }
public void setHomVarPrior(double priorHomVar) { this.priorHomVar = priorHomVar; }
public double getHetPrior() {
return priorHet;
}
public void add(char ref, char read, byte qual)
{
for (int i = 0; i < genotypes.length; i++)
{
public void setHetPrior(double priorHet) {
this.priorHet = priorHet;
}
public double getHomVarPrior() {
return priorHomVar;
}
public void setHomVarPrior(double priorHomVar) {
this.priorHomVar = priorHomVar;
}
public double[] getOnGenotypeSecondaryPriors() {
double[] p2ndon = new double[10];
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
p2ndon[genotypeIndex] = onNextBestBasePriors.get(genotypes[genotypeIndex]);
}
return p2ndon;
}
public void setOnGenotypeSecondaryPriors(double[] p2ndon) {
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
}
}
public double[] getOffGenotypeSecondaryPriors() {
double[] p2ndoff = new double[10];
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
p2ndoff[genotypeIndex] = offNextBestBasePriors.get(genotypes[genotypeIndex]);
}
return p2ndoff;
}
public void setOffGenotypeSecondaryPriors(double[] p2ndoff) {
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
}
}
public void add(char ref, char read, byte qual) {
for (int i = 0; i < genotypes.length; i++) {
likelihoods[i] += calculateAlleleLikelihood(ref, read, genotypes[i], qual);
}
}
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual)
{
if (qual == 0) { qual = 1; } // zero quals are wrong.
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
if (qual == 0) {
qual = 1;
} // zero quals are wrong.
char h1 = genotype.charAt(0);
char h2 = genotype.charAt(1);
double p_base;
if ((h1 == h2) && (h1 == read))
{
// hom
p_base = getOneMinusQual(qual);
}
else if ((h1 != h2) && ((h1 == read) || (h2 == read)))
{
// het
p_base = getOneHalfMinusQual(qual);
}
else
{
// error
p_base = qual/-10.0;
if ((h1 == h2) && (h1 == read)) {
// hom
p_base = getOneMinusQual(qual);
} else if ((h1 != h2) && ((h1 == read) || (h2 == read))) {
// het
p_base = getOneHalfMinusQual(qual);
} else {
// error
p_base = qual / -10.0;
}
return p_base;
@ -170,54 +207,41 @@ public class GenotypeLikelihoods {
return s;
}
public void ApplyPrior(char ref, char alt, double p_alt)
{
for (int i = 0; i < genotypes.length; i++)
{
if ((p_alt == -1) || (p_alt <= 1e-6))
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
{
// hom-ref
likelihoods[i] += Math.log10(priorHomRef);
}
else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref))
{
// hom-nonref
likelihoods[i] += Math.log10(priorHomVar);
}
else
{
// het
likelihoods[i] += Math.log10(priorHet);
}
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
}
else
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
{
// hom-ref
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
}
else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt))
{
// hom-nonref
likelihoods[i] += 2.0 * Math.log10(p_alt);
}
else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt)))
{
// het
likelihoods[i] += Math.log10((1.0-p_alt) * p_alt * 2.0);
}
else
{
// something else (noise!)
likelihoods[i] += Math.log10(1e-5);
}
public void ApplyPrior(char ref, char alt, double p_alt) {
for (int i = 0; i < genotypes.length; i++) {
if ((p_alt == -1) || (p_alt <= 1e-6)) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(priorHomRef);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(priorHomVar);
} else {
// het
likelihoods[i] += Math.log10(priorHet);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
} else {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
} else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt)) {
// hom-nonref
likelihoods[i] += 2.0 * Math.log10(p_alt);
} else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt))) {
// het
likelihoods[i] += Math.log10((1.0 - p_alt) * p_alt * 2.0);
} else {
// something else (noise!)
likelihoods[i] += Math.log10(1e-5);
}
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
}
}
this.sort();
@ -322,108 +346,9 @@ public class GenotypeLikelihoods {
}
}
this.LodVsRef(ref); //HACK
//System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood);
this.LodVsRef(ref); //HACK
//System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood);
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods, sample_name);
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, null, this.likelihoods, sample_name);
}
public static class IndelCall
{
public String type;
public String[] alleles;
public double p;
public double lod;
public IndelCall(String type, String[] alleles, double p, double lod)
{
this.type = type;
this.alleles = alleles;
this.p = p;
this.lod = lod;
}
public String toString()
{
return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
}
}
public static IndelCall callIndel(String[] indels)
{
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
for (int i = 0; i < indels.length; i++)
{
if (! indel_allele_counts.containsKey(indels[i]))
{
indel_allele_counts.put(indels[i], 1);
}
else
{
indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1);
}
}
Object[] keys = indel_allele_counts.keySet().toArray();
String[] alleles = new String[keys.length];
int[] counts = new int[keys.length];
double likelihoods[] = new double[keys.length];
int null_count = 0;
String max_allele = null;
int max_count = -1;
if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null")))))
{
for (int i = 0; i < keys.length; i++)
{
Integer count = (Integer)indel_allele_counts.get(keys[i]);
alleles[i] = (String)keys[i];
counts[i] = count;
if (alleles[i].equals("null")) { null_count = counts[i]; }
else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; }
//System.out.printf("%s[%d] ", keys[i], count);
}
//System.out.printf("\n");
double eps = 1e-3;
double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999);
double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3);
double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5);
double lodRef = pRef - Math.max(pHet, pHom);
double lodHet = pHet - pRef;
double lodHom = pHom - pRef;
//System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef);
//System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet);
//System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom);
//System.out.printf("\n");
if (lodRef > 0)
{
// reference call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = "null";
return new IndelCall("ref", genotype, pRef, lodRef);
}
else if (lodHet > lodHom)
{
// het call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = max_allele;
return new IndelCall("het", genotype, pHet, lodHet);
}
else
{
// hom call
String[] genotype = new String[2];
genotype[0] = max_allele;
genotype[1] = max_allele;
return new IndelCall("hom", genotype, pHom, lodHom);
}
}
return null;
}
}

View File

@ -0,0 +1,102 @@
package org.broadinstitute.sting.playground.utils;
import java.util.HashMap;
public class IndelLikelihood {
private String type;
private String[] alleles;
private double p;
private double lod;
public IndelLikelihood(String type, String[] alleles, double p, double lod) {
initialize(type, alleles, p, lod);
}
public IndelLikelihood(String[] indels) {
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
for (int i = 0; i < indels.length; i++) {
if (! indel_allele_counts.containsKey(indels[i])) {
indel_allele_counts.put(indels[i], 1);
} else {
indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1);
}
}
Object[] keys = indel_allele_counts.keySet().toArray();
String[] alleles = new String[keys.length];
int[] counts = new int[keys.length];
//double likelihoods[] = new double[keys.length];
int null_count = 0;
String max_allele = null;
int max_count = -1;
if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null"))))) {
for (int i = 0; i < keys.length; i++) {
Integer count = (Integer)indel_allele_counts.get(keys[i]);
alleles[i] = (String)keys[i];
counts[i] = count;
if (alleles[i].equals("null")) { null_count = counts[i]; }
else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; }
//System.out.printf("%s[%d] ", keys[i], count);
}
//System.out.printf("\n");
double eps = 1e-3;
double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999);
double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3);
double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5);
double lodRef = pRef - Math.max(pHet, pHom);
double lodHet = pHet - pRef;
double lodHom = pHom - pRef;
//System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef);
//System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet);
//System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom);
//System.out.printf("\n");
if (lodRef > 0) {
// reference call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = "null";
//return new IndelLikelihood("ref", genotype, pRef, lodRef);
initialize("ref", genotype, pRef, lodRef);
} else if (lodHet > lodHom) {
// het call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = max_allele;
//return new IndelLikelihood("het", genotype, pHet, lodHet);
initialize("het", genotype, pHet, lodHet);
} else {
// hom call
String[] genotype = new String[2];
genotype[0] = max_allele;
genotype[1] = max_allele;
//return new IndelLikelihood("hom", genotype, pHom, lodHom);
initialize("hom", genotype, pHom, lodHom);
}
}
}
private void initialize(String type, String[] alleles, double p, double lod) {
this.type = type;
this.alleles = alleles;
this.p = p;
this.lod = lod;
}
public String getType() { return type; }
public String[] getAlleles() { return alleles; }
public double getPosteriorProbability() { return p; }
public double getLOD() { return lod; }
public String toString() {
return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
}
}