Various refactoring to achieve hapmap and dbsnp awareness, the ability to set pop-gen and secondary base priors from the command-line, and general code cleanup.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1007 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-06-15 07:20:22 +00:00
parent f6af190b74
commit f2946fa3e8
6 changed files with 296 additions and 277 deletions

View File

@ -10,11 +10,9 @@ import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.io.PrintStream; import java.io.PrintStream;
import java.io.File;
import net.sf.samtools.SAMRecord;
public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> { public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
private SingleSampleGenotyper caller_1b; private SingleSampleGenotyper caller_1b;
@ -26,28 +24,26 @@ public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
public void initialize() { public void initialize() {
caller_1b = new SingleSampleGenotyper(); caller_1b = new SingleSampleGenotyper();
caller_1b.metricsFileName = "/dev/stdout"; caller_1b.METRICS_FILE = new File("/dev/stdout");
caller_1b.metricsInterval = 6000; caller_1b.METRICS_INTERVAL = 6000;
caller_1b.printMetrics = false; caller_1b.NO_SECONDARY_BASES = false;
caller_1b.fourBaseMode = false;
//caller_1b.retest = false; //caller_1b.retest = false;
//caller_1b.qHom = 0.04; //caller_1b.qHom = 0.04;
//caller_1b.qHet = 0.49; //caller_1b.qHet = 0.49;
//caller_1b.qHomNonRef = 0.97; //caller_1b.qHomNonRef = 0.97;
caller_1b.lodThreshold = 5.0; caller_1b.LOD_THRESHOLD = 5.0;
caller_1b.initialize(); caller_1b.initialize();
caller_1b.reduceInit(); caller_1b.reduceInit();
caller_4b = new SingleSampleGenotyper(); caller_4b = new SingleSampleGenotyper();
caller_4b.metricsFileName = "/dev/stdout"; caller_4b.METRICS_FILE = new File("/dev/stdout");
caller_4b.metricsInterval = caller_1b.metricsInterval; caller_4b.METRICS_INTERVAL = caller_1b.METRICS_INTERVAL;
caller_4b.printMetrics = false; caller_4b.NO_SECONDARY_BASES = true;
caller_4b.fourBaseMode = true;
//caller_4b.retest = true; //caller_4b.retest = true;
//caller_4b.qHom = 0.04; //caller_4b.qHom = 0.04;
//caller_4b.qHet = 0.49; //caller_4b.qHet = 0.49;
//caller_4b.qHomNonRef = 0.97; //caller_4b.qHomNonRef = 0.97;
caller_4b.lodThreshold = 5.0; caller_4b.LOD_THRESHOLD = 5.0;
caller_4b.initialize(); caller_4b.initialize();
caller_4b.reduceInit(); caller_4b.reduceInit();
} }
@ -119,16 +115,18 @@ public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
bases2 bases2
); );
caller_1b.metrics.nextPosition(call_1b, tracker); caller_1b.metricsOut.nextPosition(call_1b, tracker);
caller_4b.metrics.nextPosition(call_4b, tracker); caller_4b.metricsOut.nextPosition(call_4b, tracker);
if (caller_1b.metrics.num_loci_total % caller_1b.metricsInterval == 0) { /*
if (caller_1b.metricsOut.num_loci_total % caller_1b.METRICS_INTERVAL == 0) {
System.out.print("1-Base Metrics"); System.out.print("1-Base Metrics");
caller_1b.metrics.printMetricsAtLocusIntervals(caller_1b.metricsInterval); caller_1b.metrics.printMetricsAtLocusIntervals(caller_1b.METRICS_INTERVAL);
System.out.print("4-Base Metrics"); System.out.print("4-Base Metrics");
caller_4b.metrics.printMetricsAtLocusIntervals(caller_1b.metricsInterval); caller_4b.metrics.printMetricsAtLocusIntervals(caller_1b.METRICS_INTERVAL);
System.out.println("--------------"); System.out.println("--------------");
} }
*/
return null; return null;
} }
@ -269,9 +267,9 @@ public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
} }
System.out.print("1-Base Metrics"); System.out.print("1-Base Metrics");
caller_1b.metrics.printMetricsAtLocusIntervals(1); caller_1b.metricsOut.printMetricsAtLocusIntervals(1);
System.out.print("4-Base Metrics"); System.out.print("4-Base Metrics");
caller_4b.metrics.printMetricsAtLocusIntervals(1); caller_4b.metricsOut.printMetricsAtLocusIntervals(1);
System.out.println("--------------"); System.out.println("--------------");
} }
} }

View File

@ -85,14 +85,14 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
System.out.println("SAMPLE: " + sample_name); System.out.println("SAMPLE: " + sample_name);
SingleSampleGenotyper caller = new SingleSampleGenotyper(); SingleSampleGenotyper caller = new SingleSampleGenotyper();
caller.callsFileName = null; caller.VARIANTS_FILE = null;
caller.metricsFileName = null; caller.METRICS_FILE = null;
caller.lodThreshold = lodThreshold; caller.LOD_THRESHOLD = lodThreshold;
caller.fourBaseMode = false; caller.NO_SECONDARY_BASES = false;
caller.printMetrics = false; caller.PRINT_METRICS = false;
caller.SAMPLE_NAME_REGEX = SAMPLE_NAME_REGEX; caller.SAMPLE_NAME_REGEX = SAMPLE_NAME_REGEX;
caller.initialize(); caller.initialize();
caller.calls_file = individual_output_file; caller.variantsOut = individual_output_file;
caller_sums.add(caller.reduceInit()); caller_sums.add(caller.reduceInit());
callers.add(caller); callers.add(caller);
} }

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
public class AlleleFrequencyEstimate { public class AlleleFrequencyEstimate {
/*
static static
{ {
boolean assertsEnabled = false; boolean assertsEnabled = false;
@ -17,6 +18,7 @@ public class AlleleFrequencyEstimate {
throw new RuntimeException("Asserts must be enabled!"); throw new RuntimeException("Asserts must be enabled!");
} }
} }
*/
//AlleleFrequencyEstimate(); //AlleleFrequencyEstimate();
public GenomeLoc location; public GenomeLoc location;
@ -44,6 +46,7 @@ public class AlleleFrequencyEstimate {
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors, String sample_name) public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors, String sample_name)
{ {
/*
if( Double.isNaN(lodVsRef)) { System.out.printf("%s: lodVsRef is NaN\n", location.toString()); } if( Double.isNaN(lodVsRef)) { System.out.printf("%s: lodVsRef is NaN\n", location.toString()); }
if( Double.isNaN(lodVsNextBest)) { System.out.printf("%s lodVsNextBest is NaN\n", location.toString()); } if( Double.isNaN(lodVsNextBest)) { System.out.printf("%s lodVsNextBest is NaN\n", location.toString()); }
if( Double.isNaN(qhat)) { System.out.printf("%s qhat is NaN\n", location.toString()); } if( Double.isNaN(qhat)) { System.out.printf("%s qhat is NaN\n", location.toString()); }
@ -78,6 +81,7 @@ public class AlleleFrequencyEstimate {
assert(! Double.isInfinite(qstar)); assert(! Double.isInfinite(qstar));
assert(! Double.isInfinite(pBest)); assert(! Double.isInfinite(pBest));
assert(! Double.isInfinite(pRef)); assert(! Double.isInfinite(pRef));
*/
this.location = location; this.location = location;
this.ref = ref; this.ref = ref;

View File

@ -8,60 +8,50 @@ import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
import java.util.List; import java.util.List;
import java.io.PrintStream; import java.io.PrintStream;
import java.io.File;
import java.io.FileNotFoundException;
/**
* Created by IntelliJ IDEA.
* User: andrewk
* Date: Apr 1, 2009
* Time: 5:53:21 PM
* To change this template use File | Settings | File Templates.
*/
public class AlleleMetrics { public class AlleleMetrics {
private double LOD_cutoff = 5;
long dbsnp_hits=0; private long dbsnp_hits = 0;
long num_variants=0; private long num_variants = 0;
public long num_loci_total=0; private long num_loci_total = 0;
long num_loci_confident=0; private long num_loci_confident = 0;
double LOD_cutoff = 5; private long hapmap_genotype_correct = 0;
long hapmap_genotype_correct = 0; private long hapmap_genotype_incorrect = 0;
long hapmap_genotype_incorrect = 0; private long hapmap_refvar_correct = 0;
long hapmap_refvar_correct = 0; private long hapmap_refvar_incorrect = 0;
long hapmap_refvar_incorrect = 0;
private final double dbl_cmp_precision = 0.0001; private final double dbl_cmp_precision = 0.0001;
protected PrintStream out; protected PrintStream out;
public AlleleMetrics(String MetricsOutputFile) { public AlleleMetrics(String metricsOutputPath) {
try initialize(new File(metricsOutputPath), LOD_cutoff);
{
/*if ( MetricsOutputFile.equals("-") )
this.out = out;
else*/
this.out = new PrintStream(MetricsOutputFile);
}
catch (Exception e)
{
e.printStackTrace();
System.exit(-1);
}
} }
public AlleleMetrics(String MetricsOutputFile, double lodThreshold) { public AlleleMetrics(String metricsOutputPath, double lodThreshold) {
LOD_cutoff = lodThreshold; initialize(new File(metricsOutputPath), lodThreshold);
try
{
/*if ( MetricsOutputFile.equals("-") )
this.out = out;
else*/
this.out = new PrintStream(MetricsOutputFile);
} }
catch (Exception e)
{ public AlleleMetrics(File metricsOutputFile) {
e.printStackTrace(); initialize(metricsOutputFile, LOD_cutoff);
}
public AlleleMetrics(File metricsOutputFile, double lodThreshold) {
initialize(metricsOutputFile, lodThreshold);
}
private void initialize(File metricsOutputFile, double lodThresold) {
try {
this.out = new PrintStream(metricsOutputFile);
} catch (FileNotFoundException e) {
System.err.format("Unable to open file '%s'. Perhaps the parent directory does not exist or is read-only.", metricsOutputFile.getAbsolutePath());
System.exit(-1); System.exit(-1);
} }
this.LOD_cutoff = lodThresold;
} }
public void nextPosition(AlleleFrequencyEstimate alleleFreq, RefMetaDataTracker tracker) { public void nextPosition(AlleleFrequencyEstimate alleleFreq, RefMetaDataTracker tracker) {

View File

@ -1,8 +1,9 @@
package org.broadinstitute.sting.playground.utils; package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.QualityUtils;
import static java.lang.Math.log10; import static java.lang.Math.log10;
import static java.lang.Math.pow; import static java.lang.Math.pow;
@ -13,18 +14,22 @@ public class GenotypeLikelihoods {
private static final double[] oneMinusData = new double[Byte.MAX_VALUE]; private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
static { static {
for(int qual=0; qual < Byte.MAX_VALUE; qual++) { for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
oneMinusData[qual] = log10(1.0 - pow(10,(qual/-10.0))); oneMinusData[qual] = log10(1.0 - pow(10, (qual / -10.0)));
//oneMinusData[qual] = log10(1.0 - QualityUtils.qualToProb(qual));
} }
} }
private static double getOneMinusQual(final byte qual) { private static double getOneMinusQual(final byte qual) {
return oneMinusData[qual]; return oneMinusData[qual];
} }
private static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE]; private static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE];
static { static {
for(int qual=0; qual < Byte.MAX_VALUE; qual++) { for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
oneHalfMinusData[qual] = log10(0.5-pow(10,(qual/-10.0))/2.0); oneHalfMinusData[qual] = log10(0.5 - pow(10, (qual / -10.0)) / 2.0);
//oneHalfMinusData[qual] = log10(0.5 - QualityUtils.qualToProb(qual) / 2.0);
} }
} }
@ -32,8 +37,6 @@ public class GenotypeLikelihoods {
return oneHalfMinusData[qual]; return oneHalfMinusData[qual];
} }
public double[] likelihoods; public double[] likelihoods;
public String[] genotypes; public String[] genotypes;
@ -43,20 +46,30 @@ public class GenotypeLikelihoods {
private double priorHomVar; private double priorHomVar;
// Store the 2nd-best base priors for on-genotype primary bases // Store the 2nd-best base priors for on-genotype primary bases
HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>(); private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
// Store the 2nd-best base priors for off-genotype primary bases // Store the 2nd-best base priors for off-genotype primary bases
HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>(); private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
public GenotypeLikelihoods() { public GenotypeLikelihoods() {
initialize(1.0 - 1e-3, 1e-3, 1e-5); double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
initialize(1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff);
} }
public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) { public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
initialize(priorHomRef, priorHet, priorHomVar); double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
} }
private void initialize(double priorHomRef, double priorHet, double priorHomVar) { public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
}
private void initialize(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
this.priorHomRef = priorHomRef; this.priorHomRef = priorHomRef;
this.priorHet = priorHet; this.priorHet = priorHet;
this.priorHomVar = priorHomVar; this.priorHomVar = priorHomVar;
@ -75,69 +88,93 @@ public class GenotypeLikelihoods {
genotypes[8] = "GT"; genotypes[8] = "GT";
genotypes[9] = "TT"; genotypes[9] = "TT";
onNextBestBasePriors.put("AA", 0.000); for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
onNextBestBasePriors.put("AC", 0.302); onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
onNextBestBasePriors.put("AG", 0.366); offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
onNextBestBasePriors.put("AT", 0.142); }
onNextBestBasePriors.put("CC", 0.000);
onNextBestBasePriors.put("CG", 0.548);
onNextBestBasePriors.put("CT", 0.370);
onNextBestBasePriors.put("GG", 0.000);
onNextBestBasePriors.put("GT", 0.319);
onNextBestBasePriors.put("TT", 0.000);
offNextBestBasePriors.put("AA", 0.480);
offNextBestBasePriors.put("AC", 0.769);
offNextBestBasePriors.put("AG", 0.744);
offNextBestBasePriors.put("AT", 0.538);
offNextBestBasePriors.put("CC", 0.575);
offNextBestBasePriors.put("CG", 0.727);
offNextBestBasePriors.put("CT", 0.768);
offNextBestBasePriors.put("GG", 0.589);
offNextBestBasePriors.put("GT", 0.762);
offNextBestBasePriors.put("TT", 0.505);
} }
public double getHomRefPrior() { return priorHomRef; } public double getHomRefPrior() {
public void setHomRefPrior(double priorHomRef) { this.priorHomRef = priorHomRef; } return priorHomRef;
}
public double getHetPrior() { return priorHet; } public void setHomRefPrior(double priorHomRef) {
public void setHetPrior(double priorHet) { this.priorHet = priorHet; } this.priorHomRef = priorHomRef;
}
public double getHomVarPrior() { return priorHomVar; } public double getHetPrior() {
public void setHomVarPrior(double priorHomVar) { this.priorHomVar = priorHomVar; } return priorHet;
}
public void add(char ref, char read, byte qual) public void setHetPrior(double priorHet) {
{ this.priorHet = priorHet;
for (int i = 0; i < genotypes.length; i++) }
{
public double getHomVarPrior() {
return priorHomVar;
}
public void setHomVarPrior(double priorHomVar) {
this.priorHomVar = priorHomVar;
}
public double[] getOnGenotypeSecondaryPriors() {
double[] p2ndon = new double[10];
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
p2ndon[genotypeIndex] = onNextBestBasePriors.get(genotypes[genotypeIndex]);
}
return p2ndon;
}
public void setOnGenotypeSecondaryPriors(double[] p2ndon) {
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
}
}
public double[] getOffGenotypeSecondaryPriors() {
double[] p2ndoff = new double[10];
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
p2ndoff[genotypeIndex] = offNextBestBasePriors.get(genotypes[genotypeIndex]);
}
return p2ndoff;
}
public void setOffGenotypeSecondaryPriors(double[] p2ndoff) {
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
}
}
public void add(char ref, char read, byte qual) {
for (int i = 0; i < genotypes.length; i++) {
likelihoods[i] += calculateAlleleLikelihood(ref, read, genotypes[i], qual); likelihoods[i] += calculateAlleleLikelihood(ref, read, genotypes[i], qual);
} }
} }
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
{ if (qual == 0) {
if (qual == 0) { qual = 1; } // zero quals are wrong. qual = 1;
} // zero quals are wrong.
char h1 = genotype.charAt(0); char h1 = genotype.charAt(0);
char h2 = genotype.charAt(1); char h2 = genotype.charAt(1);
double p_base; double p_base;
if ((h1 == h2) && (h1 == read)) if ((h1 == h2) && (h1 == read)) {
{
// hom // hom
p_base = getOneMinusQual(qual); p_base = getOneMinusQual(qual);
} } else if ((h1 != h2) && ((h1 == read) || (h2 == read))) {
else if ((h1 != h2) && ((h1 == read) || (h2 == read)))
{
// het // het
p_base = getOneHalfMinusQual(qual); p_base = getOneHalfMinusQual(qual);
} } else {
else
{
// error // error
p_base = qual/-10.0; p_base = qual / -10.0;
} }
return p_base; return p_base;
@ -170,54 +207,41 @@ public class GenotypeLikelihoods {
return s; return s;
} }
public void ApplyPrior(char ref, char alt, double p_alt) public void ApplyPrior(char ref, char alt, double p_alt) {
{ for (int i = 0; i < genotypes.length; i++) {
for (int i = 0; i < genotypes.length; i++) if ((p_alt == -1) || (p_alt <= 1e-6)) {
{ if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
if ((p_alt == -1) || (p_alt <= 1e-6))
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
{
// hom-ref // hom-ref
likelihoods[i] += Math.log10(priorHomRef); likelihoods[i] += Math.log10(priorHomRef);
} } else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref))
{
// hom-nonref // hom-nonref
likelihoods[i] += Math.log10(priorHomVar); likelihoods[i] += Math.log10(priorHomVar);
} } else {
else
{
// het // het
likelihoods[i] += Math.log10(priorHet); likelihoods[i] += Math.log10(priorHet);
} }
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; } if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
} }
else } else {
{ if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
{
// hom-ref // hom-ref
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt); likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
} } else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt)) {
else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt))
{
// hom-nonref // hom-nonref
likelihoods[i] += 2.0 * Math.log10(p_alt); likelihoods[i] += 2.0 * Math.log10(p_alt);
} } else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) || ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt))) {
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt)))
{
// het // het
likelihoods[i] += Math.log10((1.0-p_alt) * p_alt * 2.0); likelihoods[i] += Math.log10((1.0 - p_alt) * p_alt * 2.0);
} } else {
else
{
// something else (noise!) // something else (noise!)
likelihoods[i] += Math.log10(1e-5); likelihoods[i] += Math.log10(1e-5);
} }
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; } if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
} }
} }
this.sort(); this.sort();
@ -325,105 +349,6 @@ public class GenotypeLikelihoods {
this.LodVsRef(ref); //HACK this.LodVsRef(ref); //HACK
//System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood); //System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood);
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods, sample_name); return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, null, this.likelihoods, sample_name);
} }
public static class IndelCall
{
public String type;
public String[] alleles;
public double p;
public double lod;
public IndelCall(String type, String[] alleles, double p, double lod)
{
this.type = type;
this.alleles = alleles;
this.p = p;
this.lod = lod;
}
public String toString()
{
return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
}
}
public static IndelCall callIndel(String[] indels)
{
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
for (int i = 0; i < indels.length; i++)
{
if (! indel_allele_counts.containsKey(indels[i]))
{
indel_allele_counts.put(indels[i], 1);
}
else
{
indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1);
}
}
Object[] keys = indel_allele_counts.keySet().toArray();
String[] alleles = new String[keys.length];
int[] counts = new int[keys.length];
double likelihoods[] = new double[keys.length];
int null_count = 0;
String max_allele = null;
int max_count = -1;
if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null")))))
{
for (int i = 0; i < keys.length; i++)
{
Integer count = (Integer)indel_allele_counts.get(keys[i]);
alleles[i] = (String)keys[i];
counts[i] = count;
if (alleles[i].equals("null")) { null_count = counts[i]; }
else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; }
//System.out.printf("%s[%d] ", keys[i], count);
}
//System.out.printf("\n");
double eps = 1e-3;
double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999);
double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3);
double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5);
double lodRef = pRef - Math.max(pHet, pHom);
double lodHet = pHet - pRef;
double lodHom = pHom - pRef;
//System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef);
//System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet);
//System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom);
//System.out.printf("\n");
if (lodRef > 0)
{
// reference call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = "null";
return new IndelCall("ref", genotype, pRef, lodRef);
}
else if (lodHet > lodHom)
{
// het call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = max_allele;
return new IndelCall("het", genotype, pHet, lodHet);
}
else
{
// hom call
String[] genotype = new String[2];
genotype[0] = max_allele;
genotype[1] = max_allele;
return new IndelCall("hom", genotype, pHom, lodHom);
}
}
return null;
}
} }

View File

@ -0,0 +1,102 @@
package org.broadinstitute.sting.playground.utils;
import java.util.HashMap;
public class IndelLikelihood {
private String type;
private String[] alleles;
private double p;
private double lod;
public IndelLikelihood(String type, String[] alleles, double p, double lod) {
initialize(type, alleles, p, lod);
}
public IndelLikelihood(String[] indels) {
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
for (int i = 0; i < indels.length; i++) {
if (! indel_allele_counts.containsKey(indels[i])) {
indel_allele_counts.put(indels[i], 1);
} else {
indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1);
}
}
Object[] keys = indel_allele_counts.keySet().toArray();
String[] alleles = new String[keys.length];
int[] counts = new int[keys.length];
//double likelihoods[] = new double[keys.length];
int null_count = 0;
String max_allele = null;
int max_count = -1;
if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null"))))) {
for (int i = 0; i < keys.length; i++) {
Integer count = (Integer)indel_allele_counts.get(keys[i]);
alleles[i] = (String)keys[i];
counts[i] = count;
if (alleles[i].equals("null")) { null_count = counts[i]; }
else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; }
//System.out.printf("%s[%d] ", keys[i], count);
}
//System.out.printf("\n");
double eps = 1e-3;
double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999);
double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3);
double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5);
double lodRef = pRef - Math.max(pHet, pHom);
double lodHet = pHet - pRef;
double lodHom = pHom - pRef;
//System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef);
//System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet);
//System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom);
//System.out.printf("\n");
if (lodRef > 0) {
// reference call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = "null";
//return new IndelLikelihood("ref", genotype, pRef, lodRef);
initialize("ref", genotype, pRef, lodRef);
} else if (lodHet > lodHom) {
// het call
String[] genotype = new String[2];
genotype[0] = "null";
genotype[1] = max_allele;
//return new IndelLikelihood("het", genotype, pHet, lodHet);
initialize("het", genotype, pHet, lodHet);
} else {
// hom call
String[] genotype = new String[2];
genotype[0] = max_allele;
genotype[1] = max_allele;
//return new IndelLikelihood("hom", genotype, pHom, lodHom);
initialize("hom", genotype, pHom, lodHom);
}
}
}
private void initialize(String type, String[] alleles, double p, double lod) {
this.type = type;
this.alleles = alleles;
this.p = p;
this.lod = lod;
}
public String getType() { return type; }
public String[] getAlleles() { return alleles; }
public double getPosteriorProbability() { return p; }
public double getLOD() { return lod; }
public String toString() {
return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
}
}