Various refactoring to achieve hapmap and dbsnp awareness, the ability to set pop-gen and secondary base priors from the command-line, and general code cleanup.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1007 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f6af190b74
commit
f2946fa3e8
|
|
@ -10,11 +10,9 @@ import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import java.io.File;
|
||||
|
||||
public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
|
||||
private SingleSampleGenotyper caller_1b;
|
||||
|
|
@ -26,28 +24,26 @@ public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
|
|||
|
||||
public void initialize() {
|
||||
caller_1b = new SingleSampleGenotyper();
|
||||
caller_1b.metricsFileName = "/dev/stdout";
|
||||
caller_1b.metricsInterval = 6000;
|
||||
caller_1b.printMetrics = false;
|
||||
caller_1b.fourBaseMode = false;
|
||||
caller_1b.METRICS_FILE = new File("/dev/stdout");
|
||||
caller_1b.METRICS_INTERVAL = 6000;
|
||||
caller_1b.NO_SECONDARY_BASES = false;
|
||||
//caller_1b.retest = false;
|
||||
//caller_1b.qHom = 0.04;
|
||||
//caller_1b.qHet = 0.49;
|
||||
//caller_1b.qHomNonRef = 0.97;
|
||||
caller_1b.lodThreshold = 5.0;
|
||||
caller_1b.LOD_THRESHOLD = 5.0;
|
||||
caller_1b.initialize();
|
||||
caller_1b.reduceInit();
|
||||
|
||||
caller_4b = new SingleSampleGenotyper();
|
||||
caller_4b.metricsFileName = "/dev/stdout";
|
||||
caller_4b.metricsInterval = caller_1b.metricsInterval;
|
||||
caller_4b.printMetrics = false;
|
||||
caller_4b.fourBaseMode = true;
|
||||
caller_4b.METRICS_FILE = new File("/dev/stdout");
|
||||
caller_4b.METRICS_INTERVAL = caller_1b.METRICS_INTERVAL;
|
||||
caller_4b.NO_SECONDARY_BASES = true;
|
||||
//caller_4b.retest = true;
|
||||
//caller_4b.qHom = 0.04;
|
||||
//caller_4b.qHet = 0.49;
|
||||
//caller_4b.qHomNonRef = 0.97;
|
||||
caller_4b.lodThreshold = 5.0;
|
||||
caller_4b.LOD_THRESHOLD = 5.0;
|
||||
caller_4b.initialize();
|
||||
caller_4b.reduceInit();
|
||||
}
|
||||
|
|
@ -119,16 +115,18 @@ public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
|
|||
bases2
|
||||
);
|
||||
|
||||
caller_1b.metrics.nextPosition(call_1b, tracker);
|
||||
caller_4b.metrics.nextPosition(call_4b, tracker);
|
||||
caller_1b.metricsOut.nextPosition(call_1b, tracker);
|
||||
caller_4b.metricsOut.nextPosition(call_4b, tracker);
|
||||
|
||||
if (caller_1b.metrics.num_loci_total % caller_1b.metricsInterval == 0) {
|
||||
/*
|
||||
if (caller_1b.metricsOut.num_loci_total % caller_1b.METRICS_INTERVAL == 0) {
|
||||
System.out.print("1-Base Metrics");
|
||||
caller_1b.metrics.printMetricsAtLocusIntervals(caller_1b.metricsInterval);
|
||||
caller_1b.metrics.printMetricsAtLocusIntervals(caller_1b.METRICS_INTERVAL);
|
||||
System.out.print("4-Base Metrics");
|
||||
caller_4b.metrics.printMetricsAtLocusIntervals(caller_1b.metricsInterval);
|
||||
caller_4b.metrics.printMetricsAtLocusIntervals(caller_1b.METRICS_INTERVAL);
|
||||
System.out.println("--------------");
|
||||
}
|
||||
*/
|
||||
|
||||
return null;
|
||||
}
|
||||
|
|
@ -269,9 +267,9 @@ public class ContrastiveGenotypers extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
System.out.print("1-Base Metrics");
|
||||
caller_1b.metrics.printMetricsAtLocusIntervals(1);
|
||||
caller_1b.metricsOut.printMetricsAtLocusIntervals(1);
|
||||
System.out.print("4-Base Metrics");
|
||||
caller_4b.metrics.printMetricsAtLocusIntervals(1);
|
||||
caller_4b.metricsOut.printMetricsAtLocusIntervals(1);
|
||||
System.out.println("--------------");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,14 +85,14 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
System.out.println("SAMPLE: " + sample_name);
|
||||
|
||||
SingleSampleGenotyper caller = new SingleSampleGenotyper();
|
||||
caller.callsFileName = null;
|
||||
caller.metricsFileName = null;
|
||||
caller.lodThreshold = lodThreshold;
|
||||
caller.fourBaseMode = false;
|
||||
caller.printMetrics = false;
|
||||
caller.VARIANTS_FILE = null;
|
||||
caller.METRICS_FILE = null;
|
||||
caller.LOD_THRESHOLD = lodThreshold;
|
||||
caller.NO_SECONDARY_BASES = false;
|
||||
caller.PRINT_METRICS = false;
|
||||
caller.SAMPLE_NAME_REGEX = SAMPLE_NAME_REGEX;
|
||||
caller.initialize();
|
||||
caller.calls_file = individual_output_file;
|
||||
caller.variantsOut = individual_output_file;
|
||||
caller_sums.add(caller.reduceInit());
|
||||
callers.add(caller);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
|
||||
public class AlleleFrequencyEstimate {
|
||||
|
||||
/*
|
||||
static
|
||||
{
|
||||
boolean assertsEnabled = false;
|
||||
|
|
@ -17,6 +18,7 @@ public class AlleleFrequencyEstimate {
|
|||
throw new RuntimeException("Asserts must be enabled!");
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
//AlleleFrequencyEstimate();
|
||||
public GenomeLoc location;
|
||||
|
|
@ -44,6 +46,7 @@ public class AlleleFrequencyEstimate {
|
|||
|
||||
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors, String sample_name)
|
||||
{
|
||||
/*
|
||||
if( Double.isNaN(lodVsRef)) { System.out.printf("%s: lodVsRef is NaN\n", location.toString()); }
|
||||
if( Double.isNaN(lodVsNextBest)) { System.out.printf("%s lodVsNextBest is NaN\n", location.toString()); }
|
||||
if( Double.isNaN(qhat)) { System.out.printf("%s qhat is NaN\n", location.toString()); }
|
||||
|
|
@ -78,6 +81,7 @@ public class AlleleFrequencyEstimate {
|
|||
assert(! Double.isInfinite(qstar));
|
||||
assert(! Double.isInfinite(pBest));
|
||||
assert(! Double.isInfinite(pRef));
|
||||
*/
|
||||
|
||||
this.location = location;
|
||||
this.ref = ref;
|
||||
|
|
|
|||
|
|
@ -8,60 +8,50 @@ import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
|||
|
||||
import java.util.List;
|
||||
import java.io.PrintStream;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: andrewk
|
||||
* Date: Apr 1, 2009
|
||||
* Time: 5:53:21 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class AlleleMetrics {
|
||||
|
||||
long dbsnp_hits=0;
|
||||
long num_variants=0;
|
||||
public long num_loci_total=0;
|
||||
long num_loci_confident=0;
|
||||
double LOD_cutoff = 5;
|
||||
long hapmap_genotype_correct = 0;
|
||||
long hapmap_genotype_incorrect = 0;
|
||||
long hapmap_refvar_correct = 0;
|
||||
long hapmap_refvar_incorrect = 0;
|
||||
private double LOD_cutoff = 5;
|
||||
|
||||
private long dbsnp_hits = 0;
|
||||
private long num_variants = 0;
|
||||
private long num_loci_total = 0;
|
||||
private long num_loci_confident = 0;
|
||||
private long hapmap_genotype_correct = 0;
|
||||
private long hapmap_genotype_incorrect = 0;
|
||||
private long hapmap_refvar_correct = 0;
|
||||
private long hapmap_refvar_incorrect = 0;
|
||||
|
||||
private final double dbl_cmp_precision = 0.0001;
|
||||
|
||||
protected PrintStream out;
|
||||
|
||||
public AlleleMetrics(String MetricsOutputFile) {
|
||||
try
|
||||
{
|
||||
/*if ( MetricsOutputFile.equals("-") )
|
||||
this.out = out;
|
||||
else*/
|
||||
this.out = new PrintStream(MetricsOutputFile);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
System.exit(-1);
|
||||
}
|
||||
public AlleleMetrics(String metricsOutputPath) {
|
||||
initialize(new File(metricsOutputPath), LOD_cutoff);
|
||||
}
|
||||
|
||||
public AlleleMetrics(String MetricsOutputFile, double lodThreshold) {
|
||||
LOD_cutoff = lodThreshold;
|
||||
public AlleleMetrics(String metricsOutputPath, double lodThreshold) {
|
||||
initialize(new File(metricsOutputPath), lodThreshold);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
/*if ( MetricsOutputFile.equals("-") )
|
||||
this.out = out;
|
||||
else*/
|
||||
this.out = new PrintStream(MetricsOutputFile);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
public AlleleMetrics(File metricsOutputFile) {
|
||||
initialize(metricsOutputFile, LOD_cutoff);
|
||||
}
|
||||
|
||||
public AlleleMetrics(File metricsOutputFile, double lodThreshold) {
|
||||
initialize(metricsOutputFile, lodThreshold);
|
||||
}
|
||||
|
||||
private void initialize(File metricsOutputFile, double lodThresold) {
|
||||
try {
|
||||
this.out = new PrintStream(metricsOutputFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
System.err.format("Unable to open file '%s'. Perhaps the parent directory does not exist or is read-only.", metricsOutputFile.getAbsolutePath());
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
this.LOD_cutoff = lodThresold;
|
||||
}
|
||||
|
||||
public void nextPosition(AlleleFrequencyEstimate alleleFreq, RefMetaDataTracker tracker) {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
|
|
@ -13,18 +14,22 @@ public class GenotypeLikelihoods {
|
|||
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
||||
|
||||
static {
|
||||
for(int qual=0; qual < Byte.MAX_VALUE; qual++) {
|
||||
oneMinusData[qual] = log10(1.0 - pow(10,(qual/-10.0)));
|
||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
||||
oneMinusData[qual] = log10(1.0 - pow(10, (qual / -10.0)));
|
||||
//oneMinusData[qual] = log10(1.0 - QualityUtils.qualToProb(qual));
|
||||
}
|
||||
}
|
||||
|
||||
private static double getOneMinusQual(final byte qual) {
|
||||
return oneMinusData[qual];
|
||||
}
|
||||
|
||||
private static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE];
|
||||
|
||||
static {
|
||||
for(int qual=0; qual < Byte.MAX_VALUE; qual++) {
|
||||
oneHalfMinusData[qual] = log10(0.5-pow(10,(qual/-10.0))/2.0);
|
||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
||||
oneHalfMinusData[qual] = log10(0.5 - pow(10, (qual / -10.0)) / 2.0);
|
||||
//oneHalfMinusData[qual] = log10(0.5 - QualityUtils.qualToProb(qual) / 2.0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -32,8 +37,6 @@ public class GenotypeLikelihoods {
|
|||
return oneHalfMinusData[qual];
|
||||
}
|
||||
|
||||
|
||||
|
||||
public double[] likelihoods;
|
||||
public String[] genotypes;
|
||||
|
||||
|
|
@ -43,20 +46,30 @@ public class GenotypeLikelihoods {
|
|||
private double priorHomVar;
|
||||
|
||||
// Store the 2nd-best base priors for on-genotype primary bases
|
||||
HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
|
||||
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
|
||||
|
||||
// Store the 2nd-best base priors for off-genotype primary bases
|
||||
HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
|
||||
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
|
||||
|
||||
public GenotypeLikelihoods() {
|
||||
initialize(1.0 - 1e-3, 1e-3, 1e-5);
|
||||
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
|
||||
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
|
||||
|
||||
initialize(1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
|
||||
initialize(priorHomRef, priorHet, priorHomVar);
|
||||
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
|
||||
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
|
||||
|
||||
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
|
||||
public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
|
||||
initialize(priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
private void initialize(double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
|
||||
this.priorHomRef = priorHomRef;
|
||||
this.priorHet = priorHet;
|
||||
this.priorHomVar = priorHomVar;
|
||||
|
|
@ -75,69 +88,93 @@ public class GenotypeLikelihoods {
|
|||
genotypes[8] = "GT";
|
||||
genotypes[9] = "TT";
|
||||
|
||||
onNextBestBasePriors.put("AA", 0.000);
|
||||
onNextBestBasePriors.put("AC", 0.302);
|
||||
onNextBestBasePriors.put("AG", 0.366);
|
||||
onNextBestBasePriors.put("AT", 0.142);
|
||||
onNextBestBasePriors.put("CC", 0.000);
|
||||
onNextBestBasePriors.put("CG", 0.548);
|
||||
onNextBestBasePriors.put("CT", 0.370);
|
||||
onNextBestBasePriors.put("GG", 0.000);
|
||||
onNextBestBasePriors.put("GT", 0.319);
|
||||
onNextBestBasePriors.put("TT", 0.000);
|
||||
|
||||
offNextBestBasePriors.put("AA", 0.480);
|
||||
offNextBestBasePriors.put("AC", 0.769);
|
||||
offNextBestBasePriors.put("AG", 0.744);
|
||||
offNextBestBasePriors.put("AT", 0.538);
|
||||
offNextBestBasePriors.put("CC", 0.575);
|
||||
offNextBestBasePriors.put("CG", 0.727);
|
||||
offNextBestBasePriors.put("CT", 0.768);
|
||||
offNextBestBasePriors.put("GG", 0.589);
|
||||
offNextBestBasePriors.put("GT", 0.762);
|
||||
offNextBestBasePriors.put("TT", 0.505);
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
|
||||
offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
public double getHomRefPrior() { return priorHomRef; }
|
||||
public void setHomRefPrior(double priorHomRef) { this.priorHomRef = priorHomRef; }
|
||||
public double getHomRefPrior() {
|
||||
return priorHomRef;
|
||||
}
|
||||
|
||||
public double getHetPrior() { return priorHet; }
|
||||
public void setHetPrior(double priorHet) { this.priorHet = priorHet; }
|
||||
public void setHomRefPrior(double priorHomRef) {
|
||||
this.priorHomRef = priorHomRef;
|
||||
}
|
||||
|
||||
public double getHomVarPrior() { return priorHomVar; }
|
||||
public void setHomVarPrior(double priorHomVar) { this.priorHomVar = priorHomVar; }
|
||||
public double getHetPrior() {
|
||||
return priorHet;
|
||||
}
|
||||
|
||||
public void add(char ref, char read, byte qual)
|
||||
{
|
||||
for (int i = 0; i < genotypes.length; i++)
|
||||
{
|
||||
public void setHetPrior(double priorHet) {
|
||||
this.priorHet = priorHet;
|
||||
}
|
||||
|
||||
public double getHomVarPrior() {
|
||||
return priorHomVar;
|
||||
}
|
||||
|
||||
public void setHomVarPrior(double priorHomVar) {
|
||||
this.priorHomVar = priorHomVar;
|
||||
}
|
||||
|
||||
public double[] getOnGenotypeSecondaryPriors() {
|
||||
double[] p2ndon = new double[10];
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
p2ndon[genotypeIndex] = onNextBestBasePriors.get(genotypes[genotypeIndex]);
|
||||
}
|
||||
|
||||
return p2ndon;
|
||||
}
|
||||
|
||||
public void setOnGenotypeSecondaryPriors(double[] p2ndon) {
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
public double[] getOffGenotypeSecondaryPriors() {
|
||||
double[] p2ndoff = new double[10];
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
p2ndoff[genotypeIndex] = offNextBestBasePriors.get(genotypes[genotypeIndex]);
|
||||
}
|
||||
|
||||
return p2ndoff;
|
||||
}
|
||||
|
||||
public void setOffGenotypeSecondaryPriors(double[] p2ndoff) {
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
public void add(char ref, char read, byte qual) {
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
likelihoods[i] += calculateAlleleLikelihood(ref, read, genotypes[i], qual);
|
||||
}
|
||||
}
|
||||
|
||||
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual)
|
||||
{
|
||||
if (qual == 0) { qual = 1; } // zero quals are wrong.
|
||||
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
|
||||
if (qual == 0) {
|
||||
qual = 1;
|
||||
} // zero quals are wrong.
|
||||
|
||||
char h1 = genotype.charAt(0);
|
||||
char h2 = genotype.charAt(1);
|
||||
|
||||
double p_base;
|
||||
|
||||
if ((h1 == h2) && (h1 == read))
|
||||
{
|
||||
// hom
|
||||
p_base = getOneMinusQual(qual);
|
||||
}
|
||||
else if ((h1 != h2) && ((h1 == read) || (h2 == read)))
|
||||
{
|
||||
// het
|
||||
p_base = getOneHalfMinusQual(qual);
|
||||
}
|
||||
else
|
||||
{
|
||||
// error
|
||||
p_base = qual/-10.0;
|
||||
if ((h1 == h2) && (h1 == read)) {
|
||||
// hom
|
||||
p_base = getOneMinusQual(qual);
|
||||
} else if ((h1 != h2) && ((h1 == read) || (h2 == read))) {
|
||||
// het
|
||||
p_base = getOneHalfMinusQual(qual);
|
||||
} else {
|
||||
// error
|
||||
p_base = qual / -10.0;
|
||||
}
|
||||
|
||||
return p_base;
|
||||
|
|
@ -170,54 +207,41 @@ public class GenotypeLikelihoods {
|
|||
return s;
|
||||
}
|
||||
|
||||
public void ApplyPrior(char ref, char alt, double p_alt)
|
||||
{
|
||||
for (int i = 0; i < genotypes.length; i++)
|
||||
{
|
||||
if ((p_alt == -1) || (p_alt <= 1e-6))
|
||||
{
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
|
||||
{
|
||||
// hom-ref
|
||||
likelihoods[i] += Math.log10(priorHomRef);
|
||||
}
|
||||
else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref))
|
||||
{
|
||||
// hom-nonref
|
||||
likelihoods[i] += Math.log10(priorHomVar);
|
||||
}
|
||||
else
|
||||
{
|
||||
// het
|
||||
likelihoods[i] += Math.log10(priorHet);
|
||||
}
|
||||
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
|
||||
{
|
||||
// hom-ref
|
||||
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
|
||||
}
|
||||
else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt))
|
||||
{
|
||||
// hom-nonref
|
||||
likelihoods[i] += 2.0 * Math.log10(p_alt);
|
||||
}
|
||||
else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
|
||||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt)))
|
||||
{
|
||||
// het
|
||||
likelihoods[i] += Math.log10((1.0-p_alt) * p_alt * 2.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// something else (noise!)
|
||||
likelihoods[i] += Math.log10(1e-5);
|
||||
}
|
||||
public void ApplyPrior(char ref, char alt, double p_alt) {
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
if ((p_alt == -1) || (p_alt <= 1e-6)) {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += Math.log10(priorHomRef);
|
||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += Math.log10(priorHomVar);
|
||||
} else {
|
||||
// het
|
||||
likelihoods[i] += Math.log10(priorHet);
|
||||
}
|
||||
if (Double.isInfinite(likelihoods[i])) {
|
||||
likelihoods[i] = -1000;
|
||||
}
|
||||
} else {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
|
||||
} else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += 2.0 * Math.log10(p_alt);
|
||||
} else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
|
||||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt))) {
|
||||
// het
|
||||
likelihoods[i] += Math.log10((1.0 - p_alt) * p_alt * 2.0);
|
||||
} else {
|
||||
// something else (noise!)
|
||||
likelihoods[i] += Math.log10(1e-5);
|
||||
}
|
||||
|
||||
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
|
||||
if (Double.isInfinite(likelihoods[i])) {
|
||||
likelihoods[i] = -1000;
|
||||
}
|
||||
}
|
||||
}
|
||||
this.sort();
|
||||
|
|
@ -322,108 +346,9 @@ public class GenotypeLikelihoods {
|
|||
}
|
||||
}
|
||||
|
||||
this.LodVsRef(ref); //HACK
|
||||
//System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood);
|
||||
this.LodVsRef(ref); //HACK
|
||||
//System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood);
|
||||
|
||||
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods, sample_name);
|
||||
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, null, this.likelihoods, sample_name);
|
||||
}
|
||||
|
||||
public static class IndelCall
|
||||
{
|
||||
public String type;
|
||||
public String[] alleles;
|
||||
public double p;
|
||||
public double lod;
|
||||
|
||||
public IndelCall(String type, String[] alleles, double p, double lod)
|
||||
{
|
||||
this.type = type;
|
||||
this.alleles = alleles;
|
||||
this.p = p;
|
||||
this.lod = lod;
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
|
||||
}
|
||||
}
|
||||
|
||||
public static IndelCall callIndel(String[] indels)
|
||||
{
|
||||
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
|
||||
for (int i = 0; i < indels.length; i++)
|
||||
{
|
||||
if (! indel_allele_counts.containsKey(indels[i]))
|
||||
{
|
||||
indel_allele_counts.put(indels[i], 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1);
|
||||
}
|
||||
}
|
||||
|
||||
Object[] keys = indel_allele_counts.keySet().toArray();
|
||||
String[] alleles = new String[keys.length];
|
||||
int[] counts = new int[keys.length];
|
||||
double likelihoods[] = new double[keys.length];
|
||||
int null_count = 0;
|
||||
String max_allele = null;
|
||||
int max_count = -1;
|
||||
if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null")))))
|
||||
{
|
||||
for (int i = 0; i < keys.length; i++)
|
||||
{
|
||||
Integer count = (Integer)indel_allele_counts.get(keys[i]);
|
||||
alleles[i] = (String)keys[i];
|
||||
counts[i] = count;
|
||||
if (alleles[i].equals("null")) { null_count = counts[i]; }
|
||||
else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; }
|
||||
//System.out.printf("%s[%d] ", keys[i], count);
|
||||
}
|
||||
//System.out.printf("\n");
|
||||
|
||||
double eps = 1e-3;
|
||||
double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999);
|
||||
double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3);
|
||||
double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5);
|
||||
|
||||
double lodRef = pRef - Math.max(pHet, pHom);
|
||||
double lodHet = pHet - pRef;
|
||||
double lodHom = pHom - pRef;
|
||||
|
||||
//System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef);
|
||||
//System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet);
|
||||
//System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom);
|
||||
//System.out.printf("\n");
|
||||
|
||||
if (lodRef > 0)
|
||||
{
|
||||
// reference call
|
||||
String[] genotype = new String[2];
|
||||
genotype[0] = "null";
|
||||
genotype[1] = "null";
|
||||
return new IndelCall("ref", genotype, pRef, lodRef);
|
||||
}
|
||||
else if (lodHet > lodHom)
|
||||
{
|
||||
// het call
|
||||
String[] genotype = new String[2];
|
||||
genotype[0] = "null";
|
||||
genotype[1] = max_allele;
|
||||
return new IndelCall("het", genotype, pHet, lodHet);
|
||||
}
|
||||
else
|
||||
{
|
||||
// hom call
|
||||
String[] genotype = new String[2];
|
||||
genotype[0] = max_allele;
|
||||
genotype[1] = max_allele;
|
||||
return new IndelCall("hom", genotype, pHom, lodHom);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,102 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
public class IndelLikelihood {
|
||||
private String type;
|
||||
private String[] alleles;
|
||||
private double p;
|
||||
private double lod;
|
||||
|
||||
public IndelLikelihood(String type, String[] alleles, double p, double lod) {
|
||||
initialize(type, alleles, p, lod);
|
||||
}
|
||||
|
||||
public IndelLikelihood(String[] indels) {
|
||||
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
|
||||
|
||||
for (int i = 0; i < indels.length; i++) {
|
||||
if (! indel_allele_counts.containsKey(indels[i])) {
|
||||
indel_allele_counts.put(indels[i], 1);
|
||||
} else {
|
||||
indel_allele_counts.put(indels[i], indel_allele_counts.get(indels[i])+1);
|
||||
}
|
||||
}
|
||||
|
||||
Object[] keys = indel_allele_counts.keySet().toArray();
|
||||
String[] alleles = new String[keys.length];
|
||||
int[] counts = new int[keys.length];
|
||||
//double likelihoods[] = new double[keys.length];
|
||||
int null_count = 0;
|
||||
String max_allele = null;
|
||||
int max_count = -1;
|
||||
|
||||
if ((keys.length > 0) && (! ((keys.length == 1) && (((String)keys[0]).equals("null"))))) {
|
||||
for (int i = 0; i < keys.length; i++) {
|
||||
Integer count = (Integer)indel_allele_counts.get(keys[i]);
|
||||
alleles[i] = (String)keys[i];
|
||||
counts[i] = count;
|
||||
if (alleles[i].equals("null")) { null_count = counts[i]; }
|
||||
else if (counts[i] > max_count) { max_count = counts[i]; max_allele = alleles[i]; }
|
||||
//System.out.printf("%s[%d] ", keys[i], count);
|
||||
}
|
||||
//System.out.printf("\n");
|
||||
|
||||
double eps = 1e-3;
|
||||
double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999);
|
||||
double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3);
|
||||
double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5);
|
||||
|
||||
double lodRef = pRef - Math.max(pHet, pHom);
|
||||
double lodHet = pHet - pRef;
|
||||
double lodHom = pHom - pRef;
|
||||
|
||||
//System.out.printf("%s/%s %f %f\n", "null", "null", pRef, lodRef);
|
||||
//System.out.printf("%s/%s %f %f\n", max_allele, "null", pHet, lodHet);
|
||||
//System.out.printf("%s/%s %f %f\n", max_allele, max_allele, pHom, lodHom);
|
||||
//System.out.printf("\n");
|
||||
|
||||
if (lodRef > 0) {
|
||||
// reference call
|
||||
String[] genotype = new String[2];
|
||||
genotype[0] = "null";
|
||||
genotype[1] = "null";
|
||||
|
||||
//return new IndelLikelihood("ref", genotype, pRef, lodRef);
|
||||
initialize("ref", genotype, pRef, lodRef);
|
||||
} else if (lodHet > lodHom) {
|
||||
// het call
|
||||
String[] genotype = new String[2];
|
||||
genotype[0] = "null";
|
||||
genotype[1] = max_allele;
|
||||
|
||||
//return new IndelLikelihood("het", genotype, pHet, lodHet);
|
||||
initialize("het", genotype, pHet, lodHet);
|
||||
} else {
|
||||
// hom call
|
||||
String[] genotype = new String[2];
|
||||
genotype[0] = max_allele;
|
||||
genotype[1] = max_allele;
|
||||
|
||||
//return new IndelLikelihood("hom", genotype, pHom, lodHom);
|
||||
initialize("hom", genotype, pHom, lodHom);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void initialize(String type, String[] alleles, double p, double lod) {
|
||||
this.type = type;
|
||||
this.alleles = alleles;
|
||||
this.p = p;
|
||||
this.lod = lod;
|
||||
}
|
||||
|
||||
public String getType() { return type; }
|
||||
public String[] getAlleles() { return alleles; }
|
||||
public double getPosteriorProbability() { return p; }
|
||||
public double getLOD() { return lod; }
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s/%s %f %f", alleles[0], alleles[1], p, lod);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue