Misc:
1. Added logGamma function to utils 2. Required asserts to be enabled in the allele caller (run with java -ea) 3. put checks and asserts of NaN and Infinity in AlleleFrequencyEstimate 4. Added option FRACTIONAL_COUNTS to the pooled caller (not working right yet) AlleleFrequencyWalker: 5. Made FORCE_1BASE_PROBS not static in AlleleFrequencyWalker (an argument should never be static! Jeez.) 6. changed quality_precision to be 1e-4 (Q40) 7. don't adjust by quality_precision unless the qual is actually zero. 8. added more asserts for NaN and Infinity 9. put in a correction for zero probs in P_D_q 10. changed pG to be hardy-weinberg in the presence of an allele frequency prior (duh) 11. rewrote binomialProb() to not overflow on deep coverage 12. rewrote nchoosek() to behave right on deep coverage 13. put in some binomailProb() tests in the main() routine (they come out right when compared with R) Hunt for loci where 4bp should change things: 14. added FindNonrandomSecondBestBasePiles walker. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@471 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
eafb4633ba
commit
af6788fa3d
|
|
@ -24,7 +24,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
@Argument(doc="File to output GFF formatted allele frequency calls") public String GFF_OUTPUT_FILE;
|
||||
@Argument(shortName="met", doc="Turns on logging of metrics on the fly with AlleleFrequency calculation") public boolean LOG_METRICS;
|
||||
@Argument(required=false, defaultValue="metrics.out", doc="Name of file where metrics will output") public String METRICS_OUTPUT_FILE;
|
||||
@Argument(required=false, doc="Ignores 4-base probabilities and only uses the quality of the best/called base") public static boolean FORCE_1BASE_PROBS;
|
||||
@Argument(required=false, doc="Ignores 4-base probabilities and only uses the quality of the best/called base") public boolean FORCE_1BASE_PROBS;
|
||||
|
||||
protected static Logger logger = Logger.getLogger(AlleleFrequencyWalker.class);
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
PrintStream output;
|
||||
|
||||
private boolean initalized = false;
|
||||
static private double quality_precision = 1e-5;
|
||||
static private double quality_precision = 1e-4;
|
||||
public void initalize()
|
||||
{
|
||||
if (initalized) { return; }
|
||||
|
|
@ -213,7 +213,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
}
|
||||
*/
|
||||
|
||||
static public double[][] getQuals (LocusContext context)
|
||||
public double[][] getQuals (LocusContext context)
|
||||
{
|
||||
int numReads = context.getReads().size(); //numReads();
|
||||
double[][] quals = new double[numReads][4];
|
||||
|
|
@ -236,8 +236,13 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
// Set all nonref qual scores to their share of the remaining probality not "used" by the reference base's qual
|
||||
double nonref_quals = (1.0 - quals[i][callednum]) / 3;
|
||||
for (int b=0; b<4; b++)
|
||||
{
|
||||
if (b != callednum)
|
||||
quals[i][b] = nonref_quals + quality_precision;
|
||||
{
|
||||
quals[i][b] = nonref_quals;
|
||||
if (quals[i][b] <= quality_precision) { quals[i][b] = quality_precision; } // avoid zero probs.
|
||||
}
|
||||
}
|
||||
}else{
|
||||
assert (SQ_field instanceof byte[]);
|
||||
byte[] hex_quals = (byte[]) SQ_field;
|
||||
|
|
@ -259,7 +264,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
*/
|
||||
double qual2 = QualityUtils.compressedQualityToProb(hex_qual);
|
||||
//System.out.printf("2ND %x %d %f\n", hex_qual, called2num, qual2);
|
||||
quals[i][called2num] = qual2 + quality_precision;
|
||||
quals[i][called2num] = qual2;
|
||||
if (quals[i][called2num] <= quality_precision) { quals[i][called2num] = quality_precision; } // avoid zero probs.
|
||||
|
||||
double nonref_quals = (1.0 - quals[i][callednum] - quals[i][called2num]) / 2.0;
|
||||
for (int b=0; b<4; b++)
|
||||
|
|
@ -308,6 +314,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
// Initialize empyt MixtureLikelihood holders for each possible qstar (N+1)
|
||||
MixtureLikelihood[] bestMixtures = new MixtureLikelihood[N+1];
|
||||
double posteriors[] = new double[N+1];
|
||||
|
||||
{
|
||||
double q = ML_q_byEM(bases, quals, refnum, altnum);
|
||||
|
|
@ -316,6 +323,14 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, 0, 0) + P_G(N, 0);
|
||||
bestMixtures[0] = new MixtureLikelihood(posterior_null_hyp, 0.0, 0.0);
|
||||
posteriors[0] = posterior_null_hyp;
|
||||
|
||||
assert(! Double.isNaN(P_D_q(bases, quals, 0.0, refnum, altnum)));
|
||||
assert(! Double.isNaN(P_q_G(bases, N, 0.0, 0, 0)));
|
||||
assert(! Double.isNaN(P_G(N, 0)));
|
||||
|
||||
assert(! Double.isNaN(posteriors[0]));
|
||||
assert(! Double.isInfinite(posteriors[0]));
|
||||
|
||||
//System.out.format("DBG %s %.2f %.2f %5.2f %5.2f %5.2f %5.2f %d %d %s\n", location, 0.0, 0.0, P_D_q(bases, quals, 0.0, refnum, altnum), P_q_G(bases, N, 0.0, 0, 0), P_G(N, 0), posterior_null_hyp, (int)(q*bases.length), (int)((1.0-q)*bases.length), new String(bases));
|
||||
|
||||
|
|
@ -330,12 +345,14 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
double posterior = pDq + pqG + pG;
|
||||
|
||||
bestMixtures[qstar_N] = new MixtureLikelihood(posterior, qstar, q);
|
||||
posteriors[qstar_N] = posterior;
|
||||
|
||||
//System.out.format("DBG %s %.2f %.2f %5.2f %5.2f %5.2f %5.2f %d %d %s\n", location, q, qstar, pDq, pqG, pG, posterior, (int)(q*bases.length), (int)((1.0-q)*bases.length), new String(bases));
|
||||
//System.out.format("DBG %s %.2f %.2f %5.2f %5.2f %5.2f %5.2f %d %d %s\n",
|
||||
// location, q, qstar, pDq, pqG, pG, posterior, (int)(q*bases.length), (int)((1.0-q)*bases.length), new String(bases));
|
||||
}
|
||||
}
|
||||
|
||||
// First reverse sort NONREF mixtures according to highest posterior probabililty
|
||||
// First reverse sort mixtures according to highest posterior probabililty
|
||||
Arrays.sort(bestMixtures);
|
||||
|
||||
// Calculate Lod of any variant call versus the reference call
|
||||
|
|
@ -364,7 +381,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
posterior_null_hyp,
|
||||
depth,
|
||||
new String(bases),
|
||||
quals);
|
||||
quals,
|
||||
posteriors);
|
||||
return alleleFreq;
|
||||
}
|
||||
|
||||
|
|
@ -424,10 +442,14 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum)
|
||||
{
|
||||
double p = 0.0;
|
||||
double epsilon = 1e-4;
|
||||
|
||||
for (int i=0; i<bases.length; i++)
|
||||
{
|
||||
p += Math.log10((1-q) * quals[i][refnum] + q * quals[i][altnum]);
|
||||
double atomic = (1-q) * quals[i][refnum] + q * quals[i][altnum];
|
||||
if (atomic <= 0) { atomic = epsilon; }
|
||||
if (Double.isNaN(Math.log10(atomic))) { System.out.printf("DBG: %f %f %f\n", q, quals[i][refnum], quals[i][altnum]); }
|
||||
p += Math.log10(atomic);
|
||||
}
|
||||
|
||||
return p;
|
||||
|
|
@ -440,7 +462,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
if (qstar == 1) { qstar = 1.0 - epsilon; }
|
||||
|
||||
if (N != 2) { return 0.0; }
|
||||
else { return Math.log10(binomialProb(q_R, bases.length, qstar)); }
|
||||
else { return binomialProb(q_R, bases.length, qstar); }
|
||||
}
|
||||
|
||||
double P_G(int N, int qstar_N)
|
||||
|
|
@ -449,7 +471,16 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
if ((N == 2) && (prior_alt_frequency != -1))
|
||||
{
|
||||
return ((double)qstar_N * prior_alt_frequency) + (((double)N-(double)qstar_N)*(1.0-prior_alt_frequency));
|
||||
double p = -1.0;
|
||||
|
||||
if (qstar_N == 0) { p = Math.pow(1.0 - prior_alt_frequency, 2.0); }
|
||||
else if (qstar_N == 1) { p = 2.0 * (prior_alt_frequency * (1.0 - prior_alt_frequency)); }
|
||||
else if (qstar_N == 2) { p = Math.pow(prior_alt_frequency, 2.0); }
|
||||
else { assert(false); }
|
||||
|
||||
//System.out.printf("DBG2: %d %d %f %f\n", N, qstar_N, prior_alt_frequency, p);
|
||||
|
||||
return Math.log10(p);
|
||||
}
|
||||
|
||||
if (qstar_N == 0) { return Math.log10(0.999); }
|
||||
|
|
@ -474,28 +505,59 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
// n - number of Bernoulli trials
|
||||
// p - probability of success
|
||||
|
||||
if ((n*p < 5) && (n*(1-p) < 5))
|
||||
double ans;
|
||||
|
||||
//if (((double)n*p < 5.0) && ((double)n*(1.0-p) < 5.0))
|
||||
if (n < 1000)
|
||||
{
|
||||
// For small n and the edges, compute it directly.
|
||||
return (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k);
|
||||
ans = Math.log10((double)nchoosek(n, k)) + Math.log10(Math.pow(p, k)) + Math.log10(Math.pow(1-p, n-k));
|
||||
//System.out.printf("DBG1: %d %d %f %f\n", k, n, p, ans);
|
||||
}
|
||||
else
|
||||
{
|
||||
// For large n, approximate with a gaussian.
|
||||
double mean = (double)(n*p);
|
||||
double var = Math.sqrt((double)(n*p)*(1.0-p));
|
||||
double ans = (double)(1.0 / (var*Math.sqrt(2*Math.PI)))*Math.exp(-1.0 * Math.pow((double)k-mean,2)/(2.0*var*var));
|
||||
double check = (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k);
|
||||
double residual = ans - check;
|
||||
double ans_1 = Math.log10((double)(1.0 / (var*Math.sqrt(2*Math.PI)))*Math.exp(-1.0 * Math.pow((double)k-mean,2)/(2.0*var*var)));
|
||||
double ans_2 = ((Utils.logGamma(n+1) - Utils.logGamma(k+1) - Utils.logGamma(n-k+1))/Math.log(10)) + Math.log10(Math.pow(p, k)) + Math.log10(Math.pow(1-p, n-k));
|
||||
double check = Math.log10((double)nchoosek(n, k)) + Math.log10(Math.pow(p, k)) + Math.log10(Math.pow(1-p, n-k));
|
||||
double residual = ans_2 - check;
|
||||
|
||||
//System.out.format("DBG: %d %.10f %.10f\n", nchoosek(n, k), Math.pow(p, k), Math.pow(1-p, n-k));
|
||||
//System.out.format("RESIDUAL: (%d,%d,%f) %.10f %.10f %.10f\n", k, n, p, ans, check, residual);
|
||||
|
||||
return check;
|
||||
//System.out.printf("DBG2: %d %d %f %f %f %f %f\n", k, n, p, check, ans_1, ans_2, residual);
|
||||
|
||||
//if ((Double.isInfinite(check) || (Double.isNaN(check))))
|
||||
//{
|
||||
// System.out.printf("DBG2: %d %d %f %f\n", k, n, p, check);
|
||||
//}
|
||||
|
||||
ans = ans_1;
|
||||
}
|
||||
|
||||
//System.out.printf("DBG3: %f\n", ans);
|
||||
|
||||
return ans;
|
||||
}
|
||||
|
||||
static long nchoosek(long n, long k) {
|
||||
static double nchoosek(long n, long k)
|
||||
{
|
||||
|
||||
if (k > n)
|
||||
return 0;
|
||||
|
||||
if (k > n/2)
|
||||
k = n-k; // Take advantage of symmetry
|
||||
|
||||
double accum = 1;
|
||||
for (long i = 1; i <= k; i++)
|
||||
accum = accum * (n-k+i) / i;
|
||||
|
||||
return accum + 0.5; // avoid rounding error
|
||||
|
||||
/*
|
||||
long m = n - k;
|
||||
if (k < m)
|
||||
k = m;
|
||||
|
|
@ -504,6 +566,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
for (long i = n, j = 1; i > k; i--, j++)
|
||||
t = t * i / j;
|
||||
return t;
|
||||
*/
|
||||
}
|
||||
|
||||
public static String repeat(char letter, long count) {
|
||||
|
|
@ -690,6 +753,9 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
AlleleFrequencyWalker.binomialProb(5,10,0.5);
|
||||
AlleleFrequencyWalker.binomialProb(50,100,0.5);
|
||||
AlleleFrequencyWalker.binomialProb(1500,2965,0.508065);
|
||||
AlleleFrequencyWalker.binomialProb(0,197,0.5);
|
||||
AlleleFrequencyWalker.binomialProb(13,197,0.5);
|
||||
System.exit(0);
|
||||
|
||||
{
|
||||
int N = 2;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,187 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Pileup;
|
||||
import org.broadinstitute.sting.utils.BasicPileup;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class FindNonrandomSecondBestBasePiles extends LocusWalker<Integer, Integer> {
|
||||
@Argument(fullName="verbose",required=false,defaultValue="false")
|
||||
public boolean VERBOSE;
|
||||
|
||||
private AlleleFrequencyWalker caller_1b;
|
||||
private AlleleFrequencyWalker caller_4b;
|
||||
public void initialize()
|
||||
{
|
||||
caller_1b = new AlleleFrequencyWalker();
|
||||
caller_1b.N = 2;
|
||||
caller_1b.DOWNSAMPLE = 0;
|
||||
caller_1b.GFF_OUTPUT_FILE = "/dev/null";
|
||||
caller_1b.FORCE_1BASE_PROBS = true;
|
||||
caller_1b.initalize();
|
||||
caller_1b.reduceInit();
|
||||
|
||||
caller_4b = new AlleleFrequencyWalker();
|
||||
caller_4b.N = 2;
|
||||
caller_4b.DOWNSAMPLE = 0;
|
||||
caller_4b.GFF_OUTPUT_FILE = "/dev/null";
|
||||
caller_4b.FORCE_1BASE_PROBS = false;
|
||||
caller_4b.initalize();
|
||||
caller_4b.reduceInit();
|
||||
}
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||
{
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
|
||||
// First, call the site, because we're mostly interested in hets
|
||||
ref = Character.toUpperCase(ref);
|
||||
AlleleFrequencyEstimate call_1b = caller_1b.map(tracker, ref, context);
|
||||
AlleleFrequencyEstimate call_4b = caller_4b.map(tracker, ref, context);
|
||||
|
||||
// Only output data for sites that are confident disagreements.
|
||||
if (Math.abs(call_1b.lodVsRef) < 5.0) { return 0; }
|
||||
if (Math.abs(call_4b.lodVsRef) < 5.0) { return 0; }
|
||||
if (call_1b.genotype().equals(call_4b.genotype())) { return 0; }
|
||||
|
||||
List<SAMRecord> reads = pileup.getReads();
|
||||
List<Integer> offsets = pileup.getOffsets();
|
||||
String best_bases = pileup.getBases();
|
||||
String second_bases;
|
||||
double[] quals = new double[reads.size()];
|
||||
double[] second_quals = new double[reads.size()];
|
||||
{
|
||||
char[] second_bases_array = new char[reads.size()];
|
||||
for ( int i = 0; i < reads.size(); i++ )
|
||||
{
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
byte qual = (byte)read.getBaseQualities()[offset];
|
||||
quals[i] = 1.0 - Math.pow(10,(double)qual/-10.0);
|
||||
|
||||
second_bases_array[i] = BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(((byte[])read.getAttribute("SQ"))[offset]));
|
||||
second_quals[i] = QualityUtils.compressedQualityToProb(((byte[])read.getAttribute("SQ"))[offset]);
|
||||
}
|
||||
second_bases = new String(second_bases_array);
|
||||
}
|
||||
|
||||
String rodString = "";
|
||||
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) {
|
||||
if ( datum != null && ! (datum instanceof rodDbSNP)) {
|
||||
//System.out.printf("rod = %s%n", datum.toSimpleString());
|
||||
rodString += datum.toSimpleString();
|
||||
//System.out.printf("Rod string %s%n", rodString);
|
||||
}
|
||||
}
|
||||
|
||||
rodDbSNP dbsnp = (rodDbSNP)tracker.lookup("dbSNP", null);
|
||||
if ( dbsnp != null )
|
||||
rodString += dbsnp.toMediumString();
|
||||
|
||||
if ( rodString != "" )
|
||||
rodString = "[ROD: " + rodString + "]";
|
||||
|
||||
char[] bases = {'A', 'C', 'G', 'T'};
|
||||
double[][] counts = new double[4][];
|
||||
double[] totals = new double[4];
|
||||
double[][] fractional_counts = new double[4][];
|
||||
double[] fractional_totals = new double[4];
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
counts[i] = new double[4];
|
||||
fractional_counts[i] = new double[4];
|
||||
for (int j = 0; j < best_bases.length(); j++)
|
||||
{
|
||||
if (best_bases.charAt(j) == bases[i])
|
||||
{
|
||||
counts[BaseUtils.simpleBaseToBaseIndex(bases[i])][BaseUtils.simpleBaseToBaseIndex(second_bases.charAt(j))] += 1;
|
||||
totals[BaseUtils.simpleBaseToBaseIndex(bases[i])] += 1;
|
||||
|
||||
fractional_counts[BaseUtils.simpleBaseToBaseIndex(bases[i])][BaseUtils.simpleBaseToBaseIndex(second_bases.charAt(j))] += second_quals[j];
|
||||
fractional_totals[BaseUtils.simpleBaseToBaseIndex(bases[i])] += second_quals[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out.printf("%s\t%c\t%s\n%s\t%c\t%s\n",
|
||||
pileup.getLocation(),
|
||||
ref,
|
||||
best_bases,
|
||||
pileup.getLocation(),
|
||||
ref,
|
||||
second_bases);
|
||||
out.printf("%s\t%s\t%f\t%f\n",
|
||||
pileup.getLocation(),
|
||||
call_1b.genotype(),
|
||||
call_1b.lodVsRef,
|
||||
call_1b.lodVsNextBest);
|
||||
out.printf("%s\t%s\t%f\t%f\n",
|
||||
pileup.getLocation(),
|
||||
call_4b.genotype(),
|
||||
call_4b.lodVsRef,
|
||||
call_4b.lodVsNextBest);
|
||||
for (int i = 0; i < quals.length; i++)
|
||||
{
|
||||
out.printf("%s\t%c %c %f\t%f\t%f\t%f\t%f\n",
|
||||
pileup.getLocation(),
|
||||
best_bases.charAt(i),
|
||||
second_bases.charAt(i),
|
||||
quals[i],
|
||||
second_quals[i],
|
||||
-10.0 * Math.log10(1.0 - quals[i]),
|
||||
-10.0 * Math.log10(1.0 - second_quals[i]),
|
||||
Math.log10((quals[i])/(second_quals[i])));
|
||||
}
|
||||
out.printf("\n");
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
out.printf("%s\t%c ", pileup.getLocation(), bases[i]);
|
||||
for (int j = 0; j < 4; j++)
|
||||
{
|
||||
out.printf("%.03f ", counts[i][j] / totals[i]);
|
||||
}
|
||||
out.printf("\n");
|
||||
}
|
||||
/*
|
||||
out.printf("\n");
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
out.printf("%s\t%c ", pileup.getLocation(), bases[i]);
|
||||
for (int j = 0; j < 4; j++)
|
||||
{
|
||||
out.printf("%.03f ", fractional_counts[i][j] / fractional_totals[i]);
|
||||
}
|
||||
out.printf("\n");
|
||||
}
|
||||
*/
|
||||
out.printf("\n\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Given result of map function
|
||||
public Integer reduceInit() { return 0; }
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
}
|
||||
|
|
@ -27,6 +27,7 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
@Argument(required=false, shortName="downsample", defaultValue="4") public int DOWNSAMPLE;
|
||||
@Argument(required=false, shortName="downsample_noise", defaultValue="3") public int DOWNSAMPLE_NOISE;
|
||||
@Argument(required=false, shortName="log_metrics", defaultValue="true") public boolean LOG_METRICS;
|
||||
@Argument(required=false, shortName="fractional_counts", defaultValue="false") public boolean FRACTIONAL_COUNTS;
|
||||
|
||||
private Random random;
|
||||
|
||||
|
|
@ -167,11 +168,27 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
shallow_calls[i].lodVsRef);
|
||||
*/
|
||||
|
||||
if (deep_genotype.equals(shallow_genotype)) { correct_shallow_calls += 1; }
|
||||
if (deep_genotype.equals(shallow_genotype))
|
||||
{
|
||||
correct_shallow_calls += 1;
|
||||
}
|
||||
total_shallow_calls += 1;
|
||||
|
||||
EM_sum += shallow_calls[i].emperical_allele_frequency() * shallow_calls[i].N;
|
||||
EM_N += 2;
|
||||
if (! FRACTIONAL_COUNTS)
|
||||
{
|
||||
EM_sum += shallow_calls[i].emperical_allele_frequency() * shallow_calls[i].N;
|
||||
EM_N += shallow_calls[i].N;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j = 0; j <= shallow_calls[i].N; j++)
|
||||
{
|
||||
//System.out.printf("DBG3: %d %f %d\n", j, shallow_calls[i].posteriors[j], shallow_calls[i].N);
|
||||
EM_sum += shallow_calls[i].posteriors[j] * (double)j;
|
||||
EM_sum += shallow_calls[i].N;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
EM_alt_freq = EM_sum / EM_N;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,18 @@ import java.lang.Math;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
public class AlleleFrequencyEstimate {
|
||||
|
||||
static
|
||||
{
|
||||
boolean assertsEnabled = false;
|
||||
assert assertsEnabled = true; // Intentional side effect!!!
|
||||
if (!assertsEnabled)
|
||||
{
|
||||
System.err.printf("\n\n\nERROR: You must run with asserts enabled. \"java -ea\".\n\n\n");
|
||||
throw new RuntimeException("Asserts must be enabled!");
|
||||
}
|
||||
}
|
||||
|
||||
//AlleleFrequencyEstimate();
|
||||
public GenomeLoc location;
|
||||
public char ref;
|
||||
|
|
@ -21,11 +33,40 @@ public class AlleleFrequencyEstimate {
|
|||
public String notes;
|
||||
public String bases;
|
||||
public double[][] quals;
|
||||
public double[] posteriors;
|
||||
|
||||
GenomeLoc l;
|
||||
|
||||
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals)
|
||||
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors)
|
||||
{
|
||||
if( Double.isNaN(lodVsRef)) { System.out.printf("lodVsRef is NaN\n"); }
|
||||
if( Double.isNaN(lodVsNextBest)) { System.out.printf("lodVsNextBest is NaN\n"); }
|
||||
if( Double.isNaN(qhat)) { System.out.printf("qhat is NaN\n"); }
|
||||
if( Double.isNaN(qstar)) { System.out.printf("qstar is NaN\n"); }
|
||||
if( Double.isNaN(pBest)) { System.out.printf("pBest is NaN\n"); }
|
||||
if( Double.isNaN(pRef)) { System.out.printf("pRef is NaN\n"); }
|
||||
|
||||
if( Double.isInfinite(lodVsRef)) { System.out.printf("lodVsRef is Infinite\n"); }
|
||||
if( Double.isInfinite(lodVsNextBest)) { System.out.printf("lodVsNextBest is Infinite\n"); }
|
||||
if( Double.isInfinite(qhat)) { System.out.printf("qhat is Infinite\n"); }
|
||||
if( Double.isInfinite(qstar)) { System.out.printf("qstar is Infinite\n"); }
|
||||
if( Double.isInfinite(pBest)) { System.out.printf("pBest is Infinite\n"); }
|
||||
if( Double.isInfinite(pRef)) { System.out.printf("pRef is Infinite\n"); }
|
||||
|
||||
assert(! Double.isNaN(lodVsRef));
|
||||
assert(! Double.isNaN(lodVsNextBest));
|
||||
assert(! Double.isNaN(qhat));
|
||||
assert(! Double.isNaN(qstar));
|
||||
assert(! Double.isNaN(pBest));
|
||||
assert(! Double.isNaN(pRef));
|
||||
|
||||
assert(! Double.isInfinite(lodVsRef));
|
||||
assert(! Double.isInfinite(lodVsNextBest));
|
||||
assert(! Double.isInfinite(qhat));
|
||||
assert(! Double.isInfinite(qstar));
|
||||
assert(! Double.isInfinite(pBest));
|
||||
assert(! Double.isInfinite(pRef));
|
||||
|
||||
this.location = location;
|
||||
this.ref = ref;
|
||||
this.alt = alt;
|
||||
|
|
@ -38,6 +79,7 @@ public class AlleleFrequencyEstimate {
|
|||
this.notes = "";
|
||||
this.bases = bases;
|
||||
this.quals = quals;
|
||||
this.posteriors = posteriors;
|
||||
}
|
||||
|
||||
/** Return the most likely genotype. */
|
||||
|
|
@ -55,12 +97,12 @@ public class AlleleFrequencyEstimate {
|
|||
|
||||
public double emperical_allele_frequency()
|
||||
{
|
||||
return (double)Math.round((double)qhat * (double)N) / (double)N;
|
||||
return (double)Math.round((double)qstar * (double)N) / (double)N;
|
||||
}
|
||||
|
||||
public double emperical_allele_frequency(int N)
|
||||
{
|
||||
return (double)Math.round((double)qhat * (double)N) / (double)N;
|
||||
return (double)Math.round((double)qstar * (double)N) / (double)N;
|
||||
}
|
||||
|
||||
public String asGFFString()
|
||||
|
|
|
|||
|
|
@ -350,7 +350,17 @@ public class Utils {
|
|||
return ans;
|
||||
}
|
||||
|
||||
|
||||
// lifted from the internet
|
||||
// http://www.cs.princeton.edu/introcs/91float/Gamma.java.html
|
||||
public static double logGamma(double x)
|
||||
{
|
||||
double tmp = (x - 0.5) * Math.log(x + 4.5) - (x + 4.5);
|
||||
double ser = 1.0 + 76.18009173 / (x + 0) - 86.50532033 / (x + 1)
|
||||
+ 24.01409822 / (x + 2) - 1.231739516 / (x + 3)
|
||||
+ 0.00120858003 / (x + 4) - 0.00000536382 / (x + 5);
|
||||
return tmp + Math.log(ser * Math.sqrt(2 * Math.PI));
|
||||
}
|
||||
|
||||
public static double percentage(double x, double base) { return (base> 0 ? (x/base)*100.0 : 0); }
|
||||
public static double percentage(int x, int base) { return (base> 0 ? ((double)x/(double)base)*100.0 : 0); }
|
||||
public static double percentage(long x, long base) { return (base> 0 ? ((double)x/(double)base)*100.0 : 0); }
|
||||
|
|
|
|||
Loading…
Reference in New Issue