Strand test and misc touch-ups
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@871 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fc91e3e30e
commit
417f5b145e
|
|
@ -101,8 +101,35 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||
{
|
||||
if (ref == 'N') { return null; }
|
||||
ref = Character.toUpperCase(ref);
|
||||
|
||||
// 1. seperate each context.
|
||||
// seperate each context.
|
||||
LocusContext forward = filterLocusContextByStrand(context, "+");
|
||||
LocusContext backward = filterLocusContextByStrand(context, "-");
|
||||
|
||||
if (forward.getReads().size() == 0) { return null; }
|
||||
if (backward.getReads().size() == 0) { return null; }
|
||||
|
||||
AlleleFrequencyEstimate estimate_both = EM(tracker, ref, context);
|
||||
AlleleFrequencyEstimate estimate_forward = EM(tracker, ref, forward);
|
||||
AlleleFrequencyEstimate estimate_backward = EM(tracker, ref, backward);
|
||||
|
||||
discovery_output_file.printf("%s %c %f %c %f\n",
|
||||
estimate_both.asPoolTabularString(),
|
||||
estimate_forward.alt,
|
||||
estimate_forward.lodVsRef,
|
||||
estimate_backward.alt,
|
||||
estimate_backward.lodVsRef);
|
||||
//discovery_output_file.printf("%s\n", estimate_forward.asPoolTabularString());
|
||||
//discovery_output_file.printf("%s\n", estimate_backward.asPoolTabularString());
|
||||
//discovery_output_file.printf("\n");
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private AlleleFrequencyEstimate EM(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||
{
|
||||
if (context.getReads().size() == 0) { return null; }
|
||||
LocusContext[] contexts = filterLocusContext(context, sample_names, 0);
|
||||
|
||||
// EM Loop:
|
||||
|
|
@ -160,6 +187,8 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
calls[i] = callers.get(i).map(tracker, ref, contexts[i]);
|
||||
String genotype = calls[i].genotype();
|
||||
|
||||
if (calls[i].depth == 0) { continue; }
|
||||
|
||||
likelihood += calls[i].pBest;
|
||||
|
||||
if (! FRACTIONAL_COUNTS)
|
||||
|
|
@ -200,33 +229,61 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
}
|
||||
|
||||
// 7. Output some statistics.
|
||||
double discovery_posterior = 0;
|
||||
double discovery_likelihood = 0;
|
||||
double discovery_null = 0;
|
||||
double discovery_prior = 0;
|
||||
double discovery_null_prior = 0;
|
||||
int n_ref = 0;
|
||||
int n_het = 0;
|
||||
int n_hom = 0;
|
||||
for (int i = 1; i < EM_N-1; i++)
|
||||
{
|
||||
//discovery_prior += 1e-3/(double)i;
|
||||
}
|
||||
//discovery_prior = Math.log10(discovery_prior);
|
||||
//discovery_null_prior = Math.log10(1.0 - discovery_prior);
|
||||
for (int i = 0; i < sample_names.size(); i++)
|
||||
{
|
||||
discovery_posterior += calls[i].pBest;
|
||||
discovery_null += calls[i].pRef;
|
||||
//System.out.printf("DBG %f %f %c %s\n", calls[i].pBest, calls[i].pRef, ref, calls[i].bases);
|
||||
}
|
||||
double discovery_lod = discovery_posterior - discovery_null;
|
||||
if (discovery_lod == 0) { alt = 'N'; }
|
||||
discovery_output_file.printf("%s %c %c %f %f %f %f\n", context.getLocation(), ref, alt, EM_alt_freq, discovery_posterior, discovery_null, discovery_lod);
|
||||
if (calls[i].depth == 0) { continue; }
|
||||
|
||||
discovery_likelihood += calls[i].pBest;
|
||||
discovery_null += calls[i].pRef;
|
||||
//System.out.printf("DBG %f %f %c %s\n", calls[i].pBest, calls[i].pRef, ref, calls[i].bases);
|
||||
|
||||
if (calls[i].qhat == 0.0) { n_ref += 1; }
|
||||
if (calls[i].qhat == 0.5) { n_het += 1; }
|
||||
if (calls[i].qhat == 1.0) { n_hom += 1; }
|
||||
}
|
||||
double discovery_lod = (discovery_likelihood + discovery_prior) - (discovery_null + discovery_null_prior);
|
||||
if (discovery_lod <= 0) { alt = 'N'; }
|
||||
//discovery_output_file.printf("%s %c %c %f %f %f %f %f %f %d %d %d\n", context.getLocation(), ref, alt, EM_alt_freq, discovery_likelihood, discovery_null, discovery_prior, discovery_lod, EM_N, n_ref, n_het, n_hom);
|
||||
|
||||
AlleleFrequencyEstimate estimate = new AlleleFrequencyEstimate(context.getLocation(),
|
||||
ref,
|
||||
alt,
|
||||
(int)EM_N,
|
||||
EM_alt_freq,
|
||||
EM_alt_freq,
|
||||
discovery_lod,
|
||||
0.0,
|
||||
discovery_likelihood + discovery_prior,
|
||||
discovery_null + discovery_prior,
|
||||
context.getReads().size(),
|
||||
(String)null,
|
||||
(double[][])null,
|
||||
(double[])null,
|
||||
(String)null);
|
||||
estimate.n_ref = n_ref; // HACK
|
||||
estimate.n_het = n_het; // HACK
|
||||
estimate.n_hom = n_hom; // HACK
|
||||
return estimate;
|
||||
|
||||
|
||||
for (int i = 0; i < sample_names.size(); i++)
|
||||
{
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts[i]);
|
||||
if (calls[i].depth == 0) { continue; }
|
||||
//if (calls[i].lodVsRef < lodThreshold) { continue; }
|
||||
//discovery_output_file.printf("%s %s %c %f %s %f %f %f %f %f %s\n", context.getLocation(), sample_names.get(i), ref, EM_alt_freq, calls[i].genotype(), calls[i].lodVsRef, calls[i].lodVsNextBest, calls[i].pBest, calls[i].pRef, discovery_lod, pileup.getBases());
|
||||
}
|
||||
|
||||
//for (int i = 0; i < likelihood_trajectory.length; i++)
|
||||
//{
|
||||
// System.out.printf("TRAJECTORY %f %f\n", trajectory[i], likelihood_trajectory[i]);
|
||||
//}
|
||||
//System.out.print("\n\n");
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private LocusContext poolLocusContext(LocusContext[] contexts)
|
||||
|
|
@ -249,6 +306,25 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
return new LocusContext(location, reads, offsets);
|
||||
}
|
||||
|
||||
private LocusContext filterLocusContextByStrand(LocusContext context, String strand)
|
||||
{
|
||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
ArrayList<Integer> offsets = new ArrayList<Integer>();
|
||||
|
||||
for (int i = 0; i < context.getReads().size(); i++)
|
||||
{
|
||||
SAMRecord read = context.getReads().get(i);
|
||||
Integer offset = context.getOffsets().get(i);
|
||||
|
||||
// Filter for strandedness
|
||||
if ((!strand.contains("+")) && (read.getReadNegativeStrandFlag() == false)) { continue; }
|
||||
if ((!strand.contains("-")) && (read.getReadNegativeStrandFlag() == true)) { continue; }
|
||||
reads.add(read);
|
||||
offsets.add(offset);
|
||||
}
|
||||
return new LocusContext(context.getLocation(), reads, offsets);
|
||||
}
|
||||
|
||||
private LocusContext[] filterLocusContext(LocusContext context, List<String> sample_names, int downsample)
|
||||
{
|
||||
HashMap<String,Integer> index = new HashMap<String,Integer>();
|
||||
|
|
@ -315,9 +391,15 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
return contexts;
|
||||
}
|
||||
|
||||
private void CollectStrandInformation(char ref, LocusContext context)
|
||||
{
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
|
||||
}
|
||||
|
||||
public void onTraversalDone(String result)
|
||||
{
|
||||
System.out.printf("OTD\n");
|
||||
try
|
||||
{
|
||||
discovery_output_file.flush();
|
||||
|
|
@ -334,7 +416,7 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
|||
|
||||
public String reduceInit()
|
||||
{
|
||||
discovery_output_file.printf("loc ref alt EM_alt_freq discovery_posterior discovery_null discovery_lod\n");
|
||||
discovery_output_file.printf("loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom fw_alt fw_lod bw_alt bw_lod\n");
|
||||
for (int i = 0; i < callers.size(); i++)
|
||||
{
|
||||
callers.get(i).reduceInit();
|
||||
|
|
|
|||
|
|
@ -35,6 +35,9 @@ public class AlleleFrequencyEstimate {
|
|||
public double[][] quals;
|
||||
public double[] posteriors;
|
||||
public String sample_name;
|
||||
public int n_ref;
|
||||
public int n_het;
|
||||
public int n_hom;
|
||||
|
||||
GenomeLoc l;
|
||||
|
||||
|
|
@ -42,11 +45,11 @@ public class AlleleFrequencyEstimate {
|
|||
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors, String sample_name)
|
||||
{
|
||||
if( Double.isNaN(lodVsRef)) { System.out.printf("%s: lodVsRef is NaN\n", location.toString()); }
|
||||
if( Double.isNaN(lodVsNextBest)) { System.out.printf("lodVsNextBest is NaN\n"); }
|
||||
if( Double.isNaN(qhat)) { System.out.printf("qhat is NaN\n"); }
|
||||
if( Double.isNaN(qstar)) { System.out.printf("qstar is NaN\n"); }
|
||||
if( Double.isNaN(pBest)) { System.out.printf("pBest is NaN\n"); }
|
||||
if( Double.isNaN(pRef)) { System.out.printf("pRef is NaN (%c %s)\n", ref, bases); }
|
||||
if( Double.isNaN(lodVsNextBest)) { System.out.printf("%s lodVsNextBest is NaN\n", location.toString()); }
|
||||
if( Double.isNaN(qhat)) { System.out.printf("%s qhat is NaN\n", location.toString()); }
|
||||
if( Double.isNaN(qstar)) { System.out.printf("%s qstar is NaN\n", location.toString()); }
|
||||
if( Double.isNaN(pBest)) { System.out.printf("%s pBest is NaN\n", location.toString()); }
|
||||
if( Double.isNaN(pRef)) { System.out.printf("%s pRef is NaN (%c %s)\n", location.toString(), ref, bases); }
|
||||
|
||||
if( Double.isInfinite(lodVsRef))
|
||||
{
|
||||
|
|
@ -206,6 +209,23 @@ public class AlleleFrequencyEstimate {
|
|||
}
|
||||
}
|
||||
|
||||
public String asPoolTabularString()
|
||||
{
|
||||
return String.format("%s %c %c %f %f %f %s %f %d %d %d %d",
|
||||
location,
|
||||
ref,
|
||||
alt,
|
||||
qstar,
|
||||
pBest,
|
||||
pRef,
|
||||
"NA",
|
||||
lodVsRef,
|
||||
N,
|
||||
n_ref,
|
||||
n_het,
|
||||
n_hom);
|
||||
}
|
||||
|
||||
|
||||
public double posterior()
|
||||
{
|
||||
|
|
|
|||
|
|
@ -81,8 +81,10 @@ public class GenotypeLikelihoods {
|
|||
offNextBestBasePriors.put("TT", 0.505);
|
||||
}
|
||||
|
||||
public void add(char ref, char read, byte qual) {
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
public void add(char ref, char read, byte qual)
|
||||
{
|
||||
for (int i = 0; i < genotypes.length; i++)
|
||||
{
|
||||
likelihoods[i] += calculateAlleleLikelihood(ref, read, genotypes[i], qual);
|
||||
}
|
||||
}
|
||||
|
|
@ -96,14 +98,19 @@ public class GenotypeLikelihoods {
|
|||
|
||||
double p_base;
|
||||
|
||||
if ((h1 == h2) && (h1 == read)) {
|
||||
p_base = getOneMinusQual(qual); //Math.log10(1 - p_error);
|
||||
} else if ((h1 != h2) && (h1 == read) || (h2 == read)) {
|
||||
p_base = getOneHalfMinusQual(qual); // )Math.log10(0.5 - (p_error / 2.0));
|
||||
} else {
|
||||
// the real math would be
|
||||
// likelihood += log10(pow(10,(qual/-10.0)));
|
||||
// but it simplifies to
|
||||
if ((h1 == h2) && (h1 == read))
|
||||
{
|
||||
// hom
|
||||
p_base = getOneMinusQual(qual);
|
||||
}
|
||||
else if ((h1 != h2) && ((h1 == read) || (h2 == read)))
|
||||
{
|
||||
// het
|
||||
p_base = getOneHalfMinusQual(qual);
|
||||
}
|
||||
else
|
||||
{
|
||||
// error
|
||||
p_base = qual/-10.0;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue