Strand test and misc touch-ups
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@871 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fc91e3e30e
commit
417f5b145e
|
|
@ -101,8 +101,35 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||||
{
|
{
|
||||||
if (ref == 'N') { return null; }
|
if (ref == 'N') { return null; }
|
||||||
|
ref = Character.toUpperCase(ref);
|
||||||
|
|
||||||
// 1. seperate each context.
|
// seperate each context.
|
||||||
|
LocusContext forward = filterLocusContextByStrand(context, "+");
|
||||||
|
LocusContext backward = filterLocusContextByStrand(context, "-");
|
||||||
|
|
||||||
|
if (forward.getReads().size() == 0) { return null; }
|
||||||
|
if (backward.getReads().size() == 0) { return null; }
|
||||||
|
|
||||||
|
AlleleFrequencyEstimate estimate_both = EM(tracker, ref, context);
|
||||||
|
AlleleFrequencyEstimate estimate_forward = EM(tracker, ref, forward);
|
||||||
|
AlleleFrequencyEstimate estimate_backward = EM(tracker, ref, backward);
|
||||||
|
|
||||||
|
discovery_output_file.printf("%s %c %f %c %f\n",
|
||||||
|
estimate_both.asPoolTabularString(),
|
||||||
|
estimate_forward.alt,
|
||||||
|
estimate_forward.lodVsRef,
|
||||||
|
estimate_backward.alt,
|
||||||
|
estimate_backward.lodVsRef);
|
||||||
|
//discovery_output_file.printf("%s\n", estimate_forward.asPoolTabularString());
|
||||||
|
//discovery_output_file.printf("%s\n", estimate_backward.asPoolTabularString());
|
||||||
|
//discovery_output_file.printf("\n");
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private AlleleFrequencyEstimate EM(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||||
|
{
|
||||||
|
if (context.getReads().size() == 0) { return null; }
|
||||||
LocusContext[] contexts = filterLocusContext(context, sample_names, 0);
|
LocusContext[] contexts = filterLocusContext(context, sample_names, 0);
|
||||||
|
|
||||||
// EM Loop:
|
// EM Loop:
|
||||||
|
|
@ -160,6 +187,8 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
calls[i] = callers.get(i).map(tracker, ref, contexts[i]);
|
calls[i] = callers.get(i).map(tracker, ref, contexts[i]);
|
||||||
String genotype = calls[i].genotype();
|
String genotype = calls[i].genotype();
|
||||||
|
|
||||||
|
if (calls[i].depth == 0) { continue; }
|
||||||
|
|
||||||
likelihood += calls[i].pBest;
|
likelihood += calls[i].pBest;
|
||||||
|
|
||||||
if (! FRACTIONAL_COUNTS)
|
if (! FRACTIONAL_COUNTS)
|
||||||
|
|
@ -200,33 +229,61 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
}
|
}
|
||||||
|
|
||||||
// 7. Output some statistics.
|
// 7. Output some statistics.
|
||||||
double discovery_posterior = 0;
|
double discovery_likelihood = 0;
|
||||||
double discovery_null = 0;
|
double discovery_null = 0;
|
||||||
|
double discovery_prior = 0;
|
||||||
|
double discovery_null_prior = 0;
|
||||||
|
int n_ref = 0;
|
||||||
|
int n_het = 0;
|
||||||
|
int n_hom = 0;
|
||||||
|
for (int i = 1; i < EM_N-1; i++)
|
||||||
|
{
|
||||||
|
//discovery_prior += 1e-3/(double)i;
|
||||||
|
}
|
||||||
|
//discovery_prior = Math.log10(discovery_prior);
|
||||||
|
//discovery_null_prior = Math.log10(1.0 - discovery_prior);
|
||||||
for (int i = 0; i < sample_names.size(); i++)
|
for (int i = 0; i < sample_names.size(); i++)
|
||||||
{
|
{
|
||||||
discovery_posterior += calls[i].pBest;
|
if (calls[i].depth == 0) { continue; }
|
||||||
discovery_null += calls[i].pRef;
|
|
||||||
//System.out.printf("DBG %f %f %c %s\n", calls[i].pBest, calls[i].pRef, ref, calls[i].bases);
|
discovery_likelihood += calls[i].pBest;
|
||||||
}
|
discovery_null += calls[i].pRef;
|
||||||
double discovery_lod = discovery_posterior - discovery_null;
|
//System.out.printf("DBG %f %f %c %s\n", calls[i].pBest, calls[i].pRef, ref, calls[i].bases);
|
||||||
if (discovery_lod == 0) { alt = 'N'; }
|
|
||||||
discovery_output_file.printf("%s %c %c %f %f %f %f\n", context.getLocation(), ref, alt, EM_alt_freq, discovery_posterior, discovery_null, discovery_lod);
|
if (calls[i].qhat == 0.0) { n_ref += 1; }
|
||||||
|
if (calls[i].qhat == 0.5) { n_het += 1; }
|
||||||
|
if (calls[i].qhat == 1.0) { n_hom += 1; }
|
||||||
|
}
|
||||||
|
double discovery_lod = (discovery_likelihood + discovery_prior) - (discovery_null + discovery_null_prior);
|
||||||
|
if (discovery_lod <= 0) { alt = 'N'; }
|
||||||
|
//discovery_output_file.printf("%s %c %c %f %f %f %f %f %f %d %d %d\n", context.getLocation(), ref, alt, EM_alt_freq, discovery_likelihood, discovery_null, discovery_prior, discovery_lod, EM_N, n_ref, n_het, n_hom);
|
||||||
|
|
||||||
|
AlleleFrequencyEstimate estimate = new AlleleFrequencyEstimate(context.getLocation(),
|
||||||
|
ref,
|
||||||
|
alt,
|
||||||
|
(int)EM_N,
|
||||||
|
EM_alt_freq,
|
||||||
|
EM_alt_freq,
|
||||||
|
discovery_lod,
|
||||||
|
0.0,
|
||||||
|
discovery_likelihood + discovery_prior,
|
||||||
|
discovery_null + discovery_prior,
|
||||||
|
context.getReads().size(),
|
||||||
|
(String)null,
|
||||||
|
(double[][])null,
|
||||||
|
(double[])null,
|
||||||
|
(String)null);
|
||||||
|
estimate.n_ref = n_ref; // HACK
|
||||||
|
estimate.n_het = n_het; // HACK
|
||||||
|
estimate.n_hom = n_hom; // HACK
|
||||||
|
return estimate;
|
||||||
|
|
||||||
for (int i = 0; i < sample_names.size(); i++)
|
|
||||||
{
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts[i]);
|
|
||||||
if (calls[i].depth == 0) { continue; }
|
|
||||||
//if (calls[i].lodVsRef < lodThreshold) { continue; }
|
|
||||||
//discovery_output_file.printf("%s %s %c %f %s %f %f %f %f %f %s\n", context.getLocation(), sample_names.get(i), ref, EM_alt_freq, calls[i].genotype(), calls[i].lodVsRef, calls[i].lodVsNextBest, calls[i].pBest, calls[i].pRef, discovery_lod, pileup.getBases());
|
|
||||||
}
|
|
||||||
|
|
||||||
//for (int i = 0; i < likelihood_trajectory.length; i++)
|
//for (int i = 0; i < likelihood_trajectory.length; i++)
|
||||||
//{
|
//{
|
||||||
// System.out.printf("TRAJECTORY %f %f\n", trajectory[i], likelihood_trajectory[i]);
|
// System.out.printf("TRAJECTORY %f %f\n", trajectory[i], likelihood_trajectory[i]);
|
||||||
//}
|
//}
|
||||||
//System.out.print("\n\n");
|
//System.out.print("\n\n");
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private LocusContext poolLocusContext(LocusContext[] contexts)
|
private LocusContext poolLocusContext(LocusContext[] contexts)
|
||||||
|
|
@ -249,6 +306,25 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
return new LocusContext(location, reads, offsets);
|
return new LocusContext(location, reads, offsets);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private LocusContext filterLocusContextByStrand(LocusContext context, String strand)
|
||||||
|
{
|
||||||
|
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||||
|
ArrayList<Integer> offsets = new ArrayList<Integer>();
|
||||||
|
|
||||||
|
for (int i = 0; i < context.getReads().size(); i++)
|
||||||
|
{
|
||||||
|
SAMRecord read = context.getReads().get(i);
|
||||||
|
Integer offset = context.getOffsets().get(i);
|
||||||
|
|
||||||
|
// Filter for strandedness
|
||||||
|
if ((!strand.contains("+")) && (read.getReadNegativeStrandFlag() == false)) { continue; }
|
||||||
|
if ((!strand.contains("-")) && (read.getReadNegativeStrandFlag() == true)) { continue; }
|
||||||
|
reads.add(read);
|
||||||
|
offsets.add(offset);
|
||||||
|
}
|
||||||
|
return new LocusContext(context.getLocation(), reads, offsets);
|
||||||
|
}
|
||||||
|
|
||||||
private LocusContext[] filterLocusContext(LocusContext context, List<String> sample_names, int downsample)
|
private LocusContext[] filterLocusContext(LocusContext context, List<String> sample_names, int downsample)
|
||||||
{
|
{
|
||||||
HashMap<String,Integer> index = new HashMap<String,Integer>();
|
HashMap<String,Integer> index = new HashMap<String,Integer>();
|
||||||
|
|
@ -315,9 +391,15 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
return contexts;
|
return contexts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void CollectStrandInformation(char ref, LocusContext context)
|
||||||
|
{
|
||||||
|
List<SAMRecord> reads = context.getReads();
|
||||||
|
List<Integer> offsets = context.getOffsets();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public void onTraversalDone(String result)
|
public void onTraversalDone(String result)
|
||||||
{
|
{
|
||||||
System.out.printf("OTD\n");
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
discovery_output_file.flush();
|
discovery_output_file.flush();
|
||||||
|
|
@ -334,7 +416,7 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
|
|
||||||
public String reduceInit()
|
public String reduceInit()
|
||||||
{
|
{
|
||||||
discovery_output_file.printf("loc ref alt EM_alt_freq discovery_posterior discovery_null discovery_lod\n");
|
discovery_output_file.printf("loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom fw_alt fw_lod bw_alt bw_lod\n");
|
||||||
for (int i = 0; i < callers.size(); i++)
|
for (int i = 0; i < callers.size(); i++)
|
||||||
{
|
{
|
||||||
callers.get(i).reduceInit();
|
callers.get(i).reduceInit();
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,9 @@ public class AlleleFrequencyEstimate {
|
||||||
public double[][] quals;
|
public double[][] quals;
|
||||||
public double[] posteriors;
|
public double[] posteriors;
|
||||||
public String sample_name;
|
public String sample_name;
|
||||||
|
public int n_ref;
|
||||||
|
public int n_het;
|
||||||
|
public int n_hom;
|
||||||
|
|
||||||
GenomeLoc l;
|
GenomeLoc l;
|
||||||
|
|
||||||
|
|
@ -42,11 +45,11 @@ public class AlleleFrequencyEstimate {
|
||||||
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors, String sample_name)
|
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors, String sample_name)
|
||||||
{
|
{
|
||||||
if( Double.isNaN(lodVsRef)) { System.out.printf("%s: lodVsRef is NaN\n", location.toString()); }
|
if( Double.isNaN(lodVsRef)) { System.out.printf("%s: lodVsRef is NaN\n", location.toString()); }
|
||||||
if( Double.isNaN(lodVsNextBest)) { System.out.printf("lodVsNextBest is NaN\n"); }
|
if( Double.isNaN(lodVsNextBest)) { System.out.printf("%s lodVsNextBest is NaN\n", location.toString()); }
|
||||||
if( Double.isNaN(qhat)) { System.out.printf("qhat is NaN\n"); }
|
if( Double.isNaN(qhat)) { System.out.printf("%s qhat is NaN\n", location.toString()); }
|
||||||
if( Double.isNaN(qstar)) { System.out.printf("qstar is NaN\n"); }
|
if( Double.isNaN(qstar)) { System.out.printf("%s qstar is NaN\n", location.toString()); }
|
||||||
if( Double.isNaN(pBest)) { System.out.printf("pBest is NaN\n"); }
|
if( Double.isNaN(pBest)) { System.out.printf("%s pBest is NaN\n", location.toString()); }
|
||||||
if( Double.isNaN(pRef)) { System.out.printf("pRef is NaN (%c %s)\n", ref, bases); }
|
if( Double.isNaN(pRef)) { System.out.printf("%s pRef is NaN (%c %s)\n", location.toString(), ref, bases); }
|
||||||
|
|
||||||
if( Double.isInfinite(lodVsRef))
|
if( Double.isInfinite(lodVsRef))
|
||||||
{
|
{
|
||||||
|
|
@ -206,6 +209,23 @@ public class AlleleFrequencyEstimate {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String asPoolTabularString()
|
||||||
|
{
|
||||||
|
return String.format("%s %c %c %f %f %f %s %f %d %d %d %d",
|
||||||
|
location,
|
||||||
|
ref,
|
||||||
|
alt,
|
||||||
|
qstar,
|
||||||
|
pBest,
|
||||||
|
pRef,
|
||||||
|
"NA",
|
||||||
|
lodVsRef,
|
||||||
|
N,
|
||||||
|
n_ref,
|
||||||
|
n_het,
|
||||||
|
n_hom);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public double posterior()
|
public double posterior()
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -81,8 +81,10 @@ public class GenotypeLikelihoods {
|
||||||
offNextBestBasePriors.put("TT", 0.505);
|
offNextBestBasePriors.put("TT", 0.505);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void add(char ref, char read, byte qual) {
|
public void add(char ref, char read, byte qual)
|
||||||
for (int i = 0; i < genotypes.length; i++) {
|
{
|
||||||
|
for (int i = 0; i < genotypes.length; i++)
|
||||||
|
{
|
||||||
likelihoods[i] += calculateAlleleLikelihood(ref, read, genotypes[i], qual);
|
likelihoods[i] += calculateAlleleLikelihood(ref, read, genotypes[i], qual);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -96,14 +98,19 @@ public class GenotypeLikelihoods {
|
||||||
|
|
||||||
double p_base;
|
double p_base;
|
||||||
|
|
||||||
if ((h1 == h2) && (h1 == read)) {
|
if ((h1 == h2) && (h1 == read))
|
||||||
p_base = getOneMinusQual(qual); //Math.log10(1 - p_error);
|
{
|
||||||
} else if ((h1 != h2) && (h1 == read) || (h2 == read)) {
|
// hom
|
||||||
p_base = getOneHalfMinusQual(qual); // )Math.log10(0.5 - (p_error / 2.0));
|
p_base = getOneMinusQual(qual);
|
||||||
} else {
|
}
|
||||||
// the real math would be
|
else if ((h1 != h2) && ((h1 == read) || (h2 == read)))
|
||||||
// likelihood += log10(pow(10,(qual/-10.0)));
|
{
|
||||||
// but it simplifies to
|
// het
|
||||||
|
p_base = getOneHalfMinusQual(qual);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// error
|
||||||
p_base = qual/-10.0;
|
p_base = qual/-10.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue