do pooled calling properly for 1kg
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@667 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
313a6d0fb5
commit
c8d7223789
|
|
@ -17,21 +17,36 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
{
|
{
|
||||||
List<SingleSampleGenotyper> callers = null;
|
List<SingleSampleGenotyper> callers = null;
|
||||||
List<String> sample_names = null;
|
List<String> sample_names = null;
|
||||||
|
|
||||||
//@Argument(required=false, shortName="log_metrics", defaultValue="true") public boolean LOG_METRICS;
|
@Argument(required=false, shortName="fractional_counts", doc="should we use fractional counts?") public boolean FRACTIONAL_COUNTS = false;
|
||||||
@Argument(required=false, shortName="fractional_counts", doc="fractional counts") public boolean FRACTIONAL_COUNTS = false;
|
@Argument(required=false, shortName="max_iterations", doc="Maximum number of iterations for EM") public int MAX_ITERATIONS = 10;
|
||||||
|
@Argument(fullName="lodThreshold", shortName="lod", required=false, doc="lod threshold for outputting individual genotypes") public Double lodThreshold = 2.0;
|
||||||
|
@Argument(fullName="discovery_output", shortName="discovery_output", required=true, doc="file to write SNP discovery output to") public String DISCOVERY_OUTPUT;
|
||||||
|
|
||||||
private Random random;
|
private Random random;
|
||||||
|
|
||||||
private SAMFileHeader header;
|
private SAMFileHeader header;
|
||||||
|
|
||||||
|
private PrintStream discovery_output_file;
|
||||||
|
|
||||||
public void initialize()
|
public void initialize()
|
||||||
{
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
discovery_output_file = new PrintStream(DISCOVERY_OUTPUT);
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
e.printStackTrace();
|
||||||
|
System.exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
GenomeAnalysisEngine toolkit = this.getToolkit();
|
GenomeAnalysisEngine toolkit = this.getToolkit();
|
||||||
this.header = toolkit.getEngine().getSAMHeader();
|
this.header = toolkit.getEngine().getSAMHeader();
|
||||||
List<SAMReadGroupRecord> read_groups = header.getReadGroups();
|
List<SAMReadGroupRecord> read_groups = header.getReadGroups();
|
||||||
|
|
@ -51,11 +66,11 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
{
|
{
|
||||||
String sample_name = read_groups.get(i).getSample();
|
String sample_name = read_groups.get(i).getSample();
|
||||||
sample_names.add(sample_name);
|
sample_names.add(sample_name);
|
||||||
//System.out.println("SAMPLE: " + sample_name);
|
System.out.println("SAMPLE: " + sample_name);
|
||||||
|
|
||||||
SingleSampleGenotyper caller = new SingleSampleGenotyper();
|
SingleSampleGenotyper caller = new SingleSampleGenotyper();
|
||||||
caller.metricsFileName = "/dev/null";
|
caller.metricsFileName = "/dev/null";
|
||||||
caller.lodThreshold = 5.0;
|
caller.lodThreshold = lodThreshold;
|
||||||
caller.fourBaseMode = false;
|
caller.fourBaseMode = false;
|
||||||
caller.printMetrics = false;
|
caller.printMetrics = false;
|
||||||
caller.initialize();
|
caller.initialize();
|
||||||
|
|
@ -65,6 +80,8 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
|
|
||||||
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||||
{
|
{
|
||||||
|
if (ref == 'N') { return null; }
|
||||||
|
|
||||||
// 1. separate each context.
|
// 1. separate each context.
|
||||||
LocusContext[] contexts = new LocusContext[sample_names.size()];
|
LocusContext[] contexts = new LocusContext[sample_names.size()];
|
||||||
for (int i = 0; i < sample_names.size(); i++)
|
for (int i = 0; i < sample_names.size(); i++)
|
||||||
|
|
@ -76,15 +93,18 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
|
|
||||||
// EM Loop:
|
// EM Loop:
|
||||||
AlleleFrequencyEstimate[] calls = null;
|
AlleleFrequencyEstimate[] calls = null;
|
||||||
double EM_alt_freq = 0;
|
double EM_alt_freq;
|
||||||
double EM_N = 0;
|
double EM_N = 0;
|
||||||
|
|
||||||
|
// this line is kinda hacky
|
||||||
|
if (MAX_ITERATIONS == 1) { EM_alt_freq = -1; }
|
||||||
|
else { EM_alt_freq = 0.5; }
|
||||||
|
|
||||||
// (this loop is the EM cycle)
|
// (this loop is the EM cycle)
|
||||||
EM_alt_freq = 0.5;
|
double[] trajectory = new double[MAX_ITERATIONS + 1]; trajectory[0] = EM_alt_freq;
|
||||||
int num_iterations = 10;
|
double[] likelihood_trajectory = new double[MAX_ITERATIONS + 1]; likelihood_trajectory[0] = 0.0;
|
||||||
double[] trajectory = new double[num_iterations + 1]; trajectory[0] = EM_alt_freq;
|
boolean is_a_snp = false;
|
||||||
double[] likelihood_trajectory = new double[num_iterations + 1]; likelihood_trajectory[0] = 0.0;
|
for (int iterations = 0; iterations < MAX_ITERATIONS; iterations++)
|
||||||
for (int iterations = 0; iterations < num_iterations; iterations++)
|
|
||||||
{
|
{
|
||||||
// 6. Re-call from shallow coverage using the estimated frequency as a prior,
|
// 6. Re-call from shallow coverage using the estimated frequency as a prior,
|
||||||
// and compare to true deep calls,
|
// and compare to true deep calls,
|
||||||
|
|
@ -93,6 +113,7 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
EM_N = 0.0;
|
EM_N = 0.0;
|
||||||
double EM_sum = 0.0;
|
double EM_sum = 0.0;
|
||||||
double likelihood = 0.0;
|
double likelihood = 0.0;
|
||||||
|
is_a_snp = false;
|
||||||
|
|
||||||
for (int i = 0; i < sample_names.size(); i++)
|
for (int i = 0; i < sample_names.size(); i++)
|
||||||
{
|
{
|
||||||
|
|
@ -102,16 +123,15 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
|
|
||||||
likelihood += calls[i].posterior();
|
likelihood += calls[i].posterior();
|
||||||
|
|
||||||
//System.out.printf("DBG: %f %f %f %f\n",
|
|
||||||
// deep_calls[i].lodVsNextBest,
|
|
||||||
// deep_calls[i].lodVsRef,
|
|
||||||
// shallow_calls[i].lodVsNextBest,
|
|
||||||
// shallow_calls[i].lodVsRef);
|
|
||||||
|
|
||||||
if (! FRACTIONAL_COUNTS)
|
if (! FRACTIONAL_COUNTS)
|
||||||
{
|
{
|
||||||
EM_sum += calls[i].emperical_allele_frequency() * calls[i].N;
|
//System.out.printf("DBG: %s %f %f\n",
|
||||||
EM_N += calls[i].N;
|
// context.getLocation(),
|
||||||
|
// calls[i].lodVsNextBest,
|
||||||
|
// calls[i].lodVsRef);
|
||||||
|
EM_sum += calls[i].emperical_allele_frequency() * calls[i].N;
|
||||||
|
EM_N += calls[i].N;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -132,21 +152,33 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
//System.out.printf("DBGTRAJ %f %f %f %f\n", EM_sum, EM_N, trajectory[iterations], trajectory[iterations+1]);
|
//System.out.printf("DBGTRAJ %f %f %f %f\n", EM_sum, EM_N, trajectory[iterations], trajectory[iterations+1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 7. Output some statistics.
|
||||||
|
double discovery_posterior = 0;
|
||||||
|
double discovery_null = 0;
|
||||||
|
for (int i = 0; i < sample_names.size(); i++)
|
||||||
|
{
|
||||||
|
discovery_posterior += calls[i].pBest;
|
||||||
|
discovery_null += calls[i].pRef;
|
||||||
|
//System.out.printf("DBG %f %f %c %s\n", calls[i].pBest, calls[i].pRef, ref, calls[i].bases);
|
||||||
|
}
|
||||||
|
double discovery_lod = discovery_posterior - discovery_null;
|
||||||
|
discovery_output_file.printf("%s %f %f %f %f\n", context.getLocation(), EM_alt_freq, discovery_posterior, discovery_null, discovery_lod);
|
||||||
|
|
||||||
for (int i = 0; i < sample_names.size(); i++)
|
for (int i = 0; i < sample_names.size(); i++)
|
||||||
{
|
{
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts[i]);
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts[i]);
|
||||||
System.out.printf("CALL %s %s %f %f %s\n", sample_names.get(i), calls[i].genotype(), calls[i].lodVsRef, calls[i].lodVsNextBest, pileup.getBases());
|
if (calls[i].depth == 0) { continue; }
|
||||||
|
//if (calls[i].lodVsRef < lodThreshold) { continue; }
|
||||||
|
out.printf("%s %s %c %f %s %f %f %f %f %f %s\n", context.getLocation(), sample_names.get(i), ref, EM_alt_freq, calls[i].genotype(), calls[i].lodVsRef, calls[i].lodVsNextBest, calls[i].pBest, calls[i].pRef, discovery_lod, pileup.getBases());
|
||||||
}
|
}
|
||||||
|
|
||||||
// 7. Compare to estimation from the pool.
|
System.out.printf("EVAL %s\n", context.getLocation());
|
||||||
System.out.printf("EVAL %s %f\n",
|
|
||||||
context.getLocation(),
|
|
||||||
EM_alt_freq);
|
|
||||||
//for (int i = 0; i < likelihood_trajectory.length; i++)
|
//for (int i = 0; i < likelihood_trajectory.length; i++)
|
||||||
//{
|
//{
|
||||||
// System.out.printf("TRAJECTORY %f %f\n", trajectory[i], likelihood_trajectory[i]);
|
// System.out.printf("TRAJECTORY %f %f\n", trajectory[i], likelihood_trajectory[i]);
|
||||||
//}
|
//}
|
||||||
System.out.print("\n\n");
|
//System.out.print("\n\n");
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -181,6 +213,11 @@ public class PoolCaller extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
SAMRecord read = context.getReads().get(i);
|
SAMRecord read = context.getReads().get(i);
|
||||||
Integer offset = context.getOffsets().get(i);
|
Integer offset = context.getOffsets().get(i);
|
||||||
String RG = (String)(read.getAttribute("RG"));
|
String RG = (String)(read.getAttribute("RG"));
|
||||||
|
|
||||||
|
assert(header != null);
|
||||||
|
//System.out.printf("RG: %s\n", RG);
|
||||||
|
assert(header.getReadGroup(RG) != null);
|
||||||
|
|
||||||
String sample = header.getReadGroup(RG).getSample();
|
String sample = header.getReadGroup(RG).getSample();
|
||||||
if (sample == sample_name)
|
if (sample == sample_name)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue