First draft of actual pooled EM caller.
Produces sane looking output on region of 1kG pilot1:
CALL NA12813.SRP000031.2009_02.bam CC 0.609084 0.609084
CALL NA12003.SRP000031.2009_02.bam CC 2.114234 2.114234 CCCCC
CALL NA06994.SRP000031.2009_02.bam CC 0.910114 0.910114 C
CALL NA18940.SRP000031.2009_02.bam CT 2.589749 0.910114 T
CALL NA18555.SRP000031.2009_02.bam CC 0.609084 0.609084
Next up, eval vs. Baseline pilot1 calls and pilot3 deep-coverage truth.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@525 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
13d4692d2e
commit
dd408a2a9a
|
|
@ -38,7 +38,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
private boolean locusMode = true;
|
||||
|
||||
// How strict should we be with SAM/BAM parsing?
|
||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.STRICT;
|
||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT; //JRM for CSH
|
||||
|
||||
// our list of readers
|
||||
private final List<File> samFileList = new ArrayList<File>();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisTK;
|
||||
|
|
@ -27,6 +28,20 @@ public class ListSampleIds extends LocusWalker<Boolean, Boolean>
|
|||
|
||||
public Boolean map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||
{
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
StringBuilder readNames = new StringBuilder();
|
||||
|
||||
for ( int i = 0; i < reads.size(); i++ )
|
||||
{
|
||||
SAMRecord read = reads.get(i);
|
||||
String rg = (String) read.getAttribute("RG");
|
||||
SAMFileHeader header = read.getHeader();
|
||||
SAMReadGroupRecord readGroup = header.getReadGroup(rg);
|
||||
if (readGroup == null) { System.out.printf("."); return false; }
|
||||
String sample = readGroup.getSample();
|
||||
System.out.printf("FROM_MAP %s\n", sample);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -43,7 +58,6 @@ public class ListSampleIds extends LocusWalker<Boolean, Boolean>
|
|||
public Boolean reduce(Boolean mapresult, Boolean sum)
|
||||
{
|
||||
out.flush();
|
||||
System.exit(0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
public void initialize()
|
||||
{
|
||||
GenomeAnalysisTK toolkit = this.getToolkit();
|
||||
SAMFileHeader header = toolkit.getSamReader().getFileHeader();
|
||||
SAMFileHeader header = toolkit.getEngine().getSAMHeader();
|
||||
List<SAMReadGroupRecord> read_groups = header.getReadGroups();
|
||||
|
||||
sample_names = new ArrayList<String>();
|
||||
|
|
@ -160,14 +160,12 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
likelihood += shallow_calls[i].lodVsNextBest;
|
||||
|
||||
/*
|
||||
System.out.printf("DBG: %f %f %f %f\n",
|
||||
deep_calls[i].lodVsNextBest,
|
||||
deep_calls[i].lodVsRef,
|
||||
shallow_calls[i].lodVsNextBest,
|
||||
shallow_calls[i].lodVsRef);
|
||||
*/
|
||||
|
||||
//System.out.printf("DBG: %f %f %f %f\n",
|
||||
// deep_calls[i].lodVsNextBest,
|
||||
// deep_calls[i].lodVsRef,
|
||||
// shallow_calls[i].lodVsNextBest,
|
||||
// shallow_calls[i].lodVsRef);
|
||||
|
||||
if (deep_genotype.equals(shallow_genotype))
|
||||
{
|
||||
correct_shallow_calls += 1;
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import java.util.*;
|
|||
// j.maguire 3-7-2009
|
||||
|
||||
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer> {
|
||||
@Argument(fullName="metrics",required=true)
|
||||
@Argument(fullName="metrics",required=false,defaultValue="/dev/null")
|
||||
public String metricsFileName;
|
||||
|
||||
@Argument(fullName="lodThreshold",shortName="lod",required=false,defaultValue="5.0")
|
||||
|
|
@ -228,13 +228,14 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
ref = Character.toUpperCase(ref);
|
||||
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
for ( int i = 0; i < reads.size(); i++ )
|
||||
{
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
|
||||
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
||||
}
|
||||
G.ApplyPrior(ref, Double.NaN);
|
||||
G.ApplyPrior(ref, this.allele_frequency_prior);
|
||||
|
||||
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
|
||||
}
|
||||
|
|
@ -257,9 +258,10 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
return rodString;
|
||||
}
|
||||
|
||||
double allele_frequency_prior = -1;
|
||||
public void setAlleleFrequencyPrior(double freq)
|
||||
{
|
||||
assert(false);
|
||||
this.allele_frequency_prior = freq;
|
||||
}
|
||||
|
||||
// Given result of map function
|
||||
|
|
|
|||
|
|
@ -103,17 +103,37 @@ public class GenotypeLikelihoods {
|
|||
return s;
|
||||
}
|
||||
|
||||
public void ApplyPrior(char ref, double p_alt) {
|
||||
public void ApplyPrior(char ref, double p_alt)
|
||||
{
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += Math.log10(1.0 - 1e-3);
|
||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += Math.log10(1e-5);
|
||||
} else {
|
||||
// het
|
||||
likelihoods[i] += Math.log10(1e-3);
|
||||
if (p_alt == -1)
|
||||
{
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += Math.log10(1.0 - 1e-3);
|
||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += Math.log10(1e-5);
|
||||
} else {
|
||||
// het
|
||||
likelihoods[i] += Math.log10(1e-3);
|
||||
}
|
||||
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
|
||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += 2.0 * Math.log10(p_alt);
|
||||
} else {
|
||||
// het
|
||||
likelihoods[i] += Math.log10((1.0-p_alt) * p_alt * 2.0);
|
||||
}
|
||||
|
||||
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
|
||||
}
|
||||
}
|
||||
this.sort();
|
||||
|
|
@ -151,6 +171,7 @@ public class GenotypeLikelihoods {
|
|||
}
|
||||
|
||||
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors) {
|
||||
this.sort();
|
||||
double qhat = Double.NaN;
|
||||
double qstar = Double.NaN;
|
||||
char alt = 'N';
|
||||
|
|
|
|||
|
|
@ -59,6 +59,13 @@ public class MathUtils {
|
|||
return 1;
|
||||
}
|
||||
|
||||
public static double NormalDistribution(double mean, double sd, double x)
|
||||
{
|
||||
double a = 1.0 / (sd*Math.sqrt(2.0 * Math.PI));
|
||||
double b = Math.exp(-1.0 * (Math.pow(x - mean,2.0)/(2.0 * sd * sd)));
|
||||
return a * b;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a binomial probability
|
||||
*
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Reference in New Issue