First draft of actual pooled EM caller.

Produces sane looking output on region of 1kG pilot1:

    CALL NA12813.SRP000031.2009_02.bam CC 0.609084 0.609084
    CALL NA12003.SRP000031.2009_02.bam CC 2.114234 2.114234 CCCCC
    CALL NA06994.SRP000031.2009_02.bam CC 0.910114 0.910114 C
    CALL NA18940.SRP000031.2009_02.bam CT 2.589749 0.910114 T
    CALL NA18555.SRP000031.2009_02.bam CC 0.609084 0.609084

Next up, eval vs. Baseline pilot1 calls and pilot3 deep-coverage truth.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@525 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
jmaguire 2009-04-24 13:42:15 +00:00
parent 13d4692d2e
commit dd408a2a9a
7 changed files with 67 additions and 25 deletions

View File

@ -38,7 +38,7 @@ public class SAMDataSource implements SimpleDataSource {
private boolean locusMode = true;
// How strict should we be with SAM/BAM parsing?
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.STRICT;
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT; //JRM for CSH
// our list of readers
private final List<File> samFileList = new ArrayList<File>();

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisTK;
@ -27,6 +28,20 @@ public class ListSampleIds extends LocusWalker<Boolean, Boolean>
public Boolean map(RefMetaDataTracker tracker, char ref, LocusContext context)
{
List<SAMRecord> reads = context.getReads();
StringBuilder readNames = new StringBuilder();
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
String rg = (String) read.getAttribute("RG");
SAMFileHeader header = read.getHeader();
SAMReadGroupRecord readGroup = header.getReadGroup(rg);
if (readGroup == null) { System.out.printf("."); return false; }
String sample = readGroup.getSample();
System.out.printf("FROM_MAP %s\n", sample);
}
return true;
}
@ -43,7 +58,6 @@ public class ListSampleIds extends LocusWalker<Boolean, Boolean>
public Boolean reduce(Boolean mapresult, Boolean sum)
{
out.flush();
System.exit(0);
return true;
}
}

View File

@ -34,7 +34,7 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
public void initialize()
{
GenomeAnalysisTK toolkit = this.getToolkit();
SAMFileHeader header = toolkit.getSamReader().getFileHeader();
SAMFileHeader header = toolkit.getEngine().getSAMHeader();
List<SAMReadGroupRecord> read_groups = header.getReadGroups();
sample_names = new ArrayList<String>();
@ -160,14 +160,12 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
likelihood += shallow_calls[i].lodVsNextBest;
/*
System.out.printf("DBG: %f %f %f %f\n",
deep_calls[i].lodVsNextBest,
deep_calls[i].lodVsRef,
shallow_calls[i].lodVsNextBest,
shallow_calls[i].lodVsRef);
*/
//System.out.printf("DBG: %f %f %f %f\n",
// deep_calls[i].lodVsNextBest,
// deep_calls[i].lodVsRef,
// shallow_calls[i].lodVsNextBest,
// shallow_calls[i].lodVsRef);
if (deep_genotype.equals(shallow_genotype))
{
correct_shallow_calls += 1;

View File

@ -19,7 +19,7 @@ import java.util.*;
// j.maguire 3-7-2009
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer> {
@Argument(fullName="metrics",required=true)
@Argument(fullName="metrics",required=false,defaultValue="/dev/null")
public String metricsFileName;
@Argument(fullName="lodThreshold",shortName="lod",required=false,defaultValue="5.0")
@ -228,13 +228,14 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
ref = Character.toUpperCase(ref);
GenotypeLikelihoods G = new GenotypeLikelihoods();
for ( int i = 0; i < reads.size(); i++ ) {
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
}
G.ApplyPrior(ref, Double.NaN);
G.ApplyPrior(ref, this.allele_frequency_prior);
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
}
@ -257,9 +258,10 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
return rodString;
}
double allele_frequency_prior = -1;
public void setAlleleFrequencyPrior(double freq)
{
assert(false);
this.allele_frequency_prior = freq;
}
// Given result of map function

View File

@ -103,17 +103,37 @@ public class GenotypeLikelihoods {
return s;
}
public void ApplyPrior(char ref, double p_alt) {
public void ApplyPrior(char ref, double p_alt)
{
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(1.0 - 1e-3);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(1e-5);
} else {
// het
likelihoods[i] += Math.log10(1e-3);
if (p_alt == -1)
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(1.0 - 1e-3);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(1e-5);
} else {
// het
likelihoods[i] += Math.log10(1e-3);
}
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
}
else
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += 2.0 * Math.log10(p_alt);
} else {
// het
likelihoods[i] += Math.log10((1.0-p_alt) * p_alt * 2.0);
}
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
}
}
this.sort();
@ -151,6 +171,7 @@ public class GenotypeLikelihoods {
}
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors) {
this.sort();
double qhat = Double.NaN;
double qstar = Double.NaN;
char alt = 'N';

View File

@ -59,6 +59,13 @@ public class MathUtils {
return 1;
}
public static double NormalDistribution(double mean, double sd, double x)
{
double a = 1.0 / (sd*Math.sqrt(2.0 * Math.PI));
double b = Math.exp(-1.0 * (Math.pow(x - mean,2.0)/(2.0 * sd * sd)));
return a * b;
}
/**
* Computes a binomial probability
*