First draft of actual pooled EM caller.

Produces sane looking output on region of 1kG pilot1:

    CALL NA12813.SRP000031.2009_02.bam CC 0.609084 0.609084
    CALL NA12003.SRP000031.2009_02.bam CC 2.114234 2.114234 CCCCC
    CALL NA06994.SRP000031.2009_02.bam CC 0.910114 0.910114 C
    CALL NA18940.SRP000031.2009_02.bam CT 2.589749 0.910114 T
    CALL NA18555.SRP000031.2009_02.bam CC 0.609084 0.609084

Next up, eval vs. Baseline pilot1 calls and pilot3 deep-coverage truth.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@525 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
jmaguire 2009-04-24 13:42:15 +00:00
parent 13d4692d2e
commit dd408a2a9a
7 changed files with 67 additions and 25 deletions

View File

@ -38,7 +38,7 @@ public class SAMDataSource implements SimpleDataSource {
private boolean locusMode = true; private boolean locusMode = true;
// How strict should we be with SAM/BAM parsing? // How strict should we be with SAM/BAM parsing?
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.STRICT; protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT; //JRM for CSH
// our list of readers // our list of readers
private final List<File> samFileList = new ArrayList<File>(); private final List<File> samFileList = new ArrayList<File>();

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers; package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisTK; import org.broadinstitute.sting.gatk.GenomeAnalysisTK;
@ -27,6 +28,20 @@ public class ListSampleIds extends LocusWalker<Boolean, Boolean>
public Boolean map(RefMetaDataTracker tracker, char ref, LocusContext context) public Boolean map(RefMetaDataTracker tracker, char ref, LocusContext context)
{ {
List<SAMRecord> reads = context.getReads();
StringBuilder readNames = new StringBuilder();
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
String rg = (String) read.getAttribute("RG");
SAMFileHeader header = read.getHeader();
SAMReadGroupRecord readGroup = header.getReadGroup(rg);
if (readGroup == null) { System.out.printf("."); return false; }
String sample = readGroup.getSample();
System.out.printf("FROM_MAP %s\n", sample);
}
return true; return true;
} }
@ -43,7 +58,6 @@ public class ListSampleIds extends LocusWalker<Boolean, Boolean>
public Boolean reduce(Boolean mapresult, Boolean sum) public Boolean reduce(Boolean mapresult, Boolean sum)
{ {
out.flush(); out.flush();
System.exit(0);
return true; return true;
} }
} }

View File

@ -34,7 +34,7 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
public void initialize() public void initialize()
{ {
GenomeAnalysisTK toolkit = this.getToolkit(); GenomeAnalysisTK toolkit = this.getToolkit();
SAMFileHeader header = toolkit.getSamReader().getFileHeader(); SAMFileHeader header = toolkit.getEngine().getSAMHeader();
List<SAMReadGroupRecord> read_groups = header.getReadGroups(); List<SAMReadGroupRecord> read_groups = header.getReadGroups();
sample_names = new ArrayList<String>(); sample_names = new ArrayList<String>();
@ -160,13 +160,11 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
likelihood += shallow_calls[i].lodVsNextBest; likelihood += shallow_calls[i].lodVsNextBest;
/* //System.out.printf("DBG: %f %f %f %f\n",
System.out.printf("DBG: %f %f %f %f\n", // deep_calls[i].lodVsNextBest,
deep_calls[i].lodVsNextBest, // deep_calls[i].lodVsRef,
deep_calls[i].lodVsRef, // shallow_calls[i].lodVsNextBest,
shallow_calls[i].lodVsNextBest, // shallow_calls[i].lodVsRef);
shallow_calls[i].lodVsRef);
*/
if (deep_genotype.equals(shallow_genotype)) if (deep_genotype.equals(shallow_genotype))
{ {

View File

@ -19,7 +19,7 @@ import java.util.*;
// j.maguire 3-7-2009 // j.maguire 3-7-2009
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer> { public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer> {
@Argument(fullName="metrics",required=true) @Argument(fullName="metrics",required=false,defaultValue="/dev/null")
public String metricsFileName; public String metricsFileName;
@Argument(fullName="lodThreshold",shortName="lod",required=false,defaultValue="5.0") @Argument(fullName="lodThreshold",shortName="lod",required=false,defaultValue="5.0")
@ -228,13 +228,14 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
ref = Character.toUpperCase(ref); ref = Character.toUpperCase(ref);
GenotypeLikelihoods G = new GenotypeLikelihoods(); GenotypeLikelihoods G = new GenotypeLikelihoods();
for ( int i = 0; i < reads.size(); i++ ) { for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i); SAMRecord read = reads.get(i);
int offset = offsets.get(i); int offset = offsets.get(i);
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]); G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
} }
G.ApplyPrior(ref, Double.NaN); G.ApplyPrior(ref, this.allele_frequency_prior);
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods); return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
} }
@ -257,9 +258,10 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
return rodString; return rodString;
} }
double allele_frequency_prior = -1;
public void setAlleleFrequencyPrior(double freq) public void setAlleleFrequencyPrior(double freq)
{ {
assert(false); this.allele_frequency_prior = freq;
} }
// Given result of map function // Given result of map function

View File

@ -103,8 +103,11 @@ public class GenotypeLikelihoods {
return s; return s;
} }
public void ApplyPrior(char ref, double p_alt) { public void ApplyPrior(char ref, double p_alt)
{
for (int i = 0; i < genotypes.length; i++) { for (int i = 0; i < genotypes.length; i++) {
if (p_alt == -1)
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref // hom-ref
likelihoods[i] += Math.log10(1.0 - 1e-3); likelihoods[i] += Math.log10(1.0 - 1e-3);
@ -115,6 +118,23 @@ public class GenotypeLikelihoods {
// het // het
likelihoods[i] += Math.log10(1e-3); likelihoods[i] += Math.log10(1e-3);
} }
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
}
else
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += 2.0 * Math.log10(p_alt);
} else {
// het
likelihoods[i] += Math.log10((1.0-p_alt) * p_alt * 2.0);
}
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
}
} }
this.sort(); this.sort();
} }
@ -151,6 +171,7 @@ public class GenotypeLikelihoods {
} }
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors) { public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors) {
this.sort();
double qhat = Double.NaN; double qhat = Double.NaN;
double qstar = Double.NaN; double qstar = Double.NaN;
char alt = 'N'; char alt = 'N';

View File

@ -59,6 +59,13 @@ public class MathUtils {
return 1; return 1;
} }
public static double NormalDistribution(double mean, double sd, double x)
{
double a = 1.0 / (sd*Math.sqrt(2.0 * Math.PI));
double b = Math.exp(-1.0 * (Math.pow(x - mean,2.0)/(2.0 * sd * sd)));
return a * b;
}
/** /**
* Computes a binomial probability * Computes a binomial probability
* *