First draft of actual pooled EM caller.
Produces sane-looking output on a region of 1kG pilot1:
CALL NA12813.SRP000031.2009_02.bam CC 0.609084 0.609084
CALL NA12003.SRP000031.2009_02.bam CC 2.114234 2.114234 CCCCC
CALL NA06994.SRP000031.2009_02.bam CC 0.910114 0.910114 C
CALL NA18940.SRP000031.2009_02.bam CT 2.589749 0.910114 T
CALL NA18555.SRP000031.2009_02.bam CC 0.609084 0.609084
Next up: evaluate against the baseline pilot1 calls and the pilot3 deep-coverage truth.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@525 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
13d4692d2e
commit
dd408a2a9a
|
|
@ -38,7 +38,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
private boolean locusMode = true;
|
private boolean locusMode = true;
|
||||||
|
|
||||||
// How strict should we be with SAM/BAM parsing?
|
// How strict should we be with SAM/BAM parsing?
|
||||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.STRICT;
|
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT; //JRM for CSH
|
||||||
|
|
||||||
// our list of readers
|
// our list of readers
|
||||||
private final List<File> samFileList = new ArrayList<File>();
|
private final List<File> samFileList = new ArrayList<File>();
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisTK;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisTK;
|
||||||
|
|
@ -27,6 +28,20 @@ public class ListSampleIds extends LocusWalker<Boolean, Boolean>
|
||||||
|
|
||||||
public Boolean map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
public Boolean map(RefMetaDataTracker tracker, char ref, LocusContext context)
|
||||||
{
|
{
|
||||||
|
List<SAMRecord> reads = context.getReads();
|
||||||
|
StringBuilder readNames = new StringBuilder();
|
||||||
|
|
||||||
|
for ( int i = 0; i < reads.size(); i++ )
|
||||||
|
{
|
||||||
|
SAMRecord read = reads.get(i);
|
||||||
|
String rg = (String) read.getAttribute("RG");
|
||||||
|
SAMFileHeader header = read.getHeader();
|
||||||
|
SAMReadGroupRecord readGroup = header.getReadGroup(rg);
|
||||||
|
if (readGroup == null) { System.out.printf("."); return false; }
|
||||||
|
String sample = readGroup.getSample();
|
||||||
|
System.out.printf("FROM_MAP %s\n", sample);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -43,7 +58,6 @@ public class ListSampleIds extends LocusWalker<Boolean, Boolean>
|
||||||
public Boolean reduce(Boolean mapresult, Boolean sum)
|
public Boolean reduce(Boolean mapresult, Boolean sum)
|
||||||
{
|
{
|
||||||
out.flush();
|
out.flush();
|
||||||
System.exit(0);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
public void initialize()
|
public void initialize()
|
||||||
{
|
{
|
||||||
GenomeAnalysisTK toolkit = this.getToolkit();
|
GenomeAnalysisTK toolkit = this.getToolkit();
|
||||||
SAMFileHeader header = toolkit.getSamReader().getFileHeader();
|
SAMFileHeader header = toolkit.getEngine().getSAMHeader();
|
||||||
List<SAMReadGroupRecord> read_groups = header.getReadGroups();
|
List<SAMReadGroupRecord> read_groups = header.getReadGroups();
|
||||||
|
|
||||||
sample_names = new ArrayList<String>();
|
sample_names = new ArrayList<String>();
|
||||||
|
|
@ -160,13 +160,11 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
|
|
||||||
likelihood += shallow_calls[i].lodVsNextBest;
|
likelihood += shallow_calls[i].lodVsNextBest;
|
||||||
|
|
||||||
/*
|
//System.out.printf("DBG: %f %f %f %f\n",
|
||||||
System.out.printf("DBG: %f %f %f %f\n",
|
// deep_calls[i].lodVsNextBest,
|
||||||
deep_calls[i].lodVsNextBest,
|
// deep_calls[i].lodVsRef,
|
||||||
deep_calls[i].lodVsRef,
|
// shallow_calls[i].lodVsNextBest,
|
||||||
shallow_calls[i].lodVsNextBest,
|
// shallow_calls[i].lodVsRef);
|
||||||
shallow_calls[i].lodVsRef);
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (deep_genotype.equals(shallow_genotype))
|
if (deep_genotype.equals(shallow_genotype))
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ import java.util.*;
|
||||||
// j.maguire 3-7-2009
|
// j.maguire 3-7-2009
|
||||||
|
|
||||||
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer> {
|
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer> {
|
||||||
@Argument(fullName="metrics",required=true)
|
@Argument(fullName="metrics",required=false,defaultValue="/dev/null")
|
||||||
public String metricsFileName;
|
public String metricsFileName;
|
||||||
|
|
||||||
@Argument(fullName="lodThreshold",shortName="lod",required=false,defaultValue="5.0")
|
@Argument(fullName="lodThreshold",shortName="lod",required=false,defaultValue="5.0")
|
||||||
|
|
@ -228,13 +228,14 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
ref = Character.toUpperCase(ref);
|
ref = Character.toUpperCase(ref);
|
||||||
|
|
||||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
for ( int i = 0; i < reads.size(); i++ )
|
||||||
|
{
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
int offset = offsets.get(i);
|
int offset = offsets.get(i);
|
||||||
|
|
||||||
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
||||||
}
|
}
|
||||||
G.ApplyPrior(ref, Double.NaN);
|
G.ApplyPrior(ref, this.allele_frequency_prior);
|
||||||
|
|
||||||
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
|
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
|
||||||
}
|
}
|
||||||
|
|
@ -257,9 +258,10 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
return rodString;
|
return rodString;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double allele_frequency_prior = -1;
|
||||||
public void setAlleleFrequencyPrior(double freq)
|
public void setAlleleFrequencyPrior(double freq)
|
||||||
{
|
{
|
||||||
assert(false);
|
this.allele_frequency_prior = freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Given result of map function
|
// Given result of map function
|
||||||
|
|
|
||||||
|
|
@ -103,17 +103,37 @@ public class GenotypeLikelihoods {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void ApplyPrior(char ref, double p_alt) {
|
public void ApplyPrior(char ref, double p_alt)
|
||||||
|
{
|
||||||
for (int i = 0; i < genotypes.length; i++) {
|
for (int i = 0; i < genotypes.length; i++) {
|
||||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
if (p_alt == -1)
|
||||||
// hom-ref
|
{
|
||||||
likelihoods[i] += Math.log10(1.0 - 1e-3);
|
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
// hom-ref
|
||||||
// hom-nonref
|
likelihoods[i] += Math.log10(1.0 - 1e-3);
|
||||||
likelihoods[i] += Math.log10(1e-5);
|
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||||
} else {
|
// hom-nonref
|
||||||
// het
|
likelihoods[i] += Math.log10(1e-5);
|
||||||
likelihoods[i] += Math.log10(1e-3);
|
} else {
|
||||||
|
// het
|
||||||
|
likelihoods[i] += Math.log10(1e-3);
|
||||||
|
}
|
||||||
|
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||||
|
// hom-ref
|
||||||
|
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
|
||||||
|
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||||
|
// hom-nonref
|
||||||
|
likelihoods[i] += 2.0 * Math.log10(p_alt);
|
||||||
|
} else {
|
||||||
|
// het
|
||||||
|
likelihoods[i] += Math.log10((1.0-p_alt) * p_alt * 2.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Double.isInfinite(likelihoods[i])) { likelihoods[i] = -1000; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.sort();
|
this.sort();
|
||||||
|
|
@ -151,6 +171,7 @@ public class GenotypeLikelihoods {
|
||||||
}
|
}
|
||||||
|
|
||||||
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors) {
|
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors) {
|
||||||
|
this.sort();
|
||||||
double qhat = Double.NaN;
|
double qhat = Double.NaN;
|
||||||
double qstar = Double.NaN;
|
double qstar = Double.NaN;
|
||||||
char alt = 'N';
|
char alt = 'N';
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,13 @@ public class MathUtils {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static double NormalDistribution(double mean, double sd, double x)
|
||||||
|
{
|
||||||
|
double a = 1.0 / (sd*Math.sqrt(2.0 * Math.PI));
|
||||||
|
double b = Math.exp(-1.0 * (Math.pow(x - mean,2.0)/(2.0 * sd * sd)));
|
||||||
|
return a * b;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes a binomial probability
|
* Computes a binomial probability
|
||||||
*
|
*
|
||||||
|
|
|
||||||
Binary file not shown.
Loading…
Reference in New Issue