Added a ROD (SangerSNP) for parsing the Sanger's chr20 pilot1 SNP calls.

Some doodling around with indel calling in an EM context.
 



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1116 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
jmaguire 2009-06-29 16:32:12 +00:00
parent ceeeec13b8
commit 65a788f18a
6 changed files with 120 additions and 26 deletions

View File

@ -40,7 +40,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes {
public double getMAF() { return Double.parseDouble(this.get("EM_alt_freq")); }
public double getHeterozygosity() { return 2 * getMAF() * (1 - getMAF()); }
public boolean isGenotype() { return false; }
public double getVariationConfidence() { return Double.parseDouble(this.get("discovery_lod")) * 10; }
public double getVariationConfidence() { return Double.parseDouble(this.get("discovery_lod")); }
public double getConsensusConfidence() { return -1; }
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
public int getPloidy() throws IllegalStateException { return 2; }
@ -52,4 +52,4 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes {
public int nHetGenotypes() { return Integer.parseInt(this.get("n_het")); }
public int nHomVarGenotypes() { return Integer.parseInt(this.get("n_hom")); }
public List<Genotype> getGenotypes() { return null; }
}
}

View File

@ -67,6 +67,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
addModule("Table", TabularROD.class);
addModule("PooledEM", PooledEMSNPROD.class);
addModule("1KGSNPs", KGenomesSNPROD.class);
addModule("SangerSNP", SangerSNPROD.class);
addModule("Intervals", IntervalRod.class);
addModule("Variants", rodVariants.class);
}

View File

@ -0,0 +1,41 @@
package org.broadinstitute.sting.gatk.refdata;
import java.util.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
public SangerSNPROD(final String name) {
super(name);
}
public GenomeLoc getLocation() {
loc = GenomeLocParser.createGenomeLoc(this.get("0"), Long.parseLong(this.get("1")));
return loc;
}
public String getRefBasesFWD() { return this.get("2"); }
public char getRefSnpFWD() throws IllegalStateException { return getRefBasesFWD().charAt(0); }
public String getAltBasesFWD() { return this.get("3"); }
public char getAltSnpFWD() throws IllegalStateException { return getAltBasesFWD().charAt(0); }
public boolean isReference() { return getVariationConfidence() < 0.01; }
public boolean isSNP() { return ! isReference(); }
public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; }
public boolean isIndel() { return false; }
public double getMAF() { return -1; }
public double getHeterozygosity() { return -1; }
public boolean isGenotype() { return false; }
public double getVariationConfidence() { return -1; }
public double getConsensusConfidence() { return -1; }
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
// SNPCallFromGenotypes interface
public int nIndividuals() { return -1; }
public int nHomRefGenotypes() { return -1; }
public int nHetGenotypes() { return -1; }
public int nHomVarGenotypes() { return -1; }
public List<Genotype> getGenotypes() { return null; }
}

View File

@ -28,6 +28,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
@Argument(fullName="discovery_output", shortName="discovery_output", required=true, doc="file to write SNP discovery output to") public String DISCOVERY_OUTPUT;
@Argument(fullName="individual_output", shortName="individual_output", required=true, doc="file to write individual SNP calls to") public String INDIVIDUAL_OUTPUT;
@Argument(fullName="sample_name_regex", shortName="sample_name_regex", required=false, doc="sample_name_regex") public String SAMPLE_NAME_REGEX = null;
@Argument(fullName="call_indels", shortName="call_indels", required=false, doc="call indels?") public boolean CALL_INDELS = false;
// Private state.
List<String> sample_names;
@ -111,7 +112,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
char ref;
GenotypeLikelihoods Genotype(LocusContext context, double[] allele_likelihoods)
GenotypeLikelihoods Genotype(LocusContext context, double[] allele_likelihoods, double indel_alt_freq)
{
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
String bases = pileup.getBases();
@ -125,22 +126,6 @@ public class MultiSampleCaller extends LocusWalker<String,String>
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
ref = Character.toUpperCase(ref);
/*
// Handle indels.
if (call_indels)
{
String[] indels = BasicPileup.indelPileup(reads, offsets);
IndelCall indel_call = GenotypeLikelihoods.callIndel(indels);
if (indel_call != null)
{
if (! indel_call.type.equals("ref"))
{
System.out.printf("INDEL %s %s\n", context.getLocation(), indel_call);
}
}
}
*/
// Handle single-base polymorphisms.
GenotypeLikelihoods G = new GenotypeLikelihoods();
@ -152,6 +137,21 @@ public class MultiSampleCaller extends LocusWalker<String,String>
}
G.ApplyPrior(ref, allele_likelihoods);
// Handle indels
if (CALL_INDELS)
{
String[] indels = BasicPileup.indelPileup(reads, offsets);
IndelLikelihood indel_call = new IndelLikelihood(indels, indel_alt_freq);
if (indel_call.getType() != null)
{
G.addIndelLikelihood(indel_call);
}
else
{
G.addIndelLikelihood(null);
}
}
/*
// Handle 2nd-best base calls.
if (fourBaseMode && pileup.getBases().length() < 750)
@ -163,7 +163,6 @@ public class MultiSampleCaller extends LocusWalker<String,String>
return G;
}
// thoughly check this function
double[] CountFreqs(GenotypeLikelihoods[] genotype_likelihoods)
{
double[] allele_likelihoods = new double[4];
@ -199,6 +198,35 @@ public class MultiSampleCaller extends LocusWalker<String,String>
return allele_likelihoods;
}
double CountIndelFreq(GenotypeLikelihoods[] genotype_likelihoods)
{
HashMap<String, Double> indel_allele_likelihoods = new HashMap<String, Double>();
double pRef = 0;
double pAlt = 0;
for (int j = 0; j < sample_names.size(); j++)
{
double personal_pRef = 0;
double personal_pAlt = 0;
IndelLikelihood indel_likelihood = genotype_likelihoods[j].getIndelLikelihood();
personal_pRef += 2*Math.pow(10, indel_likelihood.pRef()) + Math.pow(10, indel_likelihood.pHet());
personal_pAlt += 2*Math.pow(10, indel_likelihood.pHom()) + Math.pow(10, indel_likelihood.pHet());
personal_pRef = personal_pRef / (personal_pAlt + personal_pRef);
personal_pAlt = personal_pAlt / (personal_pAlt + personal_pRef);
pRef += personal_pRef;
pAlt += personal_pAlt;
}
pRef = pRef / (pRef + pAlt);
pAlt = pAlt / (pRef + pAlt);
return pAlt;
}
// Potential precision error here.
double Compute_pD(GenotypeLikelihoods[] genotype_likelihoods)
{
@ -223,7 +251,7 @@ public class MultiSampleCaller extends LocusWalker<String,String>
GenotypeLikelihoods[] G = new GenotypeLikelihoods[sample_names.size()];
for (int j = 0; j < sample_names.size(); j++)
{
G[j] = Genotype(contexts[j], allele_likelihoods);
G[j] = Genotype(contexts[j], allele_likelihoods, 1e-6);
}
return Compute_pD(G);
}
@ -271,16 +299,22 @@ public class MultiSampleCaller extends LocusWalker<String,String>
if (i == BaseUtils.simpleBaseToBaseIndex(ref)) { allele_likelihoods[i] = 0.9994999; } //sqrt(0.999)
else { allele_likelihoods[i] = 0.0005002502; } // 0.001 / (2 * sqrt(0.999)
}
double indel_alt_freq = 1e-4;
GenotypeLikelihoods[] G = new GenotypeLikelihoods[sample_names.size()];
for (int i = 0; i < MAX_ITERATIONS; i++)
{
for (int j = 0; j < sample_names.size(); j++)
{
G[j] = Genotype(contexts[j], allele_likelihoods);
G[j] = Genotype(contexts[j], allele_likelihoods, indel_alt_freq);
}
allele_likelihoods = CountFreqs(G);
if (CALL_INDELS)
{
indel_alt_freq = CountIndelFreq(G);
}
}
return new EM_Result(G, allele_likelihoods);

View File

@ -155,6 +155,8 @@ public class GenotypeLikelihoods {
public void add(char ref, char read, byte qual)
{
if (qual <= 0) { qual = 1; }
if (coverage == 0)
{
for (int i = 0; i < likelihoods.length; i++)
@ -398,4 +400,9 @@ public class GenotypeLikelihoods {
AFE.genotypeLikelihoods = this;
return AFE;
}
private IndelLikelihood indel_likelihood;
public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; }
public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; }
}

View File

@ -8,11 +8,17 @@ public class IndelLikelihood {
private double p;
private double lod;
private double pRef;
private double pHet;
private double pHom;
private String alt;
public IndelLikelihood(String type, String[] alleles, double p, double lod) {
initialize(type, alleles, p, lod);
}
public IndelLikelihood(String[] indels) {
public IndelLikelihood(String[] indels, double indel_alt_freq)
{
HashMap<String,Integer> indel_allele_counts = new HashMap<String,Integer>();
for (int i = 0; i < indels.length; i++) {
@ -43,9 +49,9 @@ public class IndelLikelihood {
//System.out.printf("\n");
double eps = 1e-3;
double pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + Math.log10(0.999);
double pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10(1e-3);
double pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + Math.log10(1e-5);
pRef = null_count*Math.log10(1.0 - eps) + max_count*Math.log10(eps) + 2*Math.log10(1-indel_alt_freq);
pHet = null_count*Math.log10(0.5 - eps/2) + max_count*Math.log10(0.5-eps/2) + Math.log10((1-indel_alt_freq)*indel_alt_freq);
pHom = null_count*Math.log10(eps) + max_count*Math.log10(1.0 - eps) + 2*Math.log10(indel_alt_freq);
double lodRef = pRef - Math.max(pHet, pHom);
double lodHet = pHet - pRef;
@ -91,6 +97,11 @@ public class IndelLikelihood {
this.lod = lod;
}
public String getAlt() { return alt; }
public double pRef() { return pRef; }
public double pHet() { return pHet; }
public double pHom() { return pHom; }
public String getType() { return type; }
public String[] getAlleles() { return alleles; }
public double getPosteriorProbability() { return p; }