completed my old draft of the old school single sample genotype walker

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@475 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
jmaguire 2009-04-21 05:38:04 +00:00
parent b8233d92c8
commit 11c520b283
3 changed files with 157 additions and 35 deletions

View File

@ -41,7 +41,7 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
deep_callers = new ArrayList<AlleleFrequencyWalker>(); deep_callers = new ArrayList<AlleleFrequencyWalker>();
shallow_callers = new ArrayList<AlleleFrequencyWalker>(); shallow_callers = new ArrayList<AlleleFrequencyWalker>();
random = new Random(666); random = new Random(42);
for (int i = 0; i < read_groups.size(); i++) for (int i = 0; i < read_groups.size(); i++)
{ {

View File

@ -1,11 +1,14 @@
package org.broadinstitute.sting.playground.gatk.walkers; package org.broadinstitute.sting.playground.gatk.walkers;
import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.*;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.playground.utils.*;
import org.broadinstitute.sting.playground.gatk.*;
import org.broadinstitute.sting.playground.gatk.walkers.*;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import java.util.List; import java.util.List;
@ -13,13 +16,22 @@ import java.util.List;
// Draft single sample genotyper // Draft single sample genotyper
// j.maguire 3-7-2009 // j.maguire 3-7-2009
public class SingleSampleGenotyper extends LocusWalker<Integer, Integer> { public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer>
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { {
AlleleMetrics metrics;
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context)
{
return true; // We are keeping all the reads return true; // We are keeping all the reads
} }
public boolean requiresReads() { return true; } public boolean requiresReads() { return true; }
public void initialize()
{
metrics = new AlleleMetrics("metrics.out");
}
protected class GenotypeLikelihoods protected class GenotypeLikelihoods
{ {
public double[] likelihoods; public double[] likelihoods;
@ -65,38 +77,138 @@ public class SingleSampleGenotyper extends LocusWalker<Integer, Integer> {
return p_base; return p_base;
} }
public String toString() public String[] sorted_genotypes;
{ public double[] sorted_likelihoods;
Integer[] permutation = Utils.SortPermutation(likelihoods);
String[] sorted_genotypes = Utils.PermuteArray(genotypes, permutation);
double[] sorted_likelihoods = Utils.PermuteArray(likelihoods, permutation);
String s = ""; public void sort()
for (int i = sorted_genotypes.length-1; i >= 0; i--) {
Integer[] permutation = Utils.SortPermutation(likelihoods);
Integer[] reverse_permutation = new Integer[permutation.length];
for (int i = 0; i < reverse_permutation.length; i++) { reverse_permutation[i] = permutation[(permutation.length-1) - i]; }
sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
}
public String toString(char ref)
{
this.sort();
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
for (int i = 0; i < sorted_genotypes.length; i++)
{ {
if (i != sorted_genotypes.length-1) { s = s + " "; } if (i != 0) { s = s + " "; }
s = s + sorted_genotypes[i] + ":" + sorted_likelihoods[i]; s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
} }
return s; return s;
} }
public void ApplyPrior(char ref, double p_alt)
{
for (int i = 0; i < genotypes.length; i++)
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
{
// hom-ref
likelihoods[i] += Math.log10(1.0 - 1e-3);
}
else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref))
{
// hom-nonref
likelihoods[i] += Math.log10(1e-5);
}
else
{
// het
likelihoods[i] += Math.log10(1e-3);
}
}
this.sort();
}
public double LodVsNextBest()
{
this.sort();
return sorted_likelihoods[0] - sorted_likelihoods[1];
}
double ref_likelihood = Double.NaN;
public double LodVsRef(char ref)
{
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) { ref_likelihood = sorted_likelihoods[0]; return LodVsNextBest(); }
else
{
for (int i = 0; i < genotypes.length; i++)
{
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref))
{
ref_likelihood = likelihoods[i];
}
}
}
return sorted_likelihoods[0] - ref_likelihood;
}
public String BestGenotype()
{
this.sort();
return this.sorted_genotypes[0];
}
public double BestPosterior()
{
this.sort();
return this.sorted_likelihoods[0];
}
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors)
{
double qhat = Double.NaN;
double qstar = Double.NaN;
char alt = 'N';
if ((sorted_genotypes[0].charAt(0) == ref) && (sorted_genotypes[0].charAt(1) == ref))
{
// hom-ref
qhat = 0.0;
qstar = 0.0;
alt = 'N';
}
else if ((sorted_genotypes[0].charAt(0) != ref) && (sorted_genotypes[0].charAt(1) != ref))
{
// hom-nonref
likelihoods[0] += Math.log10(1e-5);
qhat = 1.0;
qstar = 1.0;
alt = sorted_genotypes[0].charAt(0);
}
else
{
// het
likelihoods[0] += Math.log10(1e-3);
qhat = 0.5;
qstar = 0.5;
if (sorted_genotypes[0].charAt(0) != ref) { alt = sorted_genotypes[0].charAt(0); }
if (sorted_genotypes[0].charAt(1) != ref) { alt = sorted_genotypes[0].charAt(1); }
}
return new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][])null, this.likelihoods);
}
} }
// Map over the org.broadinstitute.sting.gatk.LocusContext public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) { {
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
//for ( SAMRecord read : context.getReads() ) {
// System.out.println(" -> " + read.getReadName());
//}
List<SAMRecord> reads = context.getReads(); List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets(); List<Integer> offsets = context.getOffsets();
String bases = ""; String bases = "";
String quals = ""; String quals = "";
//String offsetString = "";
// Look up hapmap and dbsnp priors ref = Character.toUpperCase(ref);
String rodString = ""; String rodString = "";
// Look up dbsnp priors
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) for ( ReferenceOrderedDatum datum : tracker.getAllRods() )
{ {
if ( datum != null ) if ( datum != null )
@ -104,7 +216,7 @@ public class SingleSampleGenotyper extends LocusWalker<Integer, Integer> {
if ( datum instanceof rodDbSNP) if ( datum instanceof rodDbSNP)
{ {
rodDbSNP dbsnp = (rodDbSNP)datum; rodDbSNP dbsnp = (rodDbSNP)datum;
rodString += dbsnp.toMediumString(); rodString += dbsnp.toString();
} }
else else
{ {
@ -126,18 +238,22 @@ public class SingleSampleGenotyper extends LocusWalker<Integer, Integer> {
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]); G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
} }
G.ApplyPrior(ref, Double.NaN);
if ( context.getLocation().getStart() % 1 == 0 ) { System.out.printf("%s %s %s %s\n", context.getLocation(), ref, bases, G.toString(ref), rodString);
//System.out.printf("%s: %s %s %s %s%n", context.getLocation(), ref, bases, quals, rodString);
out.printf("%s %s %s %s\n", ref, bases, G.toString(), rodString);
}
return 1; AlleleFrequencyEstimate freq = G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
metrics.nextPosition(freq, tracker);
metrics.printMetricsAtLocusIntervals(1000);
return freq;
} }
// Given result of map function // Given result of map function
public Integer reduceInit() { return 0; } public Integer reduceInit() { return 0; }
public Integer reduce(Integer value, Integer sum) { public Integer reduce(AlleleFrequencyEstimate value, Integer sum)
return value + sum; {
return 0;
} }
} }

View File

@ -166,4 +166,10 @@ public class AlleleFrequencyEstimate {
return AlleleFrequencyWalker.repeat(alt, numNonrefBases) + AlleleFrequencyWalker.repeat(ref, numRefBases); return AlleleFrequencyWalker.repeat(alt, numNonrefBases) + AlleleFrequencyWalker.repeat(ref, numRefBases);
} }
} }
public double posterior()
{
return this.posteriors[(int)this.qstar * this.N];
}
} }