From 01fc8da27075f33b43ba36f5f333ef8a09d104cc Mon Sep 17 00:00:00 2001 From: aaron Date: Tue, 14 Jul 2009 16:57:18 +0000 Subject: [PATCH] adding the GenotypeLikelihoodsWalker, which generates GLF genotype likelihoods that are pretty much identical to the samtools calls. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1235 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/GenotypeLikelihoodsWalker.java | 125 ++++++++++++++++++ .../gatk/walkers/SingleSampleGenotyper.java | 7 +- .../playground/utils/GenotypeLikelihoods.java | 17 +-- .../utils/genotype/GenotypeWriterFactory.java | 4 +- .../utils/genotype/LikelihoodObject.java | 2 +- .../sting/utils/genotype/glf/GLFRecord.java | 6 +- .../sting/utils/genotype/glf/GLFWriter.java | 2 +- 7 files changed, 137 insertions(+), 26 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/GenotypeLikelihoodsWalker.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/GenotypeLikelihoodsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/GenotypeLikelihoodsWalker.java new file mode 100644 index 000000000..d844379c4 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/GenotypeLikelihoodsWalker.java @@ -0,0 +1,125 @@ +package org.broadinstitute.sting.playground.gatk.walkers; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.ReadFilters; +import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; + +import java.io.File; +import java.util.List; + + +/** + * + * @author aaron + * + * Class LikelihoodWalker + * + * a simple walker to calculate the genotype likelihoods + */ +@ReadFilters(ZeroMappingQualityReadFilter.class) +public class GenotypeLikelihoodsWalker extends LocusWalker { + @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE; + @Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr"); + @Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF; + + + @Override + public LikelihoodWrapper map(RefMetaDataTracker tracker, char ref, LocusContext context) { + ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + String bases = pileup.getBases(); + List reads = context.getReads(); + List offsets = context.getOffsets(); + double rmsSum = 0.0; + // Handle single-base polymorphisms. + GenotypeLikelihoods G = new GenotypeLikelihoods(); + for (int i = 0; i < reads.size(); i++) { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + rmsSum += (read.getMappingQuality() * read.getMappingQuality()); + G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]); + } + + // our return + LikelihoodWrapper wrap = new LikelihoodWrapper(); + + return wrapLikelihoods(ref, context, reads, rmsSum, G, wrap); + } + + /** + * wrap the likelihood values in with the other data we'll need + * @param ref the ref base + * @param context the locus context + * @param reads the reads + * @param rmsSum the rms square total (not the actual rms yet) + * @param g the genotypeLikelihoods + * @param wrap the object to place the values into + * @return a likelihood wrapper + */ + private LikelihoodWrapper wrapLikelihoods(char ref, LocusContext context, List reads, double rmsSum, GenotypeLikelihoods g, LikelihoodWrapper wrap) { + wrap.obj = new LikelihoodObject(); + wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.LOG); + for (int x = 0; x < GenotypeLikelihoods.genotypes.length; x++) { + wrap.obj.setLikelihood(LikelihoodObject.GENOTYPE.valueOf(GenotypeLikelihoods.genotypes[x]), g.likelihoods[x]*10.0); + } + wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); + wrap.loc = GenomeLocParser.getContigInfo(context.getContig()); + wrap.readDepth = reads.size(); + float rms = (float)(Math.sqrt(rmsSum/reads.size())); + wrap.rms = (rms > 255) ? 255 : rms; + wrap.ref = ref; + wrap.position = context.getLocation().getStart(); + return wrap; + } + + /** + * Provide an initial value for reduce computations. + * + * @return Initial value of reduce. + */ + @Override + public GenotypeWriter reduceInit() { + return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(),VARIANTS_FILE); + } + + /** + * Reduces a single map with the accumulator provided as the ReduceType. + * + * @param value result of the map. + * @param sum accumulator for the reduce. + * + * @return accumulator with result of the map taken into account. + */ + @Override + public GenotypeWriter reduce(LikelihoodWrapper value, GenotypeWriter sum) { + + sum.addGenotypeCall(value.loc,(int)value.position,value.rms,value.ref,value.readDepth,value.obj); + return sum; + } + + /** Close the variant file. */ + public void onTraversalDone(GenotypeWriter result) { + result.close(); + } +} + + +class LikelihoodWrapper { + public LikelihoodObject obj; + public SAMSequenceRecord loc; + public long position; + public float rms; + public char ref; + public int readDepth; +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java index c3ac13450..8e69af716 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java @@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.ReadFilters; -import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate; import org.broadinstitute.sting.playground.utils.AlleleMetrics; import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; @@ -21,8 +20,8 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import java.io.File; -import java.io.PrintStream; import java.io.FileNotFoundException; +import java.io.PrintStream; import java.util.List; @ReadFilters(ZeroMappingQualityReadFilter.class) @@ -106,10 +105,8 @@ public class SingleSampleGenotyper extends LocusWalker 255) ? 255 : (short)min; } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index cbab0c727..b988929b5 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -55,7 +55,7 @@ public class GLFWriter implements GenotypeWriter { private long referenceSequenceLength = 0; // the last position written - private int lastPos = 0; + private int lastPos = 1; /** * The public constructor for creating a GLF object