From 9a7cec7d2e52580af144c4ee49360dbe1c4a2b67 Mon Sep 17 00:00:00 2001 From: kiran Date: Thu, 18 Jun 2009 07:20:38 +0000 Subject: [PATCH] Directory to house variant calling and filtration tools. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1040 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/variants/IVFBinomialStrand.java | 43 ++++++++++++ .../gatk/walkers/variants/IVFNull.java | 11 ++++ .../variants/IndependentVariantFeature.java | 11 ++++ .../variants/VariantFiltrationWalker.java | 66 +++++++++++++++++++ 4 files changed, 131 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java new file mode 100755 index 000000000..8a10ca335 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java @@ -0,0 +1,43 @@ +package org.broadinstitute.sting.playground.gatk.walkers.variants; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.MathUtils; +import net.sf.samtools.SAMRecord; + +import java.util.List; + +public class IVFBinomialStrand implements IndependentVariantFeature { + public String getFeatureName() { return "binomial"; } + + public double[] compute(char ref, LocusContext context) { + double[] likelihoods = new double[10]; + + ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + List reads = context.getReads(); + String bases = pileup.getBases(); + + for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { + Genotype genotype = Genotype.values()[genotypeIndex]; + + char[] alleles = { genotype.toString().charAt(0), genotype.toString().charAt(1) }; + int[] strandCounts = { 0, 0 }; + + for (int pileupIndex = 0; pileupIndex < bases.length(); pileupIndex++) { + for (int alleleIndex = 0; alleleIndex < alleles.length; alleleIndex++) { + if (bases.charAt(pileupIndex) == alleles[alleleIndex]) { + if (reads.get(pileupIndex).getReadNegativeStrandFlag()) { + strandCounts[1]++; + } else { + strandCounts[0]++; + } + } + } + } + + likelihoods[genotypeIndex] = Math.log10(MathUtils.binomialProbability(strandCounts[0], strandCounts[0] + strandCounts[1], 0.5)); + } + + return likelihoods; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java new file mode 100755 index 000000000..809206452 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java @@ -0,0 +1,11 @@ +package org.broadinstitute.sting.playground.gatk.walkers.variants; + +import org.broadinstitute.sting.gatk.LocusContext; + +public class IVFNull implements IndependentVariantFeature { + public String getFeatureName() { return "null"; } + + public double[] compute(char ref, LocusContext context) { + return new double[0]; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java new file mode 100755 index 000000000..7ccda69b3 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java @@ -0,0 +1,11 @@ +package org.broadinstitute.sting.playground.gatk.walkers.variants; + +import org.broadinstitute.sting.gatk.LocusContext; + +public interface IndependentVariantFeature { + public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } + + public String getFeatureName(); + + public double[] compute(char ref, LocusContext context); +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java new file mode 100755 index 000000000..b289fc2ea --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java @@ -0,0 +1,66 @@ +package org.broadinstitute.sting.playground.gatk.walkers.variants; + +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.RMD; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.rodVariants; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate; + +import java.io.File; +import java.io.PrintWriter; +import java.io.FileNotFoundException; + +@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class)) +public class VariantFiltrationWalker extends LocusWalker { + @Argument(fullName="features", shortName="F", doc="Comma-separated list of feature tests to apply to genotype posteriors.") public String FEATURES; + @Argument(fullName="variants_out", shortName="VO", doc="File to which modified variants should be written") public File VARIANTS_OUT; + + private String[] features; + private PrintWriter vwriter; + + public void initialize() { + features = FEATURES.split(","); + + try { + vwriter = new PrintWriter(VARIANTS_OUT); + + vwriter.println(AlleleFrequencyEstimate.geliHeaderString()); + } catch (FileNotFoundException e) { + throw new StingException(String.format("Could not open file '%s' for writing", VARIANTS_OUT.getAbsolutePath())); + } + } + + public Integer reduceInit() { return 0; } + + public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) { + rodVariants variant = (rodVariants) tracker.lookup("variant", null); + + for (String feature : features) { + IndependentVariantFeature ivf; + + if (feature.equalsIgnoreCase("binomial")) { ivf = new IVFBinomialStrand(); } + else { throw new StingException(String.format("Cannot understand feature '%s'\n", feature)); } + + variant.multiplyLikelihoods(ivf.compute(ref, context)); + vwriter.println(variant); + } + + return 1; + } + + public Integer reduce(Integer value, Integer sum) { + return sum + 1; + } + + public void onTraversalDone(Integer result) { + out.printf("Processed %d loci.\n", result); + + vwriter.close(); + } +}