From ed7afd8b7083d56f7a4c0976e59d788084f62442 Mon Sep 17 00:00:00 2001 From: kiran Date: Thu, 18 Jun 2009 23:28:38 +0000 Subject: [PATCH] Added javadocs. Now throws an exception if an unknown feature is specified. General cleanup. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1055 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/variants/IVFBinomialStrand.java | 8 +- .../variants/IndependentVariantFeature.java | 23 ++- .../variants/VariantFiltrationWalker.java | 145 ++++++++++++------ 3 files changed, 126 insertions(+), 50 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java index 673deb6bb..6a3d41b8c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.MathUtils; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.ReadBackedPileup; import java.util.List; @@ -16,8 +16,6 @@ public class IVFBinomialStrand implements IndependentVariantFeature { } } - public String getFeatureName() { return "binomial"; } - public double[] compute(char ref, LocusContext context) { double[] likelihoods = new double[10]; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java index efd139c8f..0de3d6531 100755 --- 
a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java @@ -2,10 +2,29 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; import org.broadinstitute.sting.gatk.LocusContext; +/** + * Interface for conditionally independent variant features. + */ public interface IndependentVariantFeature { + /** + * A convenient enumeration for each of the ten genotypes. + */ public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } + /** + * Method so that features can initialize themselves based on a short argument string. + * At the moment, each feature is responsible for interpreting its own argument string. + * + * @param arguments the short argument string for this feature + */ public void initialize(String arguments); - + + /** + * Method to compute the result of this feature for each of the ten genotypes. The return value must + * be a double array of length 10 (one for each genotype) and the values must be in log10-space. 
+ * @param ref the reference base + * @param context the context for the given locus + * @return a ten-element array of the log-likelihood results of the feature applied to each genotype + */ public double[] compute(char ref, LocusContext context); -} +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java index f2a524beb..f965f7fc9 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java @@ -1,34 +1,65 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.rodVariants; +import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.rodVariants; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.PackageUtils; -import org.broadinstitute.sting.utils.JVMUtils; import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.PackageUtils; +import org.broadinstitute.sting.utils.StingException; +import 
org.broadinstitute.sting.utils.cmdLine.Argument; import java.io.File; -import java.io.PrintWriter; import java.io.FileNotFoundException; -import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; +/** + * VariantFiltrationWalker applies specified conditionally independent features to pre-called variants, thus modifying + * the likelihoods of each genotype. At the moment, the variants are expected to be in gelitext format. + */ @Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class)) public class VariantFiltrationWalker extends LocusWalker { - @Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname:arguments'") public String[] FEATURES; + @Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'") public String[] FEATURES; @Argument(fullName="variants_out", shortName="VO", doc="File to which modified variants should be written") public File VARIANTS_OUT; @Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false; - private PrintWriter vwriter; private ArrayList featureClasses; + private PrintWriter vwriter; + /** + * Trim the 'IVF' off the feature name so the user needn't specify that on the command-line. + * + * @param featureClass the feature class whose name we should rationalize + * @return the class name, minus 'IVF' + */ + private String rationalizeFeatureClassName(Class featureClass) { + String featureClassName = featureClass.getSimpleName(); + return featureClassName.replaceFirst("IVF", ""); + } + + /** + * Returns a comma-separated list of available features the user may specify at the command-line. 
+ * + * @return String of available features + */ + private String getAvailableFeatureClasses() { + String featureString = ""; + + for (int featureClassIndex = 0; featureClassIndex < featureClasses.size(); featureClassIndex++) { + featureString += rationalizeFeatureClassName(featureClasses.get(featureClassIndex)) + (featureClassIndex == featureClasses.size() - 1 ? "" : ","); + } + + return featureString; + } + + /** + * Prepare the output file and the list of available features. + */ public void initialize() { try { vwriter = new PrintWriter(VARIANTS_OUT); @@ -40,58 +71,86 @@ public class VariantFiltrationWalker extends LocusWalker { } } + /** + * Initialize the number of loci processed to zero. + * + * @return 0 + */ public Integer reduceInit() { return 0; } + /** + * For each site of interest, rescore the genotype likelihoods by applying the specified feature set. + * + * @param tracker the meta-data tracker + * @param ref the reference base + * @param context the context for the given locus + * @return 1 if the locus was successfully processed, 0 if otherwise + */ public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) { rodVariants variant = (rodVariants) tracker.lookup("variant", null); - for (String feature : FEATURES) { - String[] featurePieces = feature.split(":"); - String featureName = featurePieces[0]; - String featureArgs = featurePieces[1]; + // Ignore places where we don't have a variant or where the reference base is ambiguous. + if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref) != -1) { + if (VERBOSE) { out.println("Original:\n " + variant); } - IndependentVariantFeature ivf; + for (String requestedFeatureString : FEATURES) { + String[] requestedFeaturePieces = requestedFeatureString.split(":"); + String requestedFeatureName = requestedFeaturePieces[0]; + String requestedFeatureArgs = (requestedFeaturePieces.length == 2) ? 
requestedFeaturePieces[1] : ""; - if (VERBOSE) { - out.println("Original:"); - out.println(" " + variant); - } + int notYetSeenFeature = 0; + for ( Class featureClass : featureClasses ) { + String featureClassName = rationalizeFeatureClassName(featureClass); - for ( Class featureClass : featureClasses ) { - String featureClassName = featureClass.getSimpleName(); - featureClassName = featureClassName.replaceFirst("IVF", ""); + if (requestedFeatureName.equalsIgnoreCase(featureClassName)) { + try { + IndependentVariantFeature ivf = (IndependentVariantFeature) featureClass.newInstance(); + ivf.initialize(requestedFeatureArgs); - if (featureName.equalsIgnoreCase(featureClassName)) { - try { - ivf = (IndependentVariantFeature) featureClass.newInstance(); - ivf.initialize(featureArgs); + variant.adjustLikelihoods(ivf.compute(ref, context)); - variant.adjustLikelihoods(ivf.compute(ref, context)); - - if (VERBOSE) { - out.println(featureClassName + ":"); - out.println(" " + variant); + if (VERBOSE) { out.println(featureClassName + ":\n " + variant); } + } catch (InstantiationException e) { + throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName())); + } catch (IllegalAccessException e) { + throw new StingException(String.format("Cannot instantiate feature class '%s': must have no-arg constructor", featureClass.getSimpleName())); } - } catch (InstantiationException e) { - throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName())); - } catch (IllegalAccessException e) { - throw new StingException(String.format("Cannot instantiate feature class '%s': must have no-arg constructor", featureClass.getSimpleName())); + } else { + notYetSeenFeature++; } } + + if (notYetSeenFeature == featureClasses.size()) { + throw new StingException(String.format("Unknown feature '%s'. 
Valid features are '%s'", requestedFeatureName, getAvailableFeatureClasses())); + } + + if (VERBOSE) { System.out.println(); } } - if (VERBOSE) { System.out.println(); } + vwriter.println(variant); + + return 1; } - - vwriter.println(variant); - return 1; + return 0; } + /** + * Increment the number of loci processed. + * + * @param value result of the map. + * @param sum accumulator for the reduce. + * @return the new number of loci processed. + */ public Integer reduce(Integer value, Integer sum) { - return sum + 1; + return sum + value; } + /** + * Tell the user the number of loci processed and close out the new variants file. + * + * @param result the number of loci seen. + */ public void onTraversalDone(Integer result) { out.printf("Processed %d loci.\n", result);