diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java index 6a3d41b8c..8a20ba58a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java @@ -10,14 +10,16 @@ import java.util.List; public class IVFBinomialStrand implements IndependentVariantFeature { private double strandBalance = 0.5; + private double[] likelihoods; + public void initialize(String arguments) { if (arguments != null && !arguments.isEmpty()) { strandBalance = Double.valueOf(arguments); } } - public double[] compute(char ref, LocusContext context) { - double[] likelihoods = new double[10]; + public void compute(char ref, LocusContext context) { + likelihoods = new double[10]; ReadBackedPileup pileup = new ReadBackedPileup(ref, context); List reads = context.getReads(); @@ -43,7 +45,17 @@ public class IVFBinomialStrand implements IndependentVariantFeature { likelihoods[genotypeIndex] = Math.log10(MathUtils.binomialProbability(strandCounts[0], strandCounts[0] + strandCounts[1], strandBalance)); } + } + public double[] getLikelihoods() { return likelihoods; } + + public String getStudyHeader() { + return ""; + } + + public String getStudyInfo() { + return ""; + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java index 32d91f446..d859b81cf 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java @@ -5,7 +5,18 @@ import org.broadinstitute.sting.gatk.LocusContext; public class IVFNull implements IndependentVariantFeature { public void initialize(String arguments) {} - public double[] compute(char ref, LocusContext context) { + public void compute(char ref, LocusContext context) { + } + + public double[] getLikelihoods() { return new double[10]; } + + public String getStudyHeader() { + return ""; + } + + public String getStudyInfo() { + return ""; + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFPrimaryBases.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFPrimaryBases.java index fc05e45e5..ef12fc5bb 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFPrimaryBases.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFPrimaryBases.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.ReadBackedPileup; public class IVFPrimaryBases implements IndependentVariantFeature { private double[] p = { 0.933, 0.972, 0.970, 0.960, 0.945, 0.990, 0.971, 0.943, 0.978, 0.928 }; + private double[] likelihoods; + /** * Method so that features can initialize themselves based on a short argument string. At the moment, each feature is * responsible for interpreting their own argument string. @@ -31,8 +33,8 @@ public class IVFPrimaryBases implements IndependentVariantFeature { * @param context the context for the given locus * @return a ten-element array of log-likelihood result of the feature applied to each genotype */ - public double[] compute(char ref, LocusContext context) { - double[] likelihoods = new double[10]; + public void compute(char ref, LocusContext context) { + likelihoods = new double[10]; ReadBackedPileup pileup = new ReadBackedPileup(ref, context); String primaryBases = pileup.getBases(); @@ -62,7 +64,17 @@ public class IVFPrimaryBases implements IndependentVariantFeature { likelihoods[genotypeIndex] = Math.log10(Prior); } + } + public double[] getLikelihoods() { return likelihoods; } + + public String getStudyHeader() { + return ""; + } + + public String getStudyInfo() { + return ""; + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFSecondaryBases.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFSecondaryBases.java index 51b2e609c..611f6a2b7 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFSecondaryBases.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFSecondaryBases.java @@ -8,6 +8,8 @@ public class IVFSecondaryBases implements IndependentVariantFeature { private double[] p2on = { 0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000 }; private double[] p2off = { 0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505 }; + private double[] likelihoods; + /** * Method so that features can initialize themselves based on a short argument string. At the moment, each feature is * responsible for interpreting their own argument string. @@ -58,8 +60,8 @@ public class IVFSecondaryBases implements IndependentVariantFeature { * @param context the context for the given locus * @return a ten-element array of log-likelihood result of the feature applied to each genotype */ - public double[] compute(char ref, LocusContext context) { - double[] likelihoods = new double[10]; + public void compute(char ref, LocusContext context) { + likelihoods = new double[10]; ReadBackedPileup pileup = new ReadBackedPileup(ref, context); String primaryBases = pileup.getBases(); @@ -109,7 +111,17 @@ public class IVFSecondaryBases implements IndependentVariantFeature { likelihoods[genotypeIndex] = logOffPrior + logOnPrior; } + } + public double[] getLikelihoods() { return likelihoods; } + + public String getStudyHeader() { + return ""; + } + + public String getStudyInfo() { + return ""; + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java index 3e5e6c3fb..3943c6c46 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java @@ -25,7 +25,12 @@ public interface IndependentVariantFeature { * * @param ref the reference base * @param context the context for the given locus - * @return a ten-element array of log-likelihood result of the feature applied to each genotype */ - public double[] compute(char ref, LocusContext context); + public void compute(char ref, LocusContext context); + + public double[] getLikelihoods(); + + public String getStudyHeader(); + + public String getStudyInfo(); } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java index 177d9097d..d912599a7 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java @@ -207,6 +207,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion { protected String name = null; protected GenotypeFeatureData.Tail tail = null; + protected boolean exclude = false; /** * A short-term hack to stop the systme from rejecting poorly covered sites, under the assumption @@ -256,7 +257,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion { public boolean useZeroQualityReads() { return false; } - public boolean exclude(char ref, LocusContext context, rodVariants variant) { + public void compute(char ref, LocusContext context, rodVariants variant) { boolean exclude = false; // @@ -285,10 +286,20 @@ public abstract class RatioFilter implements VariantExclusionCriterion { name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n, value, gfd, exclude, pileup.getBases())); } + } + public boolean isExcludable() { return exclude; } + public String getStudyHeader() { + return ""; + } + + public String getStudyInfo() { + return ""; + } + private final static double SEARCH_INCREMENT = 0.01; private final static double integralPValueThreshold = 0.05; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java index 2d107754e..1dc2a0d28 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java @@ -13,15 +13,32 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants; public class VECDepthOfCoverage implements VariantExclusionCriterion { private int maximum = 200; + private boolean exclude = false; + private int depth; + public void initialize(String arguments) { if (arguments != null && !arguments.isEmpty()) { maximum = Integer.valueOf(arguments); } } - public boolean useZeroQualityReads() { return false; } + public void compute(char ref, LocusContext context, rodVariants variant) { + exclude = context.getReads().size() > maximum; - public boolean exclude(char ref, LocusContext context, rodVariants variant) { - return context.getReads().size() > maximum; + depth = context.getReads().size(); } + + public boolean isExcludable() { + return exclude; + } + + public String getStudyHeader() { + return "#depth"; + } + + public String getStudyInfo() { + return Integer.toString(depth); + } + + public boolean useZeroQualityReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java index be57d5d5a..91415685f 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java @@ -10,8 +10,8 @@ import java.util.List; import cern.jet.math.Arithmetic; public class VECFisherStrand implements VariantExclusionCriterion { - private double pvalueLimit = 0.0001; + private boolean exclude; public void initialize(String arguments) { if (arguments != null && !arguments.isEmpty()) { @@ -19,15 +19,27 @@ public class VECFisherStrand implements VariantExclusionCriterion { } } - public boolean exclude(char ref, LocusContext context, rodVariants variant) { + public void compute(char ref, LocusContext context, rodVariants variant) { int allele1 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(0)); int allele2 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(1)); if (allele1 != allele2) { - return strandTest(ref, context, allele1, allele2, pvalueLimit, null); + exclude = strandTest(ref, context, allele1, allele2, pvalueLimit, null); + } else { + exclude = false; } + } - return false; + public boolean isExcludable() { + return exclude; + } + + public String getStudyHeader() { + return ""; + } + + public String getStudyInfo() { + return ""; } public boolean useZeroQualityReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java index 9415c9bce..3eeb2932f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java @@ -6,15 +6,34 @@ import org.broadinstitute.sting.gatk.LocusContext; public class VECLodThreshold implements VariantExclusionCriterion { private double lodThreshold = 5.0; + private boolean exclude; + public void initialize(String arguments) { if (arguments != null && !arguments.isEmpty()) { lodThreshold = Double.valueOf(arguments); } } + public void compute(char ref, LocusContext context, rodVariants variant) { + exclude = (variant.getLodBtr() < lodThreshold); + } + public boolean useZeroQualityReads() { return false; } public boolean exclude(char ref, LocusContext context, rodVariants variant) { return (variant.getLodBtr() < lodThreshold); } + + public boolean isExcludable() { + return exclude; + } + + public String getStudyHeader() { + return ""; + } + + public String getStudyInfo() { + return ""; + } + } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java new file mode 100755 index 000000000..acb2f66df --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.playground.gatk.walkers.variants; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.rodVariants; + +public class VECNull implements VariantExclusionCriterion { + public void initialize(String arguments) { + } + + public void compute(char ref, LocusContext context, rodVariants variant) { + } + + public boolean isExcludable() { + return false; + } + + public String getStudyHeader() { + return ""; + } + + public String getStudyInfo() { + return ""; + } + + public boolean useZeroQualityReads() { + return false; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java index 560decb8d..33bf995c2 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; import org.broadinstitute.sting.gatk.refdata.rodVariants; +import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.utils.*; public class VECOnOffGenotypeRatio extends RatioFilter { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java index 3b2a8f504..41220572d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java @@ -6,6 +6,12 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants; public interface VariantExclusionCriterion { public void initialize(String arguments); - public boolean exclude(char ref, LocusContext context, rodVariants variant); + public void compute(char ref, LocusContext context, rodVariants variant); + + public boolean isExcludable(); + + public String getStudyHeader(); + + public String getStudyInfo(); public boolean useZeroQualityReads(); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java index d68d96fe2..459716e37 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java @@ -21,8 +21,9 @@ import java.util.*; import net.sf.samtools.SAMRecord; /** - * VariantFiltrationWalker applies specified conditionally independent features to pre-called variants, thus modifying - * the likelihoods of each genotype. At the moment, the variants are expected to be in gelitext format. + * VariantFiltrationWalker applies specified conditionally independent features and filters to pre-called variants. + * The former modifiesthe likelihoods of each genotype, while the latter makes a decision to include or exclude a + * variant outright. At the moment, the variants are expected to be in gelitext format. */ @Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class)) public class VariantFiltrationWalker extends LocusWalker { @@ -31,12 +32,15 @@ public class VariantFiltrationWalker extends LocusWalker { @Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:arguments]'", required=false) public String[] EXCLUSIONS; @Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false; @Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false; + @Argument(fullName="learning_mode", shortName="LM", doc="Output parseable information on each filter that can then be fed back to the filter as a training set") public Boolean LEARNING = false; @Argument(fullName="truth", shortName="truth", doc="Operate on truth set only") public Boolean TRUTH = false; private List> featureClasses; private List> exclusionClasses; + private PrintWriter vwriter; private HashMap ewriters; + private HashMap swriters; private ArrayList requestedFeatures; private ArrayList requestedExclusions; @@ -61,6 +65,8 @@ public class VariantFiltrationWalker extends LocusWalker { requestedFeatures = new ArrayList(); requestedExclusions = new ArrayList(); + swriters = new HashMap(); + // Initialize requested features if (FEATURES != null) { for (String requestedFeatureString : FEATURES) { @@ -76,6 +82,13 @@ public class VariantFiltrationWalker extends LocusWalker { IndependentVariantFeature ivf = (IndependentVariantFeature) featureClass.newInstance(); ivf.initialize(requestedFeatureArgs); + if (LEARNING) { + PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + featureClassName + ".study"); + studyWriter.println(ivf.getStudyHeader()); + + swriters.put(featureClassName, studyWriter); + } + requestedFeatures.add(ivf); } catch (InstantiationException e) { throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName())); @@ -104,6 +117,13 @@ public class VariantFiltrationWalker extends LocusWalker { VariantExclusionCriterion vec = (VariantExclusionCriterion) exclusionClass.newInstance(); vec.initialize(requestedExclusionArgs); + if (LEARNING) { + PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + exclusionClassName + ".study"); + studyWriter.println(vec.getStudyHeader()); + + swriters.put(exclusionClassName, studyWriter); + } + requestedExclusions.add(vec); PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls"); @@ -185,9 +205,22 @@ public class VariantFiltrationWalker extends LocusWalker { // Apply features that modify the likelihoods and LOD scores for ( IndependentVariantFeature ivf : requestedFeatures ) { - variant.adjustLikelihoods(ivf.compute(ref, context)); + ivf.compute(ref, context); + + String featureClassName = rationalizeClassName(ivf.getClass()); + + double[] weights = ivf.getLikelihoods(); + + variant.adjustLikelihoods(weights); if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); } + + if (LEARNING) { + PrintWriter swriter = swriters.get(featureClassName); + if (swriter != null) { + swriter.println(ivf.getStudyInfo()); + } + } } // Apply exclusion tests that accept or reject the variant call @@ -198,10 +231,21 @@ public class VariantFiltrationWalker extends LocusWalker { LocusContext Q0freeContext = removeQ0reads(context); for ( VariantExclusionCriterion vec : requestedExclusions ) { - boolean excludeResult = vec.exclude(ref, (vec.useZeroQualityReads() ? context : Q0freeContext), variant); + vec.compute(ref, (vec.useZeroQualityReads() ? context : Q0freeContext), variant); - if (excludeResult) { - exclusionResults.add(rationalizeClassName(vec.getClass())); + String exclusionClassName = rationalizeClassName(vec.getClass()); + + if (vec.isExcludable()) { + exclusionResults.add(exclusionClassName); + } + + if (LEARNING) { + PrintWriter swriter = swriters.get(exclusionClassName); + + if (swriter != null) { + swriter.println(vec.getStudyInfo()); + swriter.flush(); + } } } @@ -272,8 +316,12 @@ public class VariantFiltrationWalker extends LocusWalker { vwriter.close(); - for (PrintWriter writer : ewriters.values()) { - writer.close(); + for (PrintWriter ewriter : ewriters.values()) { + ewriter.close(); + } + + for (PrintWriter swriter : swriters.values()) { + swriter.close(); } } }