From f12ea3a27e15bdfb9ed83085d2be633bd0eedf7e Mon Sep 17 00:00:00 2001 From: kiran Date: Tue, 25 Aug 2009 21:17:56 +0000 Subject: [PATCH] Added ability for all filters to return a probability for a given variant - interpreted as the probability that the given variant should be included in the final set. The joint probability of all the filters is computed to determine whether a variant should stay or go. At the moment, this is only visible in verbose mode (specify -V). Also removed 'learning mode'; now, filters emit important stats no matter what. Various code cleanups. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1458 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/variants/VECAlleleBalance.java | 5 +- .../walkers/variants/VECDepthOfCoverage.java | 9 +- .../walkers/variants/VECFisherStrand.java | 5 +- .../walkers/variants/VECLodThreshold.java | 5 +- .../walkers/variants/VECMappingQuality.java | 5 +- .../variants/VECMappingQualityZero.java | 5 +- .../gatk/walkers/variants/VECNull.java | 5 +- .../variants/VariantExclusionCriterion.java | 3 +- .../variants/VariantFiltrationWalker.java | 137 ++++++++---------- 9 files changed, 86 insertions(+), 93 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java index 80611215c..09b251fbc 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java @@ -69,8 +69,9 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends public boolean useZeroQualityReads() { return false; } - public boolean isExcludable() { - return exclude; + public double inclusionProbability() { + // A hack for now until this filter is actually converted to an empirical filter + return exclude ? 0.0 : 1.0; } public String getStudyHeader() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java index de2e564f7..f44aac5d8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java @@ -27,10 +27,15 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion { depth = context.getReads().size(); } - public boolean isExcludable() { - return exclude; + public double inclusionProbability() { + // A hack for now until this filter is actually converted to an empirical filter + return exclude ? 0.0 : 1.0; } +// public boolean isExcludable() { +// return exclude; +// } + public String getStudyHeader() { return "DepthOfCoverage("+maximum+")\tdepth"; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java index 3cb62e028..2d80343d6 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java @@ -33,8 +33,9 @@ public class VECFisherStrand implements VariantExclusionCriterion { } } - public boolean isExcludable() { - return exclude; + public double inclusionProbability() { + // A hack for now until this filter is actually converted to an empirical filter + return exclude ? 0.0 : 1.0; } public String getStudyHeader() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java index d0fee764f..e2dd6928b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java @@ -21,8 +21,9 @@ public class VECLodThreshold implements VariantExclusionCriterion { public boolean useZeroQualityReads() { return false; } - public boolean isExcludable() { - return exclude; + public double inclusionProbability() { + // A hack for now until this filter is actually converted to an empirical filter + return exclude ? 0.0 : 1.0; } public String getStudyHeader() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java index 8ea872dba..f3cecb87d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java @@ -28,8 +28,9 @@ public class VECMappingQuality implements VariantExclusionCriterion { exclude = rms < minQuality; } - public boolean isExcludable() { - return exclude; + public double inclusionProbability() { + // A hack for now until this filter is actually converted to an empirical filter + return exclude ? 0.0 : 1.0; } public String getStudyHeader() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQualityZero.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQualityZero.java index a8a65a5f4..991b456d1 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQualityZero.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQualityZero.java @@ -29,8 +29,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion { exclude = mq0Count > maximum; } - public boolean isExcludable() { - return exclude; + public double inclusionProbability() { + // A hack for now until this filter is actually converted to an empirical filter + return exclude ? 0.0 : 1.0; } public String getStudyHeader() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java index 2bf786913..90bab3e83 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java @@ -10,8 +10,9 @@ public class VECNull implements VariantExclusionCriterion { public void compute(char ref, AlignmentContext context, rodVariants variant) { } - public boolean isExcludable() { - return false; + public double inclusionProbability() { + // A hack for now until this filter is actually converted to an empirical filter + return 1.0; } public String getStudyHeader() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java index 3b6c95a5a..63a5be1f7 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java @@ -8,7 +8,8 @@ public interface VariantExclusionCriterion { public void compute(char ref, AlignmentContext context, rodVariants variant); - public boolean isExcludable(); + //public boolean isExcludable(); + public double inclusionProbability(); public String getStudyHeader(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java index 832c7b884..2548be67f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java @@ -24,19 +24,16 @@ public class VariantFiltrationWalker extends LocusWalker { @Argument(fullName="variants_out_head", shortName="VOH", doc="File to which modified variants should be written") public String VARIANTS_OUT_HEAD; @Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES; @Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:arguments]'", required=false) public String[] EXCLUSIONS; + @Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9; @Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false; @Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false; - @Argument(fullName="learning_mode", shortName="LM", doc="Output parseable information on each filter that can then be fed back to the filter as a training set") public Boolean LEARNING = false; - @Argument(fullName="truth", shortName="truth", doc="Operate on truth set only") public Boolean TRUTH = false; private List> featureClasses; private List> exclusionClasses; - private PrintWriter vwriter; - private HashMap ewriters; - private HashMap swriters; - private final String STUDY_NAME = "study"; - private final String knownSNPDBName = "dbSNP"; + private PrintWriter variantsWriter; + private PrintWriter paramsWriter; + private HashMap exclusionWriters; private ArrayList requestedFeatures; private ArrayList requestedExclusions; @@ -55,16 +52,11 @@ public class VariantFiltrationWalker extends LocusWalker { } try { - vwriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls"); - vwriter.println(GeliTextWriter.headerLine); + variantsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls"); + variantsWriter.println(GeliTextWriter.headerLine); - swriters = new HashMap(); - - if (LEARNING) { - PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + STUDY_NAME); - swriters.put(STUDY_NAME, studyWriter); - studyWriter.print("Chr\tPosition\t"); - } + paramsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".params.out"); + paramsWriter.print("Chr\tPosition\t"); requestedFeatures = new ArrayList(); requestedExclusions = new ArrayList(); @@ -85,8 +77,7 @@ public class VariantFiltrationWalker extends LocusWalker { ivf.initialize(requestedFeatureArgs); requestedFeatures.add(ivf); - if (LEARNING) - swriters.get(STUDY_NAME).print(ivf.getStudyHeader() + "\t"); + paramsWriter.print(ivf.getStudyHeader() + "\t"); } catch (InstantiationException e) { throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName())); } catch (IllegalAccessException e) { @@ -98,7 +89,7 @@ public class VariantFiltrationWalker extends LocusWalker { } // Initialize requested exclusion criteria - ewriters = new HashMap(); + exclusionWriters = new HashMap(); if (EXCLUSIONS != null) { for (String requestedExclusionString : EXCLUSIONS) { @@ -115,13 +106,12 @@ public class VariantFiltrationWalker extends LocusWalker { vec.initialize(requestedExclusionArgs); requestedExclusions.add(vec); - if (LEARNING) - swriters.get(STUDY_NAME).print(vec.getStudyHeader() + "\t"); + paramsWriter.print(vec.getStudyHeader() + "\t"); PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls"); writer.println(GeliTextWriter.headerLine); - ewriters.put(exclusionClassName, writer); + exclusionWriters.put(exclusionClassName, writer); } catch (InstantiationException e) { throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName())); } catch (IllegalAccessException e) { @@ -132,8 +122,7 @@ public class VariantFiltrationWalker extends LocusWalker { } } - if (LEARNING) - swriters.get(STUDY_NAME).print("inDbSNP\tinHapMap\tisHet\n"); + paramsWriter.print("inDbSNP\tinHapMap\tisHet\n"); } catch (FileNotFoundException e) { throw new StingException(String.format("Could not open file(s) for writing")); } @@ -186,21 +175,13 @@ public class VariantFiltrationWalker extends LocusWalker { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { rodVariants variant = (rodVariants) tracker.lookup("variant", null); - rodGFF hapmapSite = null; - - for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) { - if ( datum != null && datum instanceof rodGFF ) { - hapmapSite = (rodGFF) datum; - } - } - // Ignore places where we don't have a variant or where the reference base is ambiguous. - if (variant != null && (!TRUTH || hapmapSite != null) && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) { - if (VERBOSE) { out.println("Original:\n " + variant); } + if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) { + HashMap exclusionResults = new HashMap(); - if (LEARNING) { - swriters.get(STUDY_NAME).print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t"); - } + if (VERBOSE) { out.println("Original:\n" + variant); } + + paramsWriter.print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t"); // Apply features that modify the likelihoods and LOD scores for ( IndependentVariantFeature ivf : requestedFeatures ) { @@ -212,59 +193,62 @@ public class VariantFiltrationWalker extends LocusWalker { if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); } - if (LEARNING) { - swriters.get(STUDY_NAME).print(ivf.getStudyInfo() + "\t"); - } + paramsWriter.print(ivf.getStudyInfo() + "\t"); } - // Apply exclusion tests that accept or reject the variant call - ArrayList exclusionResults = new ArrayList(); - - // we need to provide an alternative context without mapping quality 0 reads + // We need to provide an alternative context without mapping quality 0 reads // for those exclusion criterion that don't want them AlignmentContext Q0freeContext = removeQ0reads(context); + // Apply exclusion tests that score the variant call + if (VERBOSE) { + out.print("InclusionProbabilities:["); + } + + // Use the filters to score the variant + double jointInclusionProbability = 1.0; for ( VariantExclusionCriterion vec : requestedExclusions ) { vec.compute(ref.getBase(), (vec.useZeroQualityReads() ? context : Q0freeContext), variant); String exclusionClassName = rationalizeClassName(vec.getClass()); - if (vec.isExcludable()) { - exclusionResults.add(exclusionClassName); - } + Double inclusionProbability = vec.inclusionProbability(); + jointInclusionProbability *= inclusionProbability; + exclusionResults.put(exclusionClassName, inclusionProbability); - if (LEARNING) { - swriters.get(STUDY_NAME).print(vec.getStudyInfo() + "\t"); - } - } - - if (exclusionResults.size() > 0) { - String exclusions = ""; - for (int i = 0; i < exclusionResults.size(); i++) { - exclusions += exclusionResults.get(i) + (i == exclusionResults.size() - 1 ? "" : ","); - - PrintWriter writer = ewriters.get(exclusionResults.get(i)); - if (writer != null) { - writer.println(variant); + if (inclusionProbability < INCLUSION_THRESHOLD) { + PrintWriter ewriter = exclusionWriters.get(exclusionClassName); + if (ewriter != null) { + ewriter.println(variant); + ewriter.flush(); } } - - if (VERBOSE) { out.printf("Exclusions: %s\n", exclusions); } + + if (VERBOSE) { + out.print(exclusionClassName + "=" + inclusionProbability + ";"); + } + + paramsWriter.print(vec.getStudyInfo() + "\t"); + } + + // Decide whether we should keep the call or not + if (jointInclusionProbability >= INCLUSION_THRESHOLD) { + variantsWriter.println(variant); + + if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); } } else { - vwriter.println(variant); + if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); } } - if (VERBOSE) { out.println(); } - - if (LEARNING) { - rodDbSNP dbsnp = (rodDbSNP)tracker.lookup(knownSNPDBName, null); - if ( dbsnp == null ) - swriters.get(STUDY_NAME).print("false\tfalse\t"); - else - swriters.get(STUDY_NAME).print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t"); - swriters.get(STUDY_NAME).println(GenotypeUtils.isHet(variant)); + rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbSNP", null); + if ( dbsnp == null ) { + paramsWriter.print("false\tfalse\t"); + } else { + paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t"); } + paramsWriter.println(GenotypeUtils.isHet(variant)); + return 1; } @@ -312,14 +296,11 @@ public class VariantFiltrationWalker extends LocusWalker { public void onTraversalDone(Integer result) { out.printf("Processed %d loci.\n", result); - vwriter.close(); + variantsWriter.close(); + paramsWriter.close(); - for (PrintWriter ewriter : ewriters.values()) { + for (PrintWriter ewriter : exclusionWriters.values()) { ewriter.close(); } - - for (PrintWriter swriter : swriters.values()) { - swriter.close(); - } } }