From e495b836d382bc96d2d523ce7756f0a5690de04c Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 3 Aug 2009 16:46:51 +0000 Subject: [PATCH] - added mapping quality filter - make the filters brainless in that they strictly have thresholds and filter based on them; require user to calculate and input these thresholds. - update filters in preparation for migration to new output format git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1363 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/variants/RatioFilter.java | 11 +---- .../walkers/variants/VECAlleleBalance.java | 36 +++++++++++++-- .../walkers/variants/VECDepthOfCoverage.java | 5 +- .../walkers/variants/VECFisherStrand.java | 7 +-- .../walkers/variants/VECLodThreshold.java | 13 ++---- .../walkers/variants/VECMappingQuality.java | 46 +++++++++++++++++++ .../variants/VECOnOffGenotypeRatio.java | 35 ++++++++++++-- 7 files changed, 120 insertions(+), 33 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java index d912599a7..a4d4462fe 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java @@ -144,7 +144,7 @@ class ObservationTable extends HashMap { Logger logger = null; GenotypeFeatureData.Tail tail; - public ObservationTable(final File observationsFile, final String statField, Logger logger, GenotypeFeatureData.Tail tail ) throws NoSuchFieldException, FileNotFoundException, IOException { + public ObservationTable(final File observationsFile, final String statField, Logger logger, GenotypeFeatureData.Tail tail ) throws NoSuchFieldException, IOException { this.observationsFile = observationsFile; this.statField = statField; this.logger = logger; @@ -202,7 +202,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion { protected double pvalueLimit = -1; protected File observationsFile = null; protected String statField = null; // "AlleleRatio"; - protected Logger logger = null; // Logger.getLogger(RatioFilter.class); + protected Logger logger = null; // Logger.getLogger(RatioFilter.class); protected ObservationTable dataTable = null; protected String name = null; protected GenotypeFeatureData.Tail tail = null; @@ -260,13 +260,6 @@ public abstract class RatioFilter implements VariantExclusionCriterion { public void compute(char ref, LocusContext context, rodVariants variant) { boolean exclude = false; - // - // todo -- need to calculate a significance value for the chance that the - // todo -- observed first and second counts are reliably not passing the threshold - // todo -- basically, we need to sample with replacement from all first / second pairs - // todo -- and only conclude that the variant fails the test if the probability of failing - // todo -- across all samples is greater than some P value, like 0.05 - // ReadBackedPileup pileup = new ReadBackedPileup(ref, context); if ( applyToVariant(variant) ) { Pair counts = scoreVariant(ref, pileup, variant); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java index 651b9b8e2..68fafee61 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java @@ -1,17 +1,22 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; import org.broadinstitute.sting.gatk.refdata.rodVariants; +import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Pair; -public class VECAlleleBalance extends RatioFilter { - final private static String statField = "AlleleRatio"; +public class VECAlleleBalance implements VariantExclusionCriterion { //extends RatioFilter { final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.TwoTailed; + private boolean exclude; + private double lowThreshold, highThreshold, ratio; - - public VECAlleleBalance() { - super("AlleleBalance", statField, VECAlleleBalance.class, tail); + public void initialize(String arguments) { + if (arguments != null && !arguments.isEmpty()) { + String[] argPieces = arguments.split(","); + lowThreshold = Double.valueOf(argPieces[0]); + highThreshold = Double.valueOf(argPieces[1]); + } } /** @@ -51,4 +56,25 @@ public class VECAlleleBalance extends RatioFilter { return new Pair(major, minor); } + + public void compute(char ref, LocusContext context, rodVariants variant) { + ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + Pair counts = scoreVariant(ref, pileup, variant); + ratio = (double)counts.first / (double)counts.second; + exclude = ratio < lowThreshold || ratio > highThreshold; + } + + public boolean useZeroQualityReads() { return false; } + + public boolean isExcludable() { + return exclude; + } + + public String getStudyHeader() { + return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorMinorRatio"; + } + + public String getStudyInfo() { + return (exclude ? "fail" : "pass") + "\t" + ratio; + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java index 1dc2a0d28..d3e8c8f9b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java @@ -24,7 +24,6 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion { public void compute(char ref, LocusContext context, rodVariants variant) { exclude = context.getReads().size() > maximum; - depth = context.getReads().size(); } @@ -33,11 +32,11 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion { } public String getStudyHeader() { - return "#depth"; + return "DepthOfCoverage("+maximum+")\tdepth"; } public String getStudyInfo() { - return Integer.toString(depth); + return (exclude ? "fail" : "pass") + "\t" + depth; } public boolean useZeroQualityReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java index 0eebfbc80..7a1ba608b 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java @@ -11,6 +11,7 @@ import java.util.List; public class VECFisherStrand implements VariantExclusionCriterion { private double pvalueLimit = 0.00001; + private double pValue; private boolean exclude; private ArrayList factorials = new ArrayList(); @@ -37,11 +38,11 @@ public class VECFisherStrand implements VariantExclusionCriterion { } public String getStudyHeader() { - return ""; + return "FisherStrand("+pvalueLimit+")\tpvalue"; } public String getStudyInfo() { - return ""; + return (exclude ? "fail" : "pass") + "\t" + pValue; } public boolean useZeroQualityReads() { return false; } @@ -56,7 +57,7 @@ public class VECFisherStrand implements VariantExclusionCriterion { double pCutoff = computePValue(table); //printTable(table, pCutoff); - double pValue = pCutoff; + pValue = pCutoff; while (rotateTable(table)) { double pValuePiece = computePValue(table); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java index 3eeb2932f..c90595240 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java @@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.LocusContext; public class VECLodThreshold implements VariantExclusionCriterion { private double lodThreshold = 5.0; - + private double lod; private boolean exclude; public void initialize(String arguments) { @@ -15,25 +15,22 @@ public class VECLodThreshold implements VariantExclusionCriterion { } public void compute(char ref, LocusContext context, rodVariants variant) { - exclude = (variant.getLodBtr() < lodThreshold); + lod = variant.getLodBtr(); + exclude = lod < lodThreshold; } public boolean useZeroQualityReads() { return false; } - public boolean exclude(char ref, LocusContext context, rodVariants variant) { - return (variant.getLodBtr() < lodThreshold); - } - public boolean isExcludable() { return exclude; } public String getStudyHeader() { - return ""; + return "LodThreshold("+lod+")\tlod"; } public String getStudyInfo() { - return ""; + return (exclude ? "fail" : "pass") + "\t" + lod; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java new file mode 100755 index 000000000..a016a779b --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java @@ -0,0 +1,46 @@ +package org.broadinstitute.sting.playground.gatk.walkers.variants; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.rodVariants; +import net.sf.samtools.SAMRecord; + +import java.util.List; + + +public class VECMappingQuality implements VariantExclusionCriterion { + private double minQuality = 5.0; + private double rms; + private boolean exclude; + + public void initialize(String arguments) { + if (arguments != null && !arguments.isEmpty()) { + minQuality = Double.valueOf(arguments); + } + } + + public void compute(char ref, LocusContext context, rodVariants variant) { + List reads = context.getReads(); + + rms = 0.0; + for (int readIndex = 0; readIndex < reads.size(); readIndex++) { + int qual = reads.get(readIndex).getMappingQuality(); + rms += qual * qual; + } + rms /= reads.size(); + rms = Math.sqrt(rms); + } + + public boolean isExcludable() { + return exclude; + } + + public String getStudyHeader() { + return "MappingQuality("+minQuality+")\trms"; + } + + public String getStudyInfo() { + return (exclude ? "fail" : "pass") + "\t" + rms; + } + + public boolean useZeroQualityReads() { return true; } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java index 33bf995c2..21d42ca8a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java @@ -4,12 +4,17 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.utils.*; -public class VECOnOffGenotypeRatio extends RatioFilter { - final private static String statField = "onOffRatio"; +public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter { final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.LeftTailed; + private boolean exclude; + private double lowThreshold, highThreshold, ratio; - public VECOnOffGenotypeRatio() { - super("On/Off Genotype Ratio", statField, VECOnOffGenotypeRatio.class, tail); + public void initialize(String arguments) { + if (arguments != null && !arguments.isEmpty()) { + String[] argPieces = arguments.split(","); + lowThreshold = Double.valueOf(argPieces[0]); + highThreshold = Double.valueOf(argPieces[1]); + } } /** @@ -38,7 +43,6 @@ public class VECOnOffGenotypeRatio extends RatioFilter { if ( genotype.length() > 2 ) throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype)); - int on = 0, off = 0; for ( char base : BaseUtils.BASES ) { @@ -52,4 +56,25 @@ public class VECOnOffGenotypeRatio extends RatioFilter { return new Pair(on, off); } + + public void compute(char ref, LocusContext context, rodVariants variant) { + ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + Pair counts = scoreVariant(ref, pileup, variant); + ratio = (double)counts.first / (double)counts.second; + exclude = ratio < lowThreshold || ratio > highThreshold; + } + + public boolean useZeroQualityReads() { return false; } + + public boolean isExcludable() { + return exclude; + } + + public String getStudyHeader() { + return "OnOffGenotype("+lowThreshold+","+highThreshold+")\tMajorMinorRatio"; + } + + public String getStudyInfo() { + return (exclude ? "fail" : "pass") + "\t" + ratio; + } } \ No newline at end of file