diff --git a/ivy.xml b/ivy.xml index 57d13a06d..8df8cd8ee 100644 --- a/ivy.xml +++ b/ivy.xml @@ -23,6 +23,10 @@ + + + + diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java deleted file mode 100755 index 028efd35a..000000000 --- a/java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.contexts; - -import org.broadinstitute.sting.gatk.refdata.RodGeliText; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; - -import net.sf.samtools.SAMRecord; -import java.util.*; - - -public class VariantContext { - private RefMetaDataTracker tracker; - private ReferenceContext ref; - private AlignmentContext context; - private RodGeliText variant; - private RodVCF variantVCF; - private AlignmentContext Q0freeContext = null; - - public VariantContext(RefMetaDataTracker tracker, ReferenceContext ref, - AlignmentContext context, RodGeliText variant, RodVCF variantVCF) { - this.tracker = tracker; - this.ref = ref; - this.context = context; - this.variant = variant; - this.variantVCF = variantVCF; - } - - public RefMetaDataTracker getTracker() { return tracker; } - public ReferenceContext getReferenceContext() { return ref; } - public RodGeliText getVariant() { return variant; } - public RodVCF getVariantVCF() { return variantVCF; } - public AlignmentContext getAlignmentContext(boolean useMQ0Reads) { - return (useMQ0Reads ? context : getQ0freeContext()); - } - - private AlignmentContext getQ0freeContext() { - if ( Q0freeContext == null ) { - // set up the variables - List reads = context.getReads(); - List offsets = context.getOffsets(); - Iterator readIter = reads.iterator(); - Iterator offsetIter = offsets.iterator(); - ArrayList Q0freeReads = new ArrayList(); - ArrayList Q0freeOffsets = new ArrayList(); - - // copy over good reads/offsets - while ( readIter.hasNext() ) { - SAMRecord read = readIter.next(); - Integer offset = offsetIter.next(); - if ( read.getMappingQuality() > 0 ) { - Q0freeReads.add(read); - Q0freeOffsets.add(offset); - } - } - - Q0freeContext = new AlignmentContext(context.getLocation(), Q0freeReads, Q0freeOffsets); - } - - return Q0freeContext; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java new file mode 100755 index 000000000..730e45b1e --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java @@ -0,0 +1,35 @@ +package org.broadinstitute.sting.gatk.walkers.filters; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.RodVCF; + + +public class ClusteredSnps { + private int window = 10; + private int snpThreshold = 3; + + public ClusteredSnps(int snpThreshold, int window) { + this.window = window; + this.snpThreshold = snpThreshold; + if ( window < 1 || snpThreshold < 1 ) + throw new IllegalArgumentException("Window and threshold values need to be positive values"); + } + + public boolean filter(VariantContextWindow contextWindow) { + + Pair[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1); + for (int i = 0; i < snpThreshold; i++) { + if ( variants[i] == null || variants[i+snpThreshold-1] == null ) + continue; + + GenomeLoc left = variants[i].second.getLocation(); + GenomeLoc right = variants[i+snpThreshold-1].second.getLocation(); + if ( left.getContigIndex() == right.getContigIndex() && + Math.abs(right.getStart() - left.getStart()) <= window ) + return true; + } + return false; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFBinomialStrand.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFBinomialStrand.java deleted file mode 100755 index e5b851ab6..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFBinomialStrand.java +++ /dev/null @@ -1,64 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.ReadBackedPileup; - -import java.util.List; - -public class IVFBinomialStrand implements IndependentVariantFeature { - private double strandBalance = 0.5; - - private double[] likelihoods; - - public void initialize(String arguments) { - if (arguments != null && !arguments.isEmpty()) { - strandBalance = Double.valueOf(arguments); - } - } - - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - likelihoods = new double[10]; - - ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads())); - List reads = context.getAlignmentContext(useZeroQualityReads()).getReads(); - String bases = pileup.getBasesAsString(); - - for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { - Genotype genotype = Genotype.values()[genotypeIndex]; - - char[] alleles = { genotype.toString().charAt(0), genotype.toString().charAt(1) }; - int[] strandCounts = { 0, 0 }; - - for (int pileupIndex = 0; pileupIndex < bases.length(); pileupIndex++) { - for (int alleleIndex = 0; alleleIndex < alleles.length; alleleIndex++) { - if (bases.charAt(pileupIndex) == alleles[alleleIndex]) { - if (reads.get(pileupIndex).getReadNegativeStrandFlag()) { - strandCounts[1]++; - } else { - strandCounts[0]++; - } - } - } - } - - likelihoods[genotypeIndex] = Math.log10(MathUtils.binomialProbability(strandCounts[0], strandCounts[0] + strandCounts[1], strandBalance)); - } - } - - public double[] getLikelihoods() { - return likelihoods; - } - - public String getStudyHeader() { - return ""; - } - - public String getStudyInfo() { - return ""; - } - - public boolean useZeroQualityReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFNull.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFNull.java deleted file mode 100755 index 7ddbea3c8..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFNull.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -public class IVFNull implements IndependentVariantFeature { - public void initialize(String arguments) {} - - public void compute(VariantContextWindow contextWindow) { - } - - public double[] getLikelihoods() { - return new double[10]; - } - - public String getStudyHeader() { - return ""; - } - - public String getStudyInfo() { - return ""; - } - - public boolean useZeroQualityReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFPrimaryBases.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFPrimaryBases.java deleted file mode 100755 index eed6115c1..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFPrimaryBases.java +++ /dev/null @@ -1,82 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.ReadBackedPileup; - -public class IVFPrimaryBases implements IndependentVariantFeature { - private double[] p = { 0.933, 0.972, 0.970, 0.960, 0.945, 0.990, 0.971, 0.943, 0.978, 0.928 }; - - private double[] likelihoods; - - /** - * Method so that features can initialize themselves based on a short argument string. At the moment, each feature is - * responsible for interpreting their own argument string. - * - * @param arguments the arguments! - */ - public void initialize(String arguments) { - if (arguments != null && !arguments.isEmpty()) { - String[] argPieces = arguments.split(","); - - for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) { - p[genotypeIndex] = Double.valueOf(argPieces[genotypeIndex]); - } - } - } - - /** - * Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array - * of length 10 (one for each genotype) and the value must be in log10-space. - * - * @param contextWindow the context for the given locus - * @return a ten-element array of log-likelihood result of the feature applied to each genotype - */ - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - likelihoods = new double[10]; - - ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads())); - String primaryBases = pileup.getBasesAsString(); - - for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { - char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0); - char secondAllele = Genotype.values()[genotypeIndex].toString().charAt(1); - - int offTotal = 0; - - int onTotal = 0; - - for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) { - char primaryBase = primaryBases.charAt(pileupIndex); - - if (primaryBase != firstAllele && primaryBase != secondAllele) { - offTotal++; - } else { - onTotal++; - } - - } - - int Total = onTotal + offTotal; - - double Prior = MathUtils.binomialProbability(offTotal, Total, p[genotypeIndex]); - - likelihoods[genotypeIndex] = Math.log10(Prior); - } - } - - public double[] getLikelihoods() { - return likelihoods; - } - - public String getStudyHeader() { - return ""; - } - - public String getStudyInfo() { - return ""; - } - - public boolean useZeroQualityReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFSecondaryBases.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFSecondaryBases.java deleted file mode 100755 index 09a9b49cc..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFSecondaryBases.java +++ /dev/null @@ -1,129 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.ReadBackedPileup; - -public class IVFSecondaryBases implements IndependentVariantFeature { - private double[] p2on = { 0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000 }; - private double[] p2off = { 0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505 }; - - private double[] likelihoods; - - /** - * Method so that features can initialize themselves based on a short argument string. At the moment, each feature is - * responsible for interpreting their own argument string. - * - * @param arguments the arguments! - */ - public void initialize(String arguments) { - if (arguments != null && !arguments.isEmpty()) { - String[] argPieces = arguments.split(";"); - String[] argOnPieces = argPieces[0].split(","); - String[] argOffPieces = argPieces[0].split(","); - - for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) { - p2on[genotypeIndex] = Double.valueOf(argOnPieces[genotypeIndex]); - p2off[genotypeIndex] = Double.valueOf(argOffPieces[genotypeIndex]); - } - } - } - - private double scaleObservation(int k, int N, double p) { - int i = 0; - double likelihood = 0.0, logLikelihood = 0.0, scale = 1.0; - - do { - scale = 1.0 - ((double) i)*0.10; - - int scaledK = (int) (scale * ((double) k)); - int scaledN = (int) (scale * ((double) N)); - - likelihood = MathUtils.binomialProbability(scaledK, scaledN, p); - logLikelihood = Math.log10(likelihood); - - //System.out.printf(" %d %f: %d->%d, %d->%d, %f => %f %f\n", i, scale, k, scaledK, N, scaledN, p, likelihood, logLikelihood); - - i++; - } while (i < 10 && (Double.isInfinite(logLikelihood) || Double.isNaN(logLikelihood))); - - //System.out.println(); - - return likelihood; - } - - /** - * Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array - * of length 10 (one for each genotype) and the value must be in log10-space. - * - * @param contextWindow the context for the given locus - * @return a ten-element array of log-likelihood result of the feature applied to each genotype - */ - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - likelihoods = new double[10]; - - ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads())); - String primaryBases = pileup.getBasesAsString(); - String secondaryBases = pileup.getSecondaryBasePileup(); - - for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { - char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0); - char secondAllele = Genotype.values()[genotypeIndex].toString().charAt(1); - - int offIsGenotypic = 0; - int offTotal = 0; - - int onIsGenotypic = 0; - int onTotal = 0; - - for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) { - char primaryBase = primaryBases.charAt(pileupIndex); - - if (secondaryBases != null) { - char secondaryBase = secondaryBases.charAt(pileupIndex); - - if (primaryBase != firstAllele && primaryBase != secondAllele) { - if (secondaryBase == firstAllele || secondaryBase == secondAllele) { - offIsGenotypic++; - } - offTotal++; - } else { - if (secondaryBase == firstAllele || secondaryBase == secondAllele) { - onIsGenotypic++; - } - onTotal++; - } - } - } - - //System.out.printf(Genotype.values()[genotypeIndex].toString() + " on:\n"); - double offPrior = scaleObservation(offIsGenotypic, offTotal, p2off[genotypeIndex]); - - //System.out.printf(Genotype.values()[genotypeIndex].toString() + " off:\n"); - double onPrior = scaleObservation(onIsGenotypic, onTotal, p2on[genotypeIndex]); - - //System.out.printf("%s: off: %d/%d %f %f%n", Genotype.values()[genotypeIndex].toString(), offIsGenotypic, offTotal, p2off[genotypeIndex], offPrior); - //System.out.printf("%s: on: %d/%d %f %f%n", Genotype.values()[genotypeIndex].toString(), onIsGenotypic, onTotal, p2on[genotypeIndex], onPrior); - - double logOffPrior = MathUtils.compareDoubles(offPrior, 0.0, 1e-10) == 0 ? Math.log10(Double.MIN_VALUE) : Math.log10(offPrior); - double logOnPrior = MathUtils.compareDoubles(onPrior, 0.0, 1e-10) == 0 ? Math.log10(Double.MIN_VALUE) : Math.log10(onPrior); - - likelihoods[genotypeIndex] = logOffPrior + logOnPrior; - } - } - - public double[] getLikelihoods() { - return likelihoods; - } - - public String getStudyHeader() { - return ""; - } - - public String getStudyInfo() { - return ""; - } - - public boolean useZeroQualityReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IndependentVariantFeature.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IndependentVariantFeature.java deleted file mode 100755 index f9fd63c75..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IndependentVariantFeature.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -/** - * Interface for conditionally independent variant features. - */ -public interface IndependentVariantFeature { - /** - * A convenient enumeration for each of the ten genotypes. - */ - public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } - - /** - * Method so that features can initialize themselves based on a short argument string. - * At the moment, each feature is responsible for interpreting their own argument string. - * - * @param arguments the arguments! - */ - public void initialize(String arguments); - - /** - * Method to compute the result of this feature for each of the ten genotypes. The return value must - * be a double array of length 10 (one for each genotype) and the value must be in log10-space. - * - * @param contextWindow the context for the given locus - */ - public void compute(VariantContextWindow contextWindow); - - public double[] getLikelihoods(); - - public String getStudyHeader(); - - public String getStudyInfo(); - - public boolean useZeroQualityReads(); -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/RatioFilter.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/RatioFilter.java deleted file mode 100755 index 463bdaf64..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/RatioFilter.java +++ /dev/null @@ -1,119 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.utils.*; -import org.apache.log4j.Logger; - - -public abstract class RatioFilter implements VariantExclusionCriterion { - private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD - private static final int minDepthOfCoverage = 25; // TODO -- must be replaced with a proper probability calculation - protected double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest; - - protected double pvalueLimit = -1; - protected Logger logger = null; // Logger.getLogger(RatioFilter.class); - protected String name = null; - - protected enum Tail { LeftTailed, RightTailed, TwoTailed } - protected Tail tail = null; - protected double lowThreshold = -1; - protected double highThreshold = -1; - - protected enum AnalysisType { POINT_ESTIMATE, FAIR_COIN_TEST }; - protected AnalysisType analysis = AnalysisType.POINT_ESTIMATE; - protected double integralPValueThreshold = 0.05; - private final static double SEARCH_INCREMENT = 0.01; - - protected boolean exclude = false; - - public RatioFilter(final String name, Class myClass, Tail tail ) { - this.name = name; - this.tail = tail; - logger = Logger.getLogger(myClass); - } - - protected void setLowThreshold(double threshold) { - lowThreshold = threshold; - } - - protected void setHighThreshold(double threshold) { - highThreshold = threshold; - } - - protected void setIntegralPvalue(double pvalue) { - integralPValueThreshold = pvalue; - } - - protected abstract Pair getRatioCounts(char ref, ReadBackedPileup pileup, RodGeliText variant); - protected abstract boolean excludeHetsOnly(); - - public boolean useZeroQualityReads() { return false; } - - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - RodGeliText variant = context.getVariant(); - char ref = context.getReferenceContext().getBase(); - - ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext(useZeroQualityReads())); - Pair counts = getRatioCounts(ref, pileup, variant); - - boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest; - boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant); - - if ( analysis == AnalysisType.POINT_ESTIMATE ) - exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts); - else if ( analysis == AnalysisType.FAIR_COIN_TEST ) - exclude = excludable && highGenotypeConfidence && integralExclude(counts); - - // - // for printing only - // - int n = counts.first + counts.second; - double value = counts.first / (1.0 * counts.first + counts.second); - //logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, location=%s, bases=%s", - // name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n, - // value, exclude, variant.getLocation(), pileup.getBases())); - } - - // TODO - this calculation needs to be parameterized correctly - private boolean pointEstimateExclude(Pair counts) { - int n = counts.first + counts.second; - if ( n < minDepthOfCoverage ) - return false; - - double ratio = counts.first.doubleValue() / (double)n; - return !passesThreshold(ratio); - } - - private boolean integralExclude(Pair counts) { - double sumExclude = 0.0, sumP = 0.0; - int n = counts.first + counts.second; - for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) { - double p = MathUtils.binomialProbability(counts.first, n, r); - sumP += p; - boolean exclude = ! passesThreshold(r); - sumExclude += p * (exclude ? 1.0 : 0.0); - - //System.out.printf("integral: k=%d, n=%d, r=%f, p=%f, sumP = %f, exclude=%b | sum=%f, percentExcluded=%f%n", - // counts.first, n, r, p, sumP, exclude, sumExclude, sumExclude / sumP); - } - - double percentExcluded = sumExclude / sumP; - - return 1 - percentExcluded <= integralPValueThreshold ; - } - - private boolean passesThreshold(double value) { - switch ( tail ) { - case LeftTailed: - return value >= lowThreshold; - case RightTailed: - return value <= highThreshold; - case TwoTailed: - return value >= lowThreshold && value < highThreshold; - default: - return true; - } - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECAlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECAlleleBalance.java deleted file mode 100755 index af4cc7c61..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECAlleleBalance.java +++ /dev/null @@ -1,89 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.refdata.RodGeliText; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.Pair; - -import java.util.HashMap; - -public class VECAlleleBalance extends RatioFilter { - - private double lowThreshold = 0.25, highThreshold = 0.75; - private double ratio; - - public VECAlleleBalance() { - super("Allele Balance Ratio", VECAlleleBalance.class, Tail.TwoTailed); - } - - public void initialize(HashMap args) { - if ( args.get("analysis") != null ) { - if ( args.get("analysis").equalsIgnoreCase("fair_coin_test") ) - analysis = AnalysisType.FAIR_COIN_TEST; - else if ( args.get("analysis").equalsIgnoreCase("point_estimate") ) - analysis = AnalysisType.POINT_ESTIMATE; - } - if ( args.get("pvalue") != null ) - setIntegralPvalue(Double.valueOf(args.get("pvalue"))); - if ( args.get("low") != null ) - lowThreshold = Double.valueOf(args.get("low")); - if ( args.get("high") != null ) - highThreshold = Double.valueOf(args.get("high")); - if ( args.get("confidence") != null ) - minGenotypeConfidenceToTest = Double.valueOf(args.get("confidence")); - setLowThreshold(lowThreshold); - setHighThreshold(highThreshold); - } - - /** - * Return the count of bases matching the major (first) and minor (second) alleles as a pair. - * - */ - protected Pair getRatioCounts(char ref, ReadBackedPileup pileup, RodGeliText variant) { - final String genotype = variant.getBestGenotype(); - if ( genotype.length() > 2 ) - throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype)); - - final String bases = pileup.getBasesAsString(); - if ( bases.length() == 0 ) { - ratio = 0.0; - return new Pair(0, 0); - } - - char a = genotype.toUpperCase().charAt(0); - char b = genotype.toUpperCase().charAt(1); - int aCount = Utils.countOccurrences(a, bases.toUpperCase()); - int bCount = Utils.countOccurrences(b, bases.toUpperCase()); - - int refCount = a == ref ? aCount : bCount; - int altCount = a == ref ? bCount : aCount; - - ratio = (double)refCount / (double)(refCount + altCount); - return new Pair(refCount, altCount); - } - - protected boolean excludeHetsOnly() { return true; } - - public boolean useZeroQualityReads() { return false; } - - public double inclusionProbability() { - // A hack for now until this filter is actually converted to an empirical filter - return exclude ? 0.0 : 1.0; - } - - public String getStudyHeader() { - return "AlleleBalance("+lowThreshold+","+highThreshold+")\tRefRatio"; - } - - public String getStudyInfo() { - return (exclude ? "fail" : "pass") + "\t" + ratio; - } - - public String getVCFFilterString() { - return "AlleleBalance"; - } - - public String getScoreString() { - return String.format("%.2f", ratio); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECClusteredSnps.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECClusteredSnps.java deleted file mode 100755 index 30a7d45c9..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECClusteredSnps.java +++ /dev/null @@ -1,68 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; - -import java.util.HashMap; - - -public class VECClusteredSnps implements VariantExclusionCriterion { - private int window = 10; - private int snpThreshold = 3; - private boolean exclude; - private long distance; - - public void initialize(HashMap args) { - if ( args.get("window") != null ) - window = Integer.valueOf(args.get("window")); - if ( args.get("snps") != null ) - snpThreshold = Integer.valueOf(args.get("snps")); - - if ( window < 2 || snpThreshold < 2 ) - throw new StingException("Window and threshold values need to be >= 2"); - } - - public void compute(VariantContextWindow contextWindow) { - exclude = false; - - VariantContext[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1); - for (int i = 0; i < snpThreshold; i++) { - if ( variants[i] == null || variants[i+snpThreshold-1] == null ) - continue; - - GenomeLoc left = variants[i].getAlignmentContext(useZeroQualityReads()).getLocation(); - GenomeLoc right = variants[i+snpThreshold-1].getAlignmentContext(useZeroQualityReads()).getLocation(); - if ( left.getContigIndex() == right.getContigIndex() ) { - distance = Math.abs(right.getStart() - left.getStart()); - if ( distance <= window ) { - exclude = true; - return; - } - } - } - } - - public double inclusionProbability() { - // A hack for now until this filter is actually converted to an empirical filter - return exclude ? 0.0 : 1.0; - } - - public String getStudyHeader() { - return "ClusteredSnps("+window+","+snpThreshold+")\tWindow_size"; - } - - public String getStudyInfo() { - return (exclude ? "fail" : "pass") + "\t" + (exclude ? distance : "N/A"); - } - - public String getVCFFilterString() { - return "ClusteredSnp"; - } - - public String getScoreString() { - return String.format("%d", distance); - } - - public boolean useZeroQualityReads() { return false; } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECDepthOfCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECDepthOfCoverage.java deleted file mode 100755 index 1ef8f098e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECDepthOfCoverage.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.VariantContext; - -import java.util.HashMap; - - -/** - * Created by IntelliJ IDEA. - * User: michaelmelgar - * Date: Jun 22, 2009 - * Time: 6:04:58 PM - * To change this template use File | Settings | File Templates. - */ -public class VECDepthOfCoverage implements VariantExclusionCriterion { - private int maximum = 200; - - private boolean exclude = false; - private int depth; - - public void initialize(HashMap args) { - if ( args.get("max") != null ) - maximum = Integer.valueOf(args.get("max")); - } - - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - depth = context.getAlignmentContext(useZeroQualityReads()).getReads().size(); - exclude = depth > maximum; - } - - public double inclusionProbability() { - // A hack for now until this filter is actually converted to an empirical filter - return exclude ? 0.0 : 1.0; - } - -// public boolean isExcludable() { -// return exclude; -// } - - public String getStudyHeader() { - return "DepthOfCoverage("+maximum+")\tdepth"; - } - - public String getStudyInfo() { - return (exclude ? "fail" : "pass") + "\t" + depth; - } - - public String getVCFFilterString() { - return "DoC"; - } - - public String getScoreString() { - return String.format("%d", depth); - } - - - public boolean useZeroQualityReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECFisherStrand.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECFisherStrand.java deleted file mode 100644 index f4fda3967..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECFisherStrand.java +++ /dev/null @@ -1,210 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.gatk.refdata.RodGeliText; -import org.broadinstitute.sting.utils.BaseUtils; -import net.sf.samtools.SAMRecord; -import cern.jet.math.Arithmetic; - -import java.util.List; -import java.util.HashMap; - - -public class VECFisherStrand implements VariantExclusionCriterion { - private double pvalueLimit = 0.00001; - private double pValue; - private boolean exclude; - - public void initialize(HashMap args) { - if ( args.get("pvalue") != null ) - pvalueLimit = Double.valueOf(args.get("pvalue")); - } - - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - RodGeliText variant = context.getVariant(); - int allele1 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(0)); - int allele2 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(1)); - - if (allele1 != allele2) { - exclude = strandTest(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()), allele1, allele2, pvalueLimit, null); - } else { - exclude = false; - } - } - - public double inclusionProbability() { - // A hack for now until this filter is actually converted to an empirical filter - return exclude ? 0.0 : 1.0; - } - - public String getStudyHeader() { - return "FisherStrand("+pvalueLimit+")\tpvalue"; - } - - public String getStudyInfo() { - return (exclude ? "fail" : "pass") + "\t" + pValue; - } - - public String getVCFFilterString() { - return "strand"; - } - - public String getScoreString() { - return String.format("%.4f",pValue); - } - - public boolean useZeroQualityReads() { return false; } - - public boolean strandTest(char ref, AlignmentContext context, int allele1, int allele2, double threshold, StringBuffer out) { - int[][] table = getContingencyTable(context, allele1, allele2); - if ( !variantIsHet(table) ) - return false; - - double pCutoff = computePValue(table); - //printTable(table, pCutoff); - - pValue = pCutoff; - while (rotateTable(table)) { - double pValuePiece = computePValue(table); - - //printTable(table, pValuePiece); - - if (pValuePiece <= pCutoff) { - pValue += pValuePiece; - } - } - - table = getContingencyTable(context, allele1, allele2); - - while (unrotateTable(table)) { - double pValuePiece = computePValue(table); - - //printTable(table, pValuePiece); - - if (pValuePiece <= pCutoff) { - pValue += pValuePiece; - } - } - - //System.out.printf("P-cutoff: %f\n", pCutoff); - //System.out.printf("P-value: %f\n\n", pValue); - - // optionally print out the pvalue and the alternate allele counts - if ( out != null ) { - int refBase = BaseUtils.simpleBaseToBaseIndex(ref); - table = getContingencyTable(context, allele1, allele2); - if ( allele1 != refBase ) - out.append(pValue + "\t" + table[0][0] + "\t" + table[0][1] + "\t"); - else - out.append(pValue + "\t" + table[1][0] + "\t" + table[1][1] + "\t"); - } - - return pValue < threshold; - } - - private static boolean variantIsHet(int[][] table) { - return ((table[0][1] != 0 || table[0][1] != 0) && (table[1][0] != 0 || table[1][1] != 0)); - } - - private static void printTable(int[][] table, double pValue) { - System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue); - } - - private static boolean rotateTable(int[][] table) { - table[0][0] -= 1; - table[1][0] += 1; - - table[0][1] += 1; - table[1][1] -= 1; - - return (table[0][0] >= 0 && table[1][1] >= 0) ? true : false; - } - - private static boolean unrotateTable(int[][] table) { - table[0][0] += 1; - table[1][0] -= 1; - - table[0][1] -= 1; - table[1][1] += 1; - - return (table[0][1] >= 0 && table[1][0] >= 0) ? true : false; - } - - private double computePValue(int[][] table) { - - int[] rowSums = { sumRow(table, 0), sumRow(table, 1) }; - int[] colSums = { sumColumn(table, 0), sumColumn(table, 1) }; - int N = rowSums[0] + rowSums[1]; - - // calculate in log space so we don't die with high numbers - double pCutoff = Arithmetic.logFactorial(rowSums[0]) - + Arithmetic.logFactorial(rowSums[1]) - + Arithmetic.logFactorial(colSums[0]) - + Arithmetic.logFactorial(colSums[1]) - - Arithmetic.logFactorial(table[0][0]) - - Arithmetic.logFactorial(table[0][1]) - - Arithmetic.logFactorial(table[1][0]) - - Arithmetic.logFactorial(table[1][1]) - - Arithmetic.logFactorial(N); - return Math.exp(pCutoff); - } - - private static int sumRow(int[][] table, int column) { - int sum = 0; - for (int r = 0; r < table.length; r++) { - sum += table[r][column]; - } - - return sum; - } - - private static int sumColumn(int[][] table, int row) { - int sum = 0; - for (int c = 0; c < table[row].length; c++) { - sum += table[row][c]; - } - - return sum; - } - - /** - Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: - * fw rc - * allele1 # # - * allele2 # # - * @param context the context for the locus - * @param allele1 information for the called variant - * @param allele2 information for the called variant - * @return a 2x2 contingency table - */ - private static int[][] getContingencyTable(AlignmentContext context, int allele1, int allele2) { - - int[][] table = new int[2][2]; - - List reads = context.getReads(); - List offsets = context.getOffsets(); - - for (int readIndex = 0; readIndex < reads.size(); readIndex++) { - SAMRecord read = reads.get(readIndex); - int offset = offsets.get(readIndex); - - // skip over deletion sites - if ( offset == -1 ) - continue; - - int readAllele = BaseUtils.simpleBaseToBaseIndex(read.getReadString().charAt(offset)); - boolean isFW = !read.getReadNegativeStrandFlag(); - - if (readAllele == allele1 || readAllele == allele2) { - int row = (readAllele == allele1) ? 0 : 1; - int column = isFW ? 0 : 1; - - table[row][column]++; - } - } - - return table; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECHomopolymer.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECHomopolymer.java deleted file mode 100755 index 78eec13bc..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECHomopolymer.java +++ /dev/null @@ -1,126 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import net.sf.picard.reference.ReferenceSequence; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; - - -/** - * Created by IntelliJ IDEA. - * User: mmelgar - * Date: Jul 28, 2009 - * Time: 3:59:38 PM - * To change this template use File | Settings | File Templates. - */ -public class VECHomopolymer implements VariantExclusionCriterion { - - private int extent = 5; - private float frac = 0.8f; - public IndexedFastaSequenceFile refSeq; - private String contig; - public ReferenceSequence contigSeq; - - private boolean exclude = false; - - public void initialize(HashMap args) { - if ( args.get("extent") != null ) - extent = Integer.valueOf(args.get("extent")); - if ( args.get("fraction") != null ) - frac = Float.valueOf(args.get("fraction")); - - File refFile = new File ("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"); - - try { - refSeq = new IndexedFastaSequenceFile(refFile); - } catch (IOException e) { - refSeq = null; - } - } - - public boolean useZeroQualityReads() { return false; } - - public String getStudyHeader() { - return ""; - } - - public String getStudyInfo() { - return ""; - } - - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - AlignmentContext alignmentContext = context.getAlignmentContext(useZeroQualityReads()); - char[] geno = context.getVariant().getBestGenotype().toCharArray(); - int[] genotype = {-1,-1}; - for (int h = 0; h < 2; h ++) { - genotype[h] = BaseUtils.simpleBaseToBaseIndex(geno[h]); - } - - if (contig == null || !alignmentContext.getContig().equals(contig)) { - contig = alignmentContext.getContig(); - contigSeq = refSeq.getSequence(contig); - } - - String bases = new String(contigSeq.getBases()); - String prevbases = bases.substring((int) (alignmentContext.getPosition() - extent - 1), (int) (alignmentContext.getPosition() - 1)); - String nextbases = bases.substring((int) (alignmentContext.getPosition() + 1), (int) (alignmentContext.getPosition() + extent + 1)); - - int[] prevCounts = {0,0,0,0}; - int[] nextCounts = {0,0,0,0}; - for (int i = 0; i < extent; i ++) { - int prevBaseIndex = BaseUtils.simpleBaseToBaseIndex(prevbases.toCharArray()[i]); - int nextBaseIndex = BaseUtils.simpleBaseToBaseIndex(nextbases.toCharArray()[i]); - if (prevBaseIndex != -1) { - prevCounts[prevBaseIndex] ++; - } - if (nextBaseIndex != -1) { - nextCounts[nextBaseIndex] ++; - } - } - - int prevMaxBase = 0; - int nextMaxBase = 0; - int prevMax = prevCounts[0]; - int nextMax = nextCounts[0]; - for (int j = 1; j < 4; j ++) { - if (prevCounts[j] > prevMax) { - prevMax = prevCounts[j]; - prevMaxBase = j; - } - if (nextCounts[j] > nextMax) { - nextMax = nextCounts[j]; - nextMaxBase = j; - } - } - - int[] homopolymer = {-1,-1}; - if ((float)(prevMax)/(float)(extent) > frac) {homopolymer[0] = prevMaxBase;} - if ((float)(nextMax)/(float)(extent) > frac) {homopolymer[1] = nextMaxBase;} - - char ref = context.getReferenceContext().getBase(); - boolean checkOne = homopolymer[0] == genotype[0] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref); - boolean checkTwo = homopolymer[0] == genotype[1] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref); - boolean checkThree = homopolymer[1] == genotype[0] && homopolymer[1] != BaseUtils.simpleBaseToBaseIndex(ref); - boolean checkFour = homopolymer[1] == genotype[1] && homopolymer[1] != BaseUtils.simpleBaseToBaseIndex(ref); - - exclude = checkOne || checkTwo || checkThree || checkFour; - } - - public double inclusionProbability() { - return exclude ? 0.0 : 1.0; - } - - public String getVCFFilterString() { - return "homopolymer"; - } - - public String getScoreString() { - return exclude ? "1" : "0"; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECIndelArtifact.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECIndelArtifact.java deleted file mode 100755 index dc9f2847c..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECIndelArtifact.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.gatk.refdata.CleanedOutSNPROD; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.util.HashMap; - - -public class VECIndelArtifact implements VariantExclusionCriterion { - private boolean exclude; - private String source = "N/A"; - - public void initialize(HashMap arguments) {} - - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - RefMetaDataTracker tracker = context.getTracker(); - - CleanedOutSNPROD cleanedSNP = (CleanedOutSNPROD)tracker.lookup("cleaned", null); - if ( cleanedSNP != null && !cleanedSNP.isRealSNP() ) { - exclude = true; - source = "Cleaner"; - return; - } - - Variation indelCall = (Variation)tracker.lookup("indels", null); - if ( indelCall != null ) { - exclude = true; - source = "IndelCall"; - return; - } - - rodDbSNP dbsnp = (rodDbSNP)tracker.lookup("dbSNP", null); - if ( dbsnp != null && dbsnp.isIndel() ) { - exclude = true; - source = "dbSNP"; - return; - } - - exclude = false; - } - - public double inclusionProbability() { - return exclude ? 0.0 : 1.0; - } - - public String getStudyHeader() { - return "IndelArtifact\tSource"; - } - - public String getStudyInfo() { - return (exclude ? "fail" : "pass") + "\t" + source; - } - - public String getVCFFilterString() { - return "InDel"; - } - - public String getScoreString() { - return exclude ? "1" : "0"; - } - - public boolean useZeroQualityReads() { return false; } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECLodThreshold.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECLodThreshold.java deleted file mode 100755 index 72e7dae32..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECLodThreshold.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.VariantContext; - -import java.util.HashMap; - -public class VECLodThreshold implements VariantExclusionCriterion { - private double lodThreshold = 5.0; - private double lod; - private boolean exclude; - - public void initialize(HashMap args) { - if ( args.get("lod") != null ) - lodThreshold = Double.valueOf(args.get("lod")); - } - - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - lod = context.getVariant().getLodBtr(); - exclude = lod < lodThreshold; - } - - public boolean useZeroQualityReads() { return false; } - - public double inclusionProbability() { - // A hack for now until this filter is actually converted to an empirical filter - return exclude ? 0.0 : 1.0; - } - - public String getStudyHeader() { - return "LodThreshold("+lodThreshold+")\tlod"; - } - - public String getStudyInfo() { - return (exclude ? "fail" : "pass") + "\t" + lod; - } - - public String getVCFFilterString() { - return "LOD"; - } - - public String getScoreString() { - return String.format("%.2f",lod); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECLodThresholdByCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECLodThresholdByCoverage.java deleted file mode 100755 index fe8db5925..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECLodThresholdByCoverage.java +++ /dev/null @@ -1,47 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import java.util.HashMap; - -public class VECLodThresholdByCoverage implements VariantExclusionCriterion { - private double slope = 0.46; - private double depth; - private double lod; - private boolean exclude; - - public void initialize(HashMap arguments) { - if (arguments.get("slope") != null) { - slope = Double.valueOf(arguments.get("slope")); - } - } - - public void compute(VariantContextWindow contextWindow) { - depth = (double) contextWindow.getContext().getVariant().getPileupDepth(); - lod = contextWindow.getContext().getVariant().getLodBtr(); - - exclude = (lod < slope*depth); - } - - public double inclusionProbability() { - return exclude ? 0.0 : 1.0; - } - - public String getStudyHeader() { - return String.format("LodThresholdByCoverage(%f)\tdepth\tlod", slope); - } - - public String getStudyInfo() { - return String.format("%s\t%d\t%f", exclude ? "fail" : "pass", (int) depth, lod); - } - - public String getVCFFilterString() { - return "LODbyDoC"; - } - - public String getScoreString() { - return exclude ? "1" : "0"; - } - - public boolean useZeroQualityReads() { - return false; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECMappingQuality.java deleted file mode 100755 index d51c2705a..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECMappingQuality.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.utils.MathUtils; -import net.sf.samtools.SAMRecord; - -import java.util.List; -import java.util.HashMap; - - -public class VECMappingQuality implements VariantExclusionCriterion { - private double minQuality = 5.0; - private double rms; - private boolean exclude; - - public void initialize(HashMap args) { - if ( args.get("min") != null ) - minQuality = Double.valueOf(args.get("min")); - } - - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - List reads = context.getAlignmentContext(useZeroQualityReads()).getReads(); - int[] qualities = new int[reads.size()]; - for (int i=0; i < reads.size(); i++) - qualities[i] = reads.get(i).getMappingQuality(); - rms = MathUtils.rms(qualities); - exclude = rms < minQuality; - } - - public double inclusionProbability() { - // A hack for now until this filter is actually converted to an empirical filter - return exclude ? 0.0 : 1.0; - } - - public String getStudyHeader() { - return "MappingQuality("+minQuality+")\trms"; - } - - public String getStudyInfo() { - return (exclude ? "fail" : "pass") + "\t" + rms; - } - - public String getVCFFilterString() { - return "MQ"; - } - - public String getScoreString() { - return String.format("%.2f", rms); - } - - public boolean useZeroQualityReads() { return true; } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECMappingQualityZero.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECMappingQualityZero.java deleted file mode 100755 index 81a0861e6..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECMappingQualityZero.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import net.sf.samtools.SAMRecord; - -import java.util.List; -import java.util.HashMap; - - -public class VECMappingQualityZero implements VariantExclusionCriterion { - private int maximum = 50; - private int mq0Count; - private boolean exclude; - - public void initialize(HashMap args) { - if ( args.get("max") != null ) - maximum = Integer.valueOf(args.get("max")); - } - - public void compute(VariantContextWindow contextWindow) { - VariantContext context = contextWindow.getContext(); - List reads = context.getAlignmentContext(useZeroQualityReads()).getReads(); - mq0Count = 0; - for (int i=0; i < reads.size(); i++) { - if ( reads.get(i).getMappingQuality() == 0 ) - mq0Count++; - } - exclude = mq0Count > maximum; - } - - public double inclusionProbability() { - // A hack for now until this filter is actually converted to an empirical filter - return exclude ? 0.0 : 1.0; - } - - public String getStudyHeader() { - return "MappingQualityZero("+maximum+")\tMQ0_count"; - } - - public String getStudyInfo() { - return (exclude ? "fail" : "pass") + "\t" + mq0Count; - } - - public String getVCFFilterString() { - return "MQzero"; - } - - public String getScoreString() { - return String.format("%d", mq0Count); - } - - public boolean useZeroQualityReads() { return true; } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECNull.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECNull.java deleted file mode 100755 index e1a506a4c..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECNull.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import java.util.HashMap; - -public class VECNull implements VariantExclusionCriterion { - public void initialize(HashMap arguments) {} - - public void compute(VariantContextWindow contextWindow) {} - - public double inclusionProbability() { - // A hack for now until this filter is actually converted to an empirical filter - return 1.0; - } - - public String getStudyHeader() { - return ""; - } - - public String getStudyInfo() { - return ""; - } - - public String getVCFFilterString() { - return ""; - } - - public String getScoreString() { - return ""; - } - - public boolean useZeroQualityReads() { - return false; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECOnOffGenotypeRatio.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECOnOffGenotypeRatio.java deleted file mode 100755 index c3834c09a..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECOnOffGenotypeRatio.java +++ /dev/null @@ -1,85 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import org.broadinstitute.sting.gatk.refdata.RodGeliText; -import org.broadinstitute.sting.utils.*; - -import java.util.HashMap; - -public class VECOnOffGenotypeRatio extends RatioFilter { - private double threshold = 0.8; - private double ratio; - - public VECOnOffGenotypeRatio() { - super("On/Off Genotype Ratio", VECOnOffGenotypeRatio.class, Tail.LeftTailed); - } - - public void initialize(HashMap args) { - if ( args.get("analysis") != null ) { - if ( args.get("analysis").equalsIgnoreCase("fair_coin_test") ) - analysis = AnalysisType.FAIR_COIN_TEST; - else if ( args.get("analysis").equalsIgnoreCase("point_estimate") ) - analysis = AnalysisType.POINT_ESTIMATE; - } - if ( args.get("pvalue") != null ) - setIntegralPvalue(Double.valueOf(args.get("pvalue"))); - if ( args.get("threshold") != null ) - threshold = Double.valueOf(args.get("threshold")); - if ( args.get("confidence") != null ) - minGenotypeConfidenceToTest = Double.valueOf(args.get("confidence")); - setLowThreshold(threshold); - } - - /** - * Return the counts of bases that are on (matching the bestGenotype) and off (not matching the - * best genotype). On are in the first field, off in the second. - * - */ - protected Pair getRatioCounts(char ref, ReadBackedPileup pileup, RodGeliText variant) { - final String genotype = variant.getBestGenotype().toUpperCase(); - if ( genotype.length() > 2 ) - throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype)); - - final String bases = pileup.getBasesAsString(); - if ( bases.length() == 0 ) { - ratio = 0.0; - return new Pair(0, 0); - } - - int on = 0, off = 0; - for ( char base : BaseUtils.BASES ) { - int count = BasicPileup.countBase(base, bases); - if ( Utils.countOccurrences(base, genotype) > 0 ) - on += count; - else - off += count; - //System.out.printf("count = %d, on=%d, off=%d for %c in %s%n", count, on, off, base, genotype); - } - - ratio = (double)on / (double)(on + off); - return new Pair(on, off); - } - - protected boolean excludeHetsOnly() { return false; } - - public double inclusionProbability() { - return exclude ? 0.0 : 1.0; - } - - public boolean useZeroQualityReads() { return false; } - - public String getStudyHeader() { - return "OnOffGenotype("+threshold+")\tOnRatio"; - } - - public String getStudyInfo() { - return (exclude ? "fail" : "pass") + "\t" + ratio; - } - - public String getVCFFilterString() { - return "onOffGenotype"; - } - - public String getScoreString() { - return String.format("%.2f", ratio); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantContextWindow.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantContextWindow.java index d248c9364..52f14137a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantContextWindow.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantContextWindow.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.gatk.contexts.VariantContext; -import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.refdata.*; import java.util.*; @@ -41,14 +41,14 @@ public class VariantContextWindow { /** * The variants. */ - private LinkedList window = new LinkedList(); + private LinkedList> window = new LinkedList>(); private int currentContext; /** * Contructor for a variant context. * @param firstVariants the first set of variants, comprising the right half of the window */ - public VariantContextWindow(List firstVariants) { + public VariantContextWindow(List> firstVariants) { int windowSize = (firstVariants == null ? 1 : 2 * firstVariants.size() + 1); currentContext = (firstVariants == null ? 0 : firstVariants.size()); window.addAll(firstVariants); @@ -60,7 +60,7 @@ public class VariantContextWindow { * The context currently being examined. * @return The current context. */ - public VariantContext getContext() { + public Pair getContext() { return window.get(currentContext); } @@ -75,17 +75,17 @@ public class VariantContextWindow { /** * The window around the context currently being examined. * @param elementsToLeft number of earlier contexts to return () - * @param elementsToLeft number of later contexts to return () + * @param elementsToRight number of later contexts to return () * @return The current context window. */ - public VariantContext[] getWindow(int elementsToLeft, int elementsToRight) { + public Pair[] getWindow(int elementsToLeft, int elementsToRight) { if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() ) throw new StingException("Too large a window requested"); if ( elementsToLeft < 0 || elementsToRight < 0 ) throw new StingException("Window size cannot be negative"); - VariantContext[] array = new VariantContext[elementsToLeft + elementsToRight + 1]; - ListIterator iter = window.listIterator(currentContext - elementsToLeft); + Pair[] array = new Pair[elementsToLeft + elementsToRight + 1]; + ListIterator> iter = window.listIterator(currentContext - elementsToLeft); for (int i = 0; i < elementsToLeft + elementsToRight + 1; i++) array[i] = iter.next(); return array; @@ -95,7 +95,7 @@ public class VariantContextWindow { * Move the window along to the next context * @param context The new rightmost context */ - public void moveWindow(VariantContext context) { + public void moveWindow(Pair context) { window.removeFirst(); window.addLast(context); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantExclusionCriterion.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantExclusionCriterion.java deleted file mode 100755 index a930c1034..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantExclusionCriterion.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.filters; - -import java.util.HashMap; - -public interface VariantExclusionCriterion { - public void initialize(HashMap arguments); - public void compute(VariantContextWindow contextWindow); - - //public boolean isExcludable(); - public double inclusionProbability(); - - public String getStudyHeader(); - public String getStudyInfo(); - public String getVCFFilterString(); - public String getScoreString(); - - public boolean useZeroQualityReads(); -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 1028220a4..db47c3248 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -4,209 +4,68 @@ import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.playground.gatk.walkers.variantstovcf.VariantsToVCF; -import java.io.FileNotFoundException; -import java.io.PrintWriter; import java.util.*; -import java.io.*; +import org.apache.commons.jexl.*; /** - * VariantFiltrationWalker applies specified conditionally independent features and filters to pre-called variants. - * The former modifiesthe likelihoods of each genotype, while the latter makes a decision to include or exclude a - * variant outright. At the moment, the variants are expected to be in gelitext format. + * VariantFiltrationWalker filters variant calls in VCF format. */ -@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type= RodGeliText.class)) -//@Allows(value={DataSource.READS, DataSource.REFERENCE}, referenceMetaData={@RMD(name="variant",type= RodGeliText.class), @RMD(name="variantvcf",type= RodVCF.class), @RMD(name="dbsnp",type=rodDbSNP.class), @RMD(name="interval",type= IntervalRod.class)}) -public class VariantFiltrationWalker extends LocusWalker { - @Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) public File VCF_OUT; - @Argument(fullName="sampleName", shortName="sample", doc="Temporary hack to get VCF to work: the sample (NA-ID) corresponding to the variants", required=true) public String sampleName; - @Argument(fullName="includedOutput", shortName="included", doc="File to which all variants passing filters should be written", required=true) public String INCLUDED_OUT; - @Argument(fullName="annotatedOutput", shortName="annotated", doc="File to which all variants should be written with annotations - for debugging/parameterizing", required=false) public String ANNOTATED_OUT; - @Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES; - @Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:key1=arg1,key2=arg2,...]'", required=false) public String[] EXCLUSIONS; - @Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9; - @Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false; - @Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false; - @Argument(fullName="onlyAnnotate", shortName="oa", doc="If provided, do not write output into the FILTER field, but only annotate the VCF") public boolean onlyAnnotate = false; +@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class)) +public class VariantFiltrationWalker extends RodWalker { + @Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false) + protected String FILTER_STRING = null; + @Argument(fullName="filterName", shortName="filterName", doc="The text to put in the FILTER field if a filter expression is provided and a variant call matches", required=false) + protected String FILTER_NAME = "GATK_filter"; - private List> featureClasses; - private List> exclusionClasses; + @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize)", required=false) + protected Integer clusterSize = 3; + @Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1)", required=false) + protected Integer clusterWindow = 0; - private VCFWriter vcfWriter; - private VCFHeader vcfHeader; - private PrintWriter annotatedWriter; - private PrintWriter includedWriter; - private HashMap sampleNames = new HashMap(); + @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false) + protected String MASK_NAME = "Mask"; - private ArrayList requestedFeatures; - private ArrayList requestedExclusions; + private VCFWriter writer = null; + + private ClusteredSnps clusteredSNPs = null; + + private Expression filterExpression = null; // the structures necessary to initialize and maintain a windowed context private VariantContextWindow variantContextWindow; private static final int windowSize = 10; // 10 variants on either end of the current one - private ArrayList windowInitializer = new ArrayList(); + private ArrayList> windowInitializer = new ArrayList>(); - private void listFiltersAndExit() { - out.println("\nAvailable features: " + getAvailableClasses(featureClasses)); - out.println("Available exclusion criteria: " + getAvailableClasses(exclusionClasses) + "\n"); - System.exit(0); + + private void initializeVcfWriter(RodVCF rod) { + // setup the header fields + Map hInfo = new HashMap(); + hInfo.put("format", VCFWriter.VERSION); + hInfo.put("source", "VariantFiltration"); + hInfo.put("reference", this.getToolkit().getArguments().referenceFile.getName()); + + VCFHeader header = new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()); + writer = new VCFWriter(header, out); } - /** - * Prepare the output file and the list of available features. - */ public void initialize() { - featureClasses = PackageUtils.getClassesImplementingInterface(IndependentVariantFeature.class); - exclusionClasses = PackageUtils.getClassesImplementingInterface(VariantExclusionCriterion.class); - - if (LIST) { listFiltersAndExit(); } + if ( clusterWindow > 0 ) + clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow); try { - sampleNames.put(sampleName.toUpperCase(), "variant"); - vcfHeader = VariantsToVCF.getHeader(this.getToolkit().getArguments(), sampleNames.keySet()); - vcfWriter = new VCFWriter(vcfHeader, VCF_OUT); - includedWriter = new PrintWriter(INCLUDED_OUT); - includedWriter.println(GeliTextWriter.headerLine); - if ( ANNOTATED_OUT != null ) { - annotatedWriter = new PrintWriter(ANNOTATED_OUT); - annotatedWriter.print("Chr\tPosition\t"); - } - requestedFeatures = new ArrayList(); - requestedExclusions = new ArrayList(); - - // Initialize requested features - if (FEATURES != null) { - for (String requestedFeatureString : FEATURES) { - String[] requestedFeaturePieces = requestedFeatureString.split(":"); - String requestedFeatureName = requestedFeaturePieces[0]; - String requestedFeatureArgs = (requestedFeaturePieces.length == 2) ? requestedFeaturePieces[1] : ""; - - boolean found = false; - for ( Class featureClass : featureClasses ) { - String featureClassName = rationalizeClassName(featureClass); - - if (requestedFeatureName.equalsIgnoreCase(featureClassName)) { - found = true; - - try { - IndependentVariantFeature ivf = (IndependentVariantFeature) featureClass.newInstance(); - ivf.initialize(requestedFeatureArgs); - requestedFeatures.add(ivf); - - if ( annotatedWriter != null ) - annotatedWriter.print(ivf.getStudyHeader() + "\t"); - } catch (InstantiationException e) { - throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName())); - } catch (IllegalAccessException e) { - throw new StingException(String.format("Cannot instantiate feature class '%s': must have no-arg constructor", featureClass.getSimpleName())); - } - } - } - - if (!found) { - throw new StingException("Unknown feature '" + requestedFeatureString + "'. Issue the '-ls' argument to list available features."); - } - } - } - - if (EXCLUSIONS != null) { - for (String requestedExclusionString : EXCLUSIONS) { - String[] requestedExclusionPieces = requestedExclusionString.split(":"); - String requestedExclusionName = requestedExclusionPieces[0]; - - boolean found = false; - for ( Class exclusionClass : exclusionClasses ) { - String exclusionClassName = rationalizeClassName(exclusionClass); - - if (requestedExclusionName.equalsIgnoreCase(exclusionClassName)) { - found = true; - - try { - HashMap requestedArgs = new HashMap(); - if ( requestedExclusionPieces.length == 2 ) { - String[] argStrings = requestedExclusionPieces[1].split(","); - for (int i = 0; i < argStrings.length; i++ ) { - String[] arg = argStrings[i].split("="); - if ( arg.length == 2 ) - requestedArgs.put(arg[0], arg[1]); - } - } - VariantExclusionCriterion vec = (VariantExclusionCriterion) exclusionClass.newInstance(); - vec.initialize(requestedArgs); - requestedExclusions.add(vec); - - if ( annotatedWriter != null ) - annotatedWriter.print(vec.getStudyHeader() + "\t"); - } catch (InstantiationException e) { - throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName())); - } catch (IllegalAccessException e) { - throw new StingException(String.format("Cannot instantiate exclusion class '%s': must have no-arg constructor", exclusionClass.getSimpleName())); - } - } - } - - if (!found) { - throw new StingException("Unknown exclusion criterion '" + requestedExclusionString + "'. Issue the '-ls' argument to list available exclusion criteria."); - } - } - } - - if ( annotatedWriter != null ) - annotatedWriter.print("inDbSNP\tinHapMap\tisHet\n"); - } catch (FileNotFoundException e) { - throw new StingException(String.format("Could not open file(s) for writing")); + if ( FILTER_STRING != null ) + filterExpression = ExpressionFactory.createExpression(FILTER_STRING); + } catch (Exception e) { + throw new StingException("Invalid expression used (" + FILTER_STRING + "). Please see the JEXL docs for correct syntax."); } } - /** - * Trim the 'IVF' or 'VEC' off the feature/exclusion name so the user needn't specify that on the command-line. - * - * @param featureClass the feature class whose name we should rationalize - * @return the class name, minus 'IVF' - */ - private String rationalizeClassName(Class featureClass) { - String featureClassName = featureClass.getSimpleName(); - String newName = featureClassName.replaceFirst("IVF", ""); - newName = newName.replaceFirst("VEC", ""); - return newName; - } - - /** - * Returns a comma-separated list of available classes the user may specify at the command-line. - * - * @param classes an ArrayList of classes - * @return String of available classes - */ - private String getAvailableClasses(List> classes) { - String availableString = ""; - - for (int classIndex = 0; classIndex < classes.size(); classIndex++) { - availableString += rationalizeClassName(classes.get(classIndex)) + (classIndex == classes.size() - 1 ? "" : ","); - } - - return availableString; - } - - /** - * Initialize the number of loci processed to zero. - * - * @return 0 - */ public Integer reduceInit() { return 0; } - - /** - * We want reads that span deletions - * - * @return true - */ - public boolean includeReadsWithDeletionAtLoci() { return true; } - /** * For each site of interest, rescore the genotype likelihoods by applying the specified feature set. * @@ -216,14 +75,16 @@ public class VariantFiltrationWalker extends LocusWalker { * @return 1 if the locus was successfully processed, 0 if otherwise */ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - RodGeliText variant = (RodGeliText) tracker.lookup("variant", null); - - // Ignore places where we don't have a variant or where the reference base is ambiguous. - if ( variant == null || BaseUtils.simpleBaseToBaseIndex(ref.getBase()) == -1 ) + if ( tracker == null ) return 0; - RodVCF variantVCF = (RodVCF) tracker.lookup("variantVCF", null); - VariantContext varContext = new VariantContext(tracker, ref, context, variant, variantVCF); + RODRecordList rods = tracker.getTrackData("variant", null); + // ignore places where we don't have a variant + if ( rods == null || rods.getRecords().size() == 0 ) + return 0; + + RodVCF variant = (RodVCF)rods.getRecords().get(0); + Pair varContext = new Pair(tracker, variant); // if we're still initializing the context, do so if ( windowInitializer != null ) { @@ -234,117 +95,67 @@ public class VariantFiltrationWalker extends LocusWalker { } } else { variantContextWindow.moveWindow(varContext); - compute(); + filter(); } return 1; } - private void compute() { + private void filter() { // get the current context - VariantContext context = variantContextWindow.getContext(); + Pair context = variantContextWindow.getContext(); if ( context == null ) return; - HashMap exclusionResults = new HashMap(); - RodGeliText variant = context.getVariant(); - GenomeLoc loc = context.getAlignmentContext(true).getLocation(); + StringBuilder filterString = new StringBuilder(); - if (VERBOSE) { out.println("Original:\n" + variant); } + // test for SNP mask, if present + RODRecordList mask = context.first.getTrackData("mask", null); + if ( mask != null && mask.getRecords().size() > 0 ) + addFilter(filterString, MASK_NAME); - if ( annotatedWriter != null ) - annotatedWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t"); + // test for clustered SNPs if requested + if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) ) + addFilter(filterString, "SnpCluster"); - // Apply features that modify the likelihoods and LOD scores - for ( IndependentVariantFeature ivf : requestedFeatures ) { - ivf.compute(variantContextWindow); - double[] weights = ivf.getLikelihoods(); - variant.adjustLikelihoods(weights); + if ( filterExpression != null ) { + Map infoMap = context.second.mCurrentRecord.getInfoValues(); - if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); } + JexlContext jContext = JexlHelper.createContext(); + jContext.setVars(infoMap); - if ( annotatedWriter != null ) - annotatedWriter.print(ivf.getStudyInfo() + "\t"); - } - - // Apply exclusion tests that score the variant call - if (VERBOSE) { - out.print("InclusionProbabilities:["); - } - - // Use the filters to score the variant - String filterFailureString = ""; - HashMap filterINFOString = new HashMap(); - double jointInclusionProbability = 1.0; - for ( VariantExclusionCriterion vec : requestedExclusions ) { - vec.compute(variantContextWindow); - - String exclusionClassName = rationalizeClassName(vec.getClass()); - - Double inclusionProbability = vec.inclusionProbability(); - jointInclusionProbability *= inclusionProbability; - exclusionResults.put(exclusionClassName, inclusionProbability); - - if (inclusionProbability < INCLUSION_THRESHOLD) { - filterFailureString += vec.getVCFFilterString() + ";"; + try { + if ( (Boolean)filterExpression.evaluate(jContext) ) + addFilter(filterString, FILTER_NAME); + } catch (Exception e) { + throw new StingException(e.getMessage()); } - filterINFOString.put(vec.getVCFFilterString(), vec.getScoreString()); - - if (VERBOSE) { - out.print(exclusionClassName + "=" + inclusionProbability + ";"); - } - - if ( annotatedWriter != null ) - annotatedWriter.print(vec.getStudyInfo() + "\t"); } - // Decide whether we should keep the call or not - if (jointInclusionProbability >= INCLUSION_THRESHOLD) { - includedWriter.println(variant); - } - if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); } - - if ( annotatedWriter != null ) { - rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null); - if ( dbsnp == null ) - annotatedWriter.print("false\tfalse\t"); - else - annotatedWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t"); - annotatedWriter.println(GenotypeUtils.isHet(variant)); - } - - writeVCF(context, filterFailureString, filterINFOString); + writeVCF(context.second, filterString); } - - private void writeVCF(VariantContext context, final String filterFailureString, HashMap filterINFOString) { - VCFRecord rec = getVCFRecord(context); - if ( rec != null ) { - rec.addInfoFields(filterINFOString); - if ( ! onlyAnnotate && !filterFailureString.equals("") ) - rec.setFilterString(filterFailureString); - vcfWriter.addRecord(rec); - } + private static void addFilter(StringBuilder sb, String filter) { + if ( sb.length() > 0 ) + sb.append(";"); + sb.append(filter); } + private void writeVCF(RodVCF variant, StringBuilder filterString) { + VCFRecord rec = variant.mCurrentRecord; - private VCFRecord getVCFRecord(VariantContext context) { - if ( context.getVariantVCF() != null ) { - return context.getVariantVCF().mCurrentRecord; - } else { - List gt = new ArrayList(); - Map map = new HashMap(); - return VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false); + if ( filterString.length() != 0 ) { + // if the record is already filtered, don't destroy those filters + if ( rec.isFiltered() ) + filterString.append(";" + rec.getFilterString()); + rec.setFilterString(filterString.toString()); } + + if ( writer == null ) + initializeVcfWriter(variant); + writer.addRecord(rec); } - /** - * Increment the number of loci processed. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * @return the new number of loci processed. - */ public Integer reduce(Integer value, Integer sum) { return sum + value; } @@ -363,14 +174,12 @@ public class VariantFiltrationWalker extends LocusWalker { } for (int i=0; i < windowSize; i++) { variantContextWindow.moveWindow(null); - compute(); + filter(); } out.printf("Processed %d loci.\n", result); - vcfWriter.close(); - includedWriter.close(); - if ( annotatedWriter != null ) - annotatedWriter.close(); + if ( writer != null ) + writer.close(); } }