diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/ClusteredSNPFilterWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/ClusteredSNPFilterWalker.java deleted file mode 100755 index db1879393..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/ClusteredSNPFilterWalker.java +++ /dev/null @@ -1,141 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.variants; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodVariants; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; -import org.broadinstitute.sting.utils.cmdLine.Argument; - -import java.io.*; -import java.util.*; - -/** - * ClusteredSNPFilterWalker takes a list of variant sites and filters out those that are - * too clustered together. At the moment, the variants are expected to be in gelitext format. - */ -@Requires(value={DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class)) -public class ClusteredSNPFilterWalker extends RefWalker { - @Argument(fullName="windowSize", shortName="window", doc="window size for calculating clusters", required=false) - int windowSize = 10; - @Argument(fullName="clusterSize", shortName="cluster", doc="number of variants in a window to be considered a cluster", required=false) - int clusterSize = 3; - @Argument(fullName="variants_out", shortName="VO", doc="File to which variants passing the filter should be written", required=true) - File VARIANTS_OUT = null; - @Argument(fullName="failed_out", shortName="FO", doc="File to which variants failing the filter should be written", required=false) - File FILTERED_OUT = null; - - private PrintWriter vwriter = null; - private PrintWriter fwriter = null; - - private LinkedList queue = new LinkedList(); - - /** - * Prepare the output file and the list of available features. - */ - public void initialize() { - try { - vwriter = new PrintWriter(VARIANTS_OUT); - vwriter.println(GeliTextWriter.headerLine); - if ( FILTERED_OUT != null ) { - fwriter = new PrintWriter(FILTERED_OUT); - fwriter.println(GeliTextWriter.headerLine); - } - } catch (FileNotFoundException e) { - throw new StingException(String.format("Could not open file for writing")); - } - } - - /** - * Initialize the number of loci processed to zero. - * - * @return 0 - */ - public Integer reduceInit() { return 0; } - - /** - * For each site of interest, rescore the genotype likelihoods by applying the specified feature set. - * - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return 1 if the locus was successfully processed, 0 if otherwise - */ - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - rodVariants variant = (rodVariants)tracker.lookup("variant", null); - - if (variant != null ) { - // add the new variant to the queue - queue.offer(new VariantState(variant, context.getLocation())); - - if ( queue.size() < clusterSize ) - return 1; - - // remove the head of the queue if applicable - if ( queue.size() > clusterSize ) { - VariantState var = queue.remove(); - if ( var.passed ) - vwriter.println(var.variant); - else if ( fwriter != null) - fwriter.println(var.variant); - } - - VariantState head = queue.peek(); - if ( context.getLocation().getContigIndex() == head.location.getContigIndex() && - Math.abs(head.location.getStart() - context.getLocation().getStart()) <= windowSize ) { - for (int i = 0; i < clusterSize; i++) - queue.get(i).passed = false; - } - } - - return 1; - } - - /** - * Increment the number of loci processed. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * @return the new number of loci processed. - */ - public Integer reduce(Integer value, Integer sum) { - return sum + value; - } - - /** - * Tell the user the number of loci processed and close out the new variants file. - * - * @param result the number of variants seen. - */ - public void onTraversalDone(Integer result) { - while ( queue.size() > 0 ) { - VariantState var = queue.remove(); - if ( var.passed ) - vwriter.println(var.variant); - else if ( fwriter != null) - fwriter.println(var.variant); - } - - out.printf("Processed %d variants.\n", result); - - vwriter.close(); - if ( fwriter != null ) - fwriter.close(); - } - - private class VariantState { - public rodVariants variant; - public GenomeLoc location; - public boolean passed = true; - - public VariantState(rodVariants variant, GenomeLoc location) { - this.variant = variant; - this.location = location; - } - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECClusteredSnps.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECClusteredSnps.java new file mode 100755 index 000000000..ca4c31f9c --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECClusteredSnps.java @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.playground.gatk.walkers.variants; + +import org.broadinstitute.sting.gatk.contexts.VariantContext; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; + + + +public class VECClusteredSnps implements VariantExclusionCriterion { + private int window = 10; + private int snpThreshold = 3; + private boolean exclude; + private long distance; + + public void initialize(String arguments) { + if (arguments != null && !arguments.isEmpty()) { + String[] argPieces = arguments.split(","); + window = Integer.valueOf(argPieces[0]); + snpThreshold = Integer.valueOf(argPieces[1]); + } + + if ( window < 2 || snpThreshold < 2 ) + throw new StingException("Window and threshold values need to be >= 2"); + } + + public void compute(VariantContextWindow contextWindow) { + exclude = false; + + VariantContext[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1); + for (int i = 0; i < snpThreshold; i++) { + if ( variants[i] == null || variants[i+snpThreshold-1] == null ) + continue; + + GenomeLoc left = variants[i].getAlignmentContext().getLocation(); + GenomeLoc right = variants[i+snpThreshold-1].getAlignmentContext().getLocation(); + if ( left.getContigIndex() == right.getContigIndex() ) { + distance = Math.abs(right.getStart() - left.getStart()); + if ( distance <= window ) { + exclude = true; + return; + } + } + } + } + + public double inclusionProbability() { + // A hack for now until this filter is actually converted to an empirical filter + return exclude ? 0.0 : 1.0; + } + + public String getStudyHeader() { + return "ClusteredSnps("+window+","+snpThreshold+")\tWindow_size"; + } + + public String getStudyInfo() { + return (exclude ? "fail" : "pass") + "\t" + (exclude ? distance : "N/A"); + } + + public boolean useZeroQualityReads() { return false; } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantContextWindow.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantContextWindow.java index c72d899b2..d70220715 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantContextWindow.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantContextWindow.java @@ -81,6 +81,8 @@ public class VariantContextWindow { public VariantContext[] getWindow(int elementsToLeft, int elementsToRight) { if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() ) throw new StingException("Too large a window requested"); + if ( elementsToLeft < 0 || elementsToRight < 0 ) + throw new StingException("Window size cannot be negative"); VariantContext[] array = new VariantContext[elementsToLeft + elementsToRight + 1]; ListIterator iter = window.listIterator(currentContext - elementsToLeft); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java index 31e19f272..3d3b9b9a6 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java @@ -293,6 +293,7 @@ public class VariantFiltrationWalker extends LocusWalker { * @param result the number of loci seen. */ public void onTraversalDone(Integer result) { + // move the window over so that we can filter the last few variants for (int i=0; i < windowSize; i++) { variantContextWindow.moveWindow(null); compute();