Add a VEC filter for clustered SNP calls that takes advantage of the new windowed approach; delete the old standalone walker.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1485 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
215e908a11
commit
e70101febc
|
|
@ -1,141 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* ClusteredSNPFilterWalker takes a list of variant sites and filters out those that are
|
|
||||||
* too clustered together. At the moment, the variants are expected to be in gelitext format.
|
|
||||||
*/
|
|
||||||
@Requires(value={DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class))
|
|
||||||
public class ClusteredSNPFilterWalker extends RefWalker<Integer, Integer> {
|
|
||||||
@Argument(fullName="windowSize", shortName="window", doc="window size for calculating clusters", required=false)
|
|
||||||
int windowSize = 10;
|
|
||||||
@Argument(fullName="clusterSize", shortName="cluster", doc="number of variants in a window to be considered a cluster", required=false)
|
|
||||||
int clusterSize = 3;
|
|
||||||
@Argument(fullName="variants_out", shortName="VO", doc="File to which variants passing the filter should be written", required=true)
|
|
||||||
File VARIANTS_OUT = null;
|
|
||||||
@Argument(fullName="failed_out", shortName="FO", doc="File to which variants failing the filter should be written", required=false)
|
|
||||||
File FILTERED_OUT = null;
|
|
||||||
|
|
||||||
private PrintWriter vwriter = null;
|
|
||||||
private PrintWriter fwriter = null;
|
|
||||||
|
|
||||||
private LinkedList<VariantState> queue = new LinkedList<VariantState>();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Prepare the output file and the list of available features.
|
|
||||||
*/
|
|
||||||
public void initialize() {
|
|
||||||
try {
|
|
||||||
vwriter = new PrintWriter(VARIANTS_OUT);
|
|
||||||
vwriter.println(GeliTextWriter.headerLine);
|
|
||||||
if ( FILTERED_OUT != null ) {
|
|
||||||
fwriter = new PrintWriter(FILTERED_OUT);
|
|
||||||
fwriter.println(GeliTextWriter.headerLine);
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new StingException(String.format("Could not open file for writing"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize the number of loci processed to zero.
|
|
||||||
*
|
|
||||||
* @return 0
|
|
||||||
*/
|
|
||||||
public Integer reduceInit() { return 0; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* For each site of interest, rescore the genotype likelihoods by applying the specified feature set.
|
|
||||||
*
|
|
||||||
* @param tracker the meta-data tracker
|
|
||||||
* @param ref the reference base
|
|
||||||
* @param context the context for the given locus
|
|
||||||
* @return 1 if the locus was successfully processed, 0 if otherwise
|
|
||||||
*/
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
rodVariants variant = (rodVariants)tracker.lookup("variant", null);
|
|
||||||
|
|
||||||
if (variant != null ) {
|
|
||||||
// add the new variant to the queue
|
|
||||||
queue.offer(new VariantState(variant, context.getLocation()));
|
|
||||||
|
|
||||||
if ( queue.size() < clusterSize )
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
// remove the head of the queue if applicable
|
|
||||||
if ( queue.size() > clusterSize ) {
|
|
||||||
VariantState var = queue.remove();
|
|
||||||
if ( var.passed )
|
|
||||||
vwriter.println(var.variant);
|
|
||||||
else if ( fwriter != null)
|
|
||||||
fwriter.println(var.variant);
|
|
||||||
}
|
|
||||||
|
|
||||||
VariantState head = queue.peek();
|
|
||||||
if ( context.getLocation().getContigIndex() == head.location.getContigIndex() &&
|
|
||||||
Math.abs(head.location.getStart() - context.getLocation().getStart()) <= windowSize ) {
|
|
||||||
for (int i = 0; i < clusterSize; i++)
|
|
||||||
queue.get(i).passed = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Increment the number of loci processed.
|
|
||||||
*
|
|
||||||
* @param value result of the map.
|
|
||||||
* @param sum accumulator for the reduce.
|
|
||||||
* @return the new number of loci processed.
|
|
||||||
*/
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
|
||||||
return sum + value;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tell the user the number of loci processed and close out the new variants file.
|
|
||||||
*
|
|
||||||
* @param result the number of variants seen.
|
|
||||||
*/
|
|
||||||
public void onTraversalDone(Integer result) {
|
|
||||||
while ( queue.size() > 0 ) {
|
|
||||||
VariantState var = queue.remove();
|
|
||||||
if ( var.passed )
|
|
||||||
vwriter.println(var.variant);
|
|
||||||
else if ( fwriter != null)
|
|
||||||
fwriter.println(var.variant);
|
|
||||||
}
|
|
||||||
|
|
||||||
out.printf("Processed %d variants.\n", result);
|
|
||||||
|
|
||||||
vwriter.close();
|
|
||||||
if ( fwriter != null )
|
|
||||||
fwriter.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
private class VariantState {
|
|
||||||
public rodVariants variant;
|
|
||||||
public GenomeLoc location;
|
|
||||||
public boolean passed = true;
|
|
||||||
|
|
||||||
public VariantState(rodVariants variant, GenomeLoc location) {
|
|
||||||
this.variant = variant;
|
|
||||||
this.location = location;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
public class VECClusteredSnps implements VariantExclusionCriterion {
|
||||||
|
private int window = 10;
|
||||||
|
private int snpThreshold = 3;
|
||||||
|
private boolean exclude;
|
||||||
|
private long distance;
|
||||||
|
|
||||||
|
public void initialize(String arguments) {
|
||||||
|
if (arguments != null && !arguments.isEmpty()) {
|
||||||
|
String[] argPieces = arguments.split(",");
|
||||||
|
window = Integer.valueOf(argPieces[0]);
|
||||||
|
snpThreshold = Integer.valueOf(argPieces[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( window < 2 || snpThreshold < 2 )
|
||||||
|
throw new StingException("Window and threshold values need to be >= 2");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void compute(VariantContextWindow contextWindow) {
|
||||||
|
exclude = false;
|
||||||
|
|
||||||
|
VariantContext[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1);
|
||||||
|
for (int i = 0; i < snpThreshold; i++) {
|
||||||
|
if ( variants[i] == null || variants[i+snpThreshold-1] == null )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
GenomeLoc left = variants[i].getAlignmentContext().getLocation();
|
||||||
|
GenomeLoc right = variants[i+snpThreshold-1].getAlignmentContext().getLocation();
|
||||||
|
if ( left.getContigIndex() == right.getContigIndex() ) {
|
||||||
|
distance = Math.abs(right.getStart() - left.getStart());
|
||||||
|
if ( distance <= window ) {
|
||||||
|
exclude = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public double inclusionProbability() {
|
||||||
|
// A hack for now until this filter is actually converted to an empirical filter
|
||||||
|
return exclude ? 0.0 : 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStudyHeader() {
|
||||||
|
return "ClusteredSnps("+window+","+snpThreshold+")\tWindow_size";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStudyInfo() {
|
||||||
|
return (exclude ? "fail" : "pass") + "\t" + (exclude ? distance : "N/A");
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
}
|
||||||
|
|
@ -81,6 +81,8 @@ public class VariantContextWindow {
|
||||||
public VariantContext[] getWindow(int elementsToLeft, int elementsToRight) {
|
public VariantContext[] getWindow(int elementsToLeft, int elementsToRight) {
|
||||||
if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() )
|
if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() )
|
||||||
throw new StingException("Too large a window requested");
|
throw new StingException("Too large a window requested");
|
||||||
|
if ( elementsToLeft < 0 || elementsToRight < 0 )
|
||||||
|
throw new StingException("Window size cannot be negative");
|
||||||
|
|
||||||
VariantContext[] array = new VariantContext[elementsToLeft + elementsToRight + 1];
|
VariantContext[] array = new VariantContext[elementsToLeft + elementsToRight + 1];
|
||||||
ListIterator<VariantContext> iter = window.listIterator(currentContext - elementsToLeft);
|
ListIterator<VariantContext> iter = window.listIterator(currentContext - elementsToLeft);
|
||||||
|
|
|
||||||
|
|
@ -293,6 +293,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
* @param result the number of loci seen.
|
* @param result the number of loci seen.
|
||||||
*/
|
*/
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
|
// move the window over so that we can filter the last few variants
|
||||||
for (int i=0; i < windowSize; i++) {
|
for (int i=0; i < windowSize; i++) {
|
||||||
variantContextWindow.moveWindow(null);
|
variantContextWindow.moveWindow(null);
|
||||||
compute();
|
compute();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue