Moving VariantFiltration over to use VariantContext.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3048 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-03-19 18:35:23 +00:00
parent c88a2a3027
commit 4c4d048f14
5 changed files with 96 additions and 69 deletions

View File

@ -1,10 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.filters;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
public class ClusteredSnps {
private int window = 10;
@ -19,13 +15,13 @@ public class ClusteredSnps {
public boolean filter(VariantContextWindow contextWindow) {
Pair<RefMetaDataTracker, RodVCF>[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1);
FiltrationContext[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1);
for (int i = 0; i < snpThreshold; i++) {
if ( variants[i] == null || variants[i+snpThreshold-1] == null )
continue;
GenomeLoc left = variants[i].second.getLocation();
GenomeLoc right = variants[i+snpThreshold-1].second.getLocation();
GenomeLoc left = variants[i].getVariantContext().getLocation();
GenomeLoc right = variants[i+snpThreshold-1].getVariantContext().getLocation();
if ( left.getContigIndex() == right.getContigIndex() &&
Math.abs(right.getStart() - left.getStart()) <= window )
return true;

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.filters;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
public class FiltrationContext {
private RefMetaDataTracker tracker;
private ReferenceContext ref;
private VariantContext vc;
public FiltrationContext(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc) {
this.tracker = tracker;
this.ref = ref;
this.vc = vc;
}
public RefMetaDataTracker getTracker() { return tracker; }
public ReferenceContext getReferenceContext() { return ref; }
public VariantContext getVariantContext() { return vc; }
}

View File

@ -25,8 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.filters;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.utils.StingException;
import java.util.*;
@ -38,17 +37,18 @@ import java.util.*;
*/
public class VariantContextWindow {
/**
* The variants.
*/
private LinkedList<Pair<RefMetaDataTracker, RodVCF>> window = new LinkedList<Pair<RefMetaDataTracker, RodVCF>>();
private LinkedList<FiltrationContext> window = new LinkedList<FiltrationContext>();
private int currentContext;
/**
* Contructor for a variant context.
* @param firstVariants the first set of variants, comprising the right half of the window
*/
public VariantContextWindow(List<Pair<RefMetaDataTracker, RodVCF>> firstVariants) {
public VariantContextWindow(List<FiltrationContext> firstVariants) {
int windowSize = (firstVariants == null ? 1 : 2 * firstVariants.size() + 1);
currentContext = (firstVariants == null ? 0 : firstVariants.size());
window.addAll(firstVariants);
@ -60,7 +60,7 @@ public class VariantContextWindow {
* The context currently being examined.
* @return The current context.
*/
public Pair<RefMetaDataTracker, RodVCF> getContext() {
public FiltrationContext getContext() {
return window.get(currentContext);
}
@ -78,14 +78,14 @@ public class VariantContextWindow {
* @param elementsToRight number of later contexts to return ()
* @return The current context window.
*/
public Pair<RefMetaDataTracker, RodVCF>[] getWindow(int elementsToLeft, int elementsToRight) {
public FiltrationContext[] getWindow(int elementsToLeft, int elementsToRight) {
if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() )
throw new StingException("Too large a window requested");
if ( elementsToLeft < 0 || elementsToRight < 0 )
throw new StingException("Window size cannot be negative");
Pair[] array = new Pair[elementsToLeft + elementsToRight + 1];
ListIterator<Pair<RefMetaDataTracker, RodVCF>> iter = window.listIterator(currentContext - elementsToLeft);
FiltrationContext[] array = new FiltrationContext[elementsToLeft + elementsToRight + 1];
ListIterator<FiltrationContext> iter = window.listIterator(currentContext - elementsToLeft);
for (int i = 0; i < elementsToLeft + elementsToRight + 1; i++)
array[i] = iter.next();
return array;
@ -95,7 +95,7 @@ public class VariantContextWindow {
* Move the window along to the next context
* @param context The new rightmost context
*/
public void moveWindow(Pair<RefMetaDataTracker, RodVCF> context) {
public void moveWindow(FiltrationContext context) {
window.removeFirst();
window.addLast(context);
}

View File

@ -1,11 +1,12 @@
package org.broadinstitute.sting.gatk.walkers.filters;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -16,12 +17,13 @@ import org.apache.commons.jexl.*;
/**
* Filters variant calls using a number of user-selectable, parameterizable criteria.
*/
@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class))
@Requires(value={},referenceMetaData=@RMD(name="variant",type= ReferenceOrderedDatum.class))
public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
@Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false)
protected String[] FILTER_STRINGS = new String[]{null};
@Argument(fullName="filterName", shortName="filterName", doc="The text to put in the FILTER field if a filter expression is provided and a variant call matches", required=false)
protected String[] FILTER_NAMES = new String[]{"GATK_filter"};
@Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter (see wiki docs for more info)", required=false)
protected String[] FILTER_EXPS = new String[]{};
@Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false)
protected String[] FILTER_NAMES = new String[]{};
@Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize)", required=false)
protected Integer clusterSize = 3;
@ -31,6 +33,9 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
@Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false)
protected String MASK_NAME = "Mask";
// JEXL expressions for the filters
List<VariantContextUtils.JexlVCMatchExp> filterExps;
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
private VCFWriter writer = null;
@ -49,15 +54,13 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
}
}
private List<FilterExp> filterExpressions = new ArrayList<FilterExp>();
// the structures necessary to initialize and maintain a windowed context
private VariantContextWindow variantContextWindow;
private static final int windowSize = 10; // 10 variants on either end of the current one
private ArrayList<Pair<RefMetaDataTracker, RodVCF>> windowInitializer = new ArrayList<Pair<RefMetaDataTracker, RodVCF>>();
private ArrayList<FiltrationContext> windowInitializer = new ArrayList<FiltrationContext>();
private void initializeVcfWriter(RodVCF rod) {
private void initializeVcfWriter(VCFRecord rec) {
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
@ -67,8 +70,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
if ( clusterWindow > 0 )
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
for ( FilterExp exp : filterExpressions ) {
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) {
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.exp.toString()));
}
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
@ -80,26 +83,14 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
}
writer = new VCFWriter(out);
writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()));
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(Arrays.asList(rec.getSampleNames()))));
}
public void initialize() {
if ( clusterWindow > 0 )
clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow);
if ( FILTER_NAMES.length != FILTER_STRINGS.length )
throw new StingException("Inconsistent number of provided filter names and expressions.");
for ( int i = 0; i < FILTER_NAMES.length; i++ ) {
if ( FILTER_STRINGS[i] != null ) {
try {
Expression filterExpression = ExpressionFactory.createExpression(FILTER_STRINGS[i]);
filterExpressions.add(new FilterExp(FILTER_NAMES[i], FILTER_STRINGS[i], filterExpression));
} catch (Exception e) {
throw new StingException("Invalid expression used (" + FILTER_STRINGS[i] + "). Please see the JEXL docs for correct syntax.");
}
}
}
filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS);
}
public Integer reduceInit() { return 0; }
@ -121,8 +112,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
if ( rods == null || rods.size() == 0 )
return 0;
RodVCF variant = (RodVCF)rods.get(0);
Pair<RefMetaDataTracker, RodVCF> varContext = new Pair<RefMetaDataTracker, RodVCF>(tracker, variant);
VariantContext vc = VariantContextAdaptors.toVariantContext("variant", rods.get(0));
FiltrationContext varContext = new FiltrationContext(tracker, ref, vc);
// if we're still initializing the context, do so
if ( windowInitializer != null ) {
@ -141,14 +132,14 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
private void filter() {
// get the current context
Pair<RefMetaDataTracker, RodVCF> context = variantContextWindow.getContext();
FiltrationContext context = variantContextWindow.getContext();
if ( context == null )
return;
StringBuilder filterString = new StringBuilder();
// test for SNP mask, if present
RODRecordList mask = context.first.getTrackData("mask", null);
RODRecordList mask = context.getTracker().getTrackData("mask", null);
if ( mask != null && mask.size() > 0 )
addFilter(filterString, MASK_NAME);
@ -156,22 +147,13 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
addFilter(filterString, CLUSTERED_SNP_FILTER_NAME);
for ( FilterExp exp : filterExpressions ) {
Map<String, String> infoMap = new HashMap<String, String>(context.second.mCurrentRecord.getInfoValues());
infoMap.put("QUAL", String.valueOf(context.second.mCurrentRecord.getQual()));
JexlContext jContext = JexlHelper.createContext();
jContext.setVars(infoMap);
try {
if ( (Boolean)exp.exp.evaluate(jContext) )
addFilter(filterString, exp.name);
} catch (Exception e) {
throw new StingException("Error evaluating filter expression for given input. Most often this is caused by a malformatted expression string, such as using an integral comparison for floating-point values. Observed value was: "+e.getMessage());
}
VariantContext vc = context.getVariantContext();
for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) {
if ( VariantContextUtils.match(vc, exp) )
addFilter(filterString, exp.name);
}
writeVCF(context.second, filterString);
writeVCF(VariantContextAdaptors.toVCF(vc, context.getReferenceContext().getBase(), null, true), filterString);
}
private static void addFilter(StringBuilder sb, String filter) {
@ -180,8 +162,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
sb.append(filter);
}
private void writeVCF(RodVCF variant, StringBuilder filterString) {
VCFRecord rec = variant.mCurrentRecord;
private void writeVCF(VCFRecord rec, StringBuilder filterString) {
if ( filterString.length() != 0 ) {
// if the record is already filtered, don't destroy those filters
@ -195,7 +176,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
}
if ( writer == null )
initializeVcfWriter(variant);
initializeVcfWriter(rec);
writer.addRecord(rec);
}

View File

@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("e8e1898d65eb77ec32b7eca6764864f3"));
Arrays.asList("af14c7d03e3516b61f2702c9e4a7780f"));
executeTest("test no action", spec);
}
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -window 10 -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("25da669bc6411d31f585b01be85e8429"));
Arrays.asList("b2b18929289bb47f07bcc23d4cec94c4"));
executeTest("test clustered SNPs", spec);
}
@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testMask() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -mask foo -B mask,VCF," + validationDataLocation + "vcfexample2.vcf -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("bd08169351c4cc9a2d95f33a2a73c7fa"));
Arrays.asList("af048053eaef84fc4d61c51c50be1e0a"));
executeTest("test mask", spec);
}
@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("9e7631aeda1c174f5aac6712279ed861"));
Arrays.asList("43a580d9c684a496f21d0d42939dd910"));
executeTest("test filter #1", spec);
}
@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("9009742f5697c5a85f9e41e1d7d6f260"));
Arrays.asList("2bd1e8975d8105dec2fb6055fbf00569"));
executeTest("test filter #2", spec);
}
@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("6b280c5abd6e911df8e4fca58c7b216b"));
Arrays.asList("3479158ffd02a45371b5103277a30a53"));
executeTest("test filter with separate names #2", spec);
}
}