Moving VariantFiltration over to use VariantContext.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3048 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c88a2a3027
commit
4c4d048f14
|
|
@ -1,10 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
|
||||
|
||||
public class ClusteredSnps {
|
||||
private int window = 10;
|
||||
|
|
@ -19,13 +15,13 @@ public class ClusteredSnps {
|
|||
|
||||
public boolean filter(VariantContextWindow contextWindow) {
|
||||
|
||||
Pair<RefMetaDataTracker, RodVCF>[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1);
|
||||
FiltrationContext[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1);
|
||||
for (int i = 0; i < snpThreshold; i++) {
|
||||
if ( variants[i] == null || variants[i+snpThreshold-1] == null )
|
||||
continue;
|
||||
|
||||
GenomeLoc left = variants[i].second.getLocation();
|
||||
GenomeLoc right = variants[i+snpThreshold-1].second.getLocation();
|
||||
GenomeLoc left = variants[i].getVariantContext().getLocation();
|
||||
GenomeLoc right = variants[i+snpThreshold-1].getVariantContext().getLocation();
|
||||
if ( left.getContigIndex() == right.getContigIndex() &&
|
||||
Math.abs(right.getStart() - left.getStart()) <= window )
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
||||
|
||||
public class FiltrationContext {
|
||||
|
||||
private RefMetaDataTracker tracker;
|
||||
private ReferenceContext ref;
|
||||
private VariantContext vc;
|
||||
|
||||
public FiltrationContext(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc) {
|
||||
this.tracker = tracker;
|
||||
this.ref = ref;
|
||||
this.vc = vc;
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getTracker() { return tracker; }
|
||||
|
||||
public ReferenceContext getReferenceContext() { return ref; }
|
||||
|
||||
public VariantContext getVariantContext() { return vc; }
|
||||
}
|
||||
|
|
@ -25,8 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -38,17 +37,18 @@ import java.util.*;
|
|||
*/
|
||||
|
||||
public class VariantContextWindow {
|
||||
|
||||
/**
|
||||
* The variants.
|
||||
*/
|
||||
private LinkedList<Pair<RefMetaDataTracker, RodVCF>> window = new LinkedList<Pair<RefMetaDataTracker, RodVCF>>();
|
||||
private LinkedList<FiltrationContext> window = new LinkedList<FiltrationContext>();
|
||||
private int currentContext;
|
||||
|
||||
/**
|
||||
* Contructor for a variant context.
|
||||
* @param firstVariants the first set of variants, comprising the right half of the window
|
||||
*/
|
||||
public VariantContextWindow(List<Pair<RefMetaDataTracker, RodVCF>> firstVariants) {
|
||||
public VariantContextWindow(List<FiltrationContext> firstVariants) {
|
||||
int windowSize = (firstVariants == null ? 1 : 2 * firstVariants.size() + 1);
|
||||
currentContext = (firstVariants == null ? 0 : firstVariants.size());
|
||||
window.addAll(firstVariants);
|
||||
|
|
@ -60,7 +60,7 @@ public class VariantContextWindow {
|
|||
* The context currently being examined.
|
||||
* @return The current context.
|
||||
*/
|
||||
public Pair<RefMetaDataTracker, RodVCF> getContext() {
|
||||
public FiltrationContext getContext() {
|
||||
return window.get(currentContext);
|
||||
}
|
||||
|
||||
|
|
@ -78,14 +78,14 @@ public class VariantContextWindow {
|
|||
* @param elementsToRight number of later contexts to return ()
|
||||
* @return The current context window.
|
||||
*/
|
||||
public Pair<RefMetaDataTracker, RodVCF>[] getWindow(int elementsToLeft, int elementsToRight) {
|
||||
public FiltrationContext[] getWindow(int elementsToLeft, int elementsToRight) {
|
||||
if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() )
|
||||
throw new StingException("Too large a window requested");
|
||||
if ( elementsToLeft < 0 || elementsToRight < 0 )
|
||||
throw new StingException("Window size cannot be negative");
|
||||
|
||||
Pair[] array = new Pair[elementsToLeft + elementsToRight + 1];
|
||||
ListIterator<Pair<RefMetaDataTracker, RodVCF>> iter = window.listIterator(currentContext - elementsToLeft);
|
||||
FiltrationContext[] array = new FiltrationContext[elementsToLeft + elementsToRight + 1];
|
||||
ListIterator<FiltrationContext> iter = window.listIterator(currentContext - elementsToLeft);
|
||||
for (int i = 0; i < elementsToLeft + elementsToRight + 1; i++)
|
||||
array[i] = iter.next();
|
||||
return array;
|
||||
|
|
@ -95,7 +95,7 @@ public class VariantContextWindow {
|
|||
* Move the window along to the next context
|
||||
* @param context The new rightmost context
|
||||
*/
|
||||
public void moveWindow(Pair<RefMetaDataTracker, RodVCF> context) {
|
||||
public void moveWindow(FiltrationContext context) {
|
||||
window.removeFirst();
|
||||
window.addLast(context);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
|
|
@ -16,12 +17,13 @@ import org.apache.commons.jexl.*;
|
|||
/**
|
||||
* Filters variant calls using a number of user-selectable, parameterizable criteria.
|
||||
*/
|
||||
@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class))
|
||||
@Requires(value={},referenceMetaData=@RMD(name="variant",type= ReferenceOrderedDatum.class))
|
||||
public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||
@Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false)
|
||||
protected String[] FILTER_STRINGS = new String[]{null};
|
||||
@Argument(fullName="filterName", shortName="filterName", doc="The text to put in the FILTER field if a filter expression is provided and a variant call matches", required=false)
|
||||
protected String[] FILTER_NAMES = new String[]{"GATK_filter"};
|
||||
|
||||
@Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter (see wiki docs for more info)", required=false)
|
||||
protected String[] FILTER_EXPS = new String[]{};
|
||||
@Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false)
|
||||
protected String[] FILTER_NAMES = new String[]{};
|
||||
|
||||
@Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize)", required=false)
|
||||
protected Integer clusterSize = 3;
|
||||
|
|
@ -31,6 +33,9 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false)
|
||||
protected String MASK_NAME = "Mask";
|
||||
|
||||
// JEXL expressions for the filters
|
||||
List<VariantContextUtils.JexlVCMatchExp> filterExps;
|
||||
|
||||
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
|
||||
|
||||
private VCFWriter writer = null;
|
||||
|
|
@ -49,15 +54,13 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
private List<FilterExp> filterExpressions = new ArrayList<FilterExp>();
|
||||
|
||||
// the structures necessary to initialize and maintain a windowed context
|
||||
private VariantContextWindow variantContextWindow;
|
||||
private static final int windowSize = 10; // 10 variants on either end of the current one
|
||||
private ArrayList<Pair<RefMetaDataTracker, RodVCF>> windowInitializer = new ArrayList<Pair<RefMetaDataTracker, RodVCF>>();
|
||||
private ArrayList<FiltrationContext> windowInitializer = new ArrayList<FiltrationContext>();
|
||||
|
||||
|
||||
private void initializeVcfWriter(RodVCF rod) {
|
||||
private void initializeVcfWriter(VCFRecord rec) {
|
||||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
|
|
@ -67,8 +70,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
if ( clusterWindow > 0 )
|
||||
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
|
||||
|
||||
for ( FilterExp exp : filterExpressions ) {
|
||||
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
|
||||
for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) {
|
||||
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.exp.toString()));
|
||||
}
|
||||
|
||||
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
|
||||
|
|
@ -80,26 +83,14 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
writer = new VCFWriter(out);
|
||||
writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()));
|
||||
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(Arrays.asList(rec.getSampleNames()))));
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
if ( clusterWindow > 0 )
|
||||
clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow);
|
||||
|
||||
if ( FILTER_NAMES.length != FILTER_STRINGS.length )
|
||||
throw new StingException("Inconsistent number of provided filter names and expressions.");
|
||||
|
||||
for ( int i = 0; i < FILTER_NAMES.length; i++ ) {
|
||||
if ( FILTER_STRINGS[i] != null ) {
|
||||
try {
|
||||
Expression filterExpression = ExpressionFactory.createExpression(FILTER_STRINGS[i]);
|
||||
filterExpressions.add(new FilterExp(FILTER_NAMES[i], FILTER_STRINGS[i], filterExpression));
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Invalid expression used (" + FILTER_STRINGS[i] + "). Please see the JEXL docs for correct syntax.");
|
||||
}
|
||||
}
|
||||
}
|
||||
filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS);
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
|
@ -121,8 +112,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
if ( rods == null || rods.size() == 0 )
|
||||
return 0;
|
||||
|
||||
RodVCF variant = (RodVCF)rods.get(0);
|
||||
Pair<RefMetaDataTracker, RodVCF> varContext = new Pair<RefMetaDataTracker, RodVCF>(tracker, variant);
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("variant", rods.get(0));
|
||||
FiltrationContext varContext = new FiltrationContext(tracker, ref, vc);
|
||||
|
||||
// if we're still initializing the context, do so
|
||||
if ( windowInitializer != null ) {
|
||||
|
|
@ -141,14 +132,14 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
private void filter() {
|
||||
// get the current context
|
||||
Pair<RefMetaDataTracker, RodVCF> context = variantContextWindow.getContext();
|
||||
FiltrationContext context = variantContextWindow.getContext();
|
||||
if ( context == null )
|
||||
return;
|
||||
|
||||
StringBuilder filterString = new StringBuilder();
|
||||
|
||||
// test for SNP mask, if present
|
||||
RODRecordList mask = context.first.getTrackData("mask", null);
|
||||
RODRecordList mask = context.getTracker().getTrackData("mask", null);
|
||||
if ( mask != null && mask.size() > 0 )
|
||||
addFilter(filterString, MASK_NAME);
|
||||
|
||||
|
|
@ -156,22 +147,13 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
|
||||
addFilter(filterString, CLUSTERED_SNP_FILTER_NAME);
|
||||
|
||||
for ( FilterExp exp : filterExpressions ) {
|
||||
Map<String, String> infoMap = new HashMap<String, String>(context.second.mCurrentRecord.getInfoValues());
|
||||
infoMap.put("QUAL", String.valueOf(context.second.mCurrentRecord.getQual()));
|
||||
|
||||
JexlContext jContext = JexlHelper.createContext();
|
||||
jContext.setVars(infoMap);
|
||||
|
||||
try {
|
||||
if ( (Boolean)exp.exp.evaluate(jContext) )
|
||||
addFilter(filterString, exp.name);
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Error evaluating filter expression for given input. Most often this is caused by a malformatted expression string, such as using an integral comparison for floating-point values. Observed value was: "+e.getMessage());
|
||||
}
|
||||
VariantContext vc = context.getVariantContext();
|
||||
for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) {
|
||||
if ( VariantContextUtils.match(vc, exp) )
|
||||
addFilter(filterString, exp.name);
|
||||
}
|
||||
|
||||
writeVCF(context.second, filterString);
|
||||
writeVCF(VariantContextAdaptors.toVCF(vc, context.getReferenceContext().getBase(), null, true), filterString);
|
||||
}
|
||||
|
||||
private static void addFilter(StringBuilder sb, String filter) {
|
||||
|
|
@ -180,8 +162,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
sb.append(filter);
|
||||
}
|
||||
|
||||
private void writeVCF(RodVCF variant, StringBuilder filterString) {
|
||||
VCFRecord rec = variant.mCurrentRecord;
|
||||
private void writeVCF(VCFRecord rec, StringBuilder filterString) {
|
||||
|
||||
if ( filterString.length() != 0 ) {
|
||||
// if the record is already filtered, don't destroy those filters
|
||||
|
|
@ -195,7 +176,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
if ( writer == null )
|
||||
initializeVcfWriter(variant);
|
||||
initializeVcfWriter(rec);
|
||||
writer.addRecord(rec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testNoAction() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("e8e1898d65eb77ec32b7eca6764864f3"));
|
||||
Arrays.asList("af14c7d03e3516b61f2702c9e4a7780f"));
|
||||
executeTest("test no action", spec);
|
||||
}
|
||||
|
||||
|
|
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testClusteredSnps() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -window 10 -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("25da669bc6411d31f585b01be85e8429"));
|
||||
Arrays.asList("b2b18929289bb47f07bcc23d4cec94c4"));
|
||||
executeTest("test clustered SNPs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testMask() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -mask foo -B mask,VCF," + validationDataLocation + "vcfexample2.vcf -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("bd08169351c4cc9a2d95f33a2a73c7fa"));
|
||||
Arrays.asList("af048053eaef84fc4d61c51c50be1e0a"));
|
||||
executeTest("test mask", spec);
|
||||
}
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilter1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("9e7631aeda1c174f5aac6712279ed861"));
|
||||
Arrays.asList("43a580d9c684a496f21d0d42939dd910"));
|
||||
executeTest("test filter #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilter2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("9009742f5697c5a85f9e41e1d7d6f260"));
|
||||
Arrays.asList("2bd1e8975d8105dec2fb6055fbf00569"));
|
||||
executeTest("test filter #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilterWithSeparateNames() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("6b280c5abd6e911df8e4fca58c7b216b"));
|
||||
Arrays.asList("3479158ffd02a45371b5103277a30a53"));
|
||||
executeTest("test filter with separate names #2", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue