VariantFiltration now accepts any number of --filterName/--filterExpression (-filter) pairs, and annotates the VCF file with the name of each filter whose expression matches. Very useful.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2732 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
01e73fc39e
commit
5aaf4e6434
|
|
@ -18,9 +18,9 @@ import org.apache.commons.jexl.*;
|
||||||
@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class))
|
@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class))
|
||||||
public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false)
|
@Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false)
|
||||||
protected String FILTER_STRING = null;
|
protected String[] FILTER_STRINGS = new String[]{null};
|
||||||
@Argument(fullName="filterName", shortName="filterName", doc="The text to put in the FILTER field if a filter expression is provided and a variant call matches", required=false)
|
@Argument(fullName="filterName", shortName="filterName", doc="The text to put in the FILTER field if a filter expression is provided and a variant call matches", required=false)
|
||||||
protected String FILTER_NAME = "GATK_filter";
|
protected String[] FILTER_NAMES = new String[]{"GATK_filter"};
|
||||||
|
|
||||||
@Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize)", required=false)
|
@Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize)", required=false)
|
||||||
protected Integer clusterSize = 3;
|
protected Integer clusterSize = 3;
|
||||||
|
|
@ -32,12 +32,23 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
|
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
|
||||||
|
|
||||||
|
|
||||||
private VCFWriter writer = null;
|
private VCFWriter writer = null;
|
||||||
|
|
||||||
private ClusteredSnps clusteredSNPs = null;
|
private ClusteredSnps clusteredSNPs = null;
|
||||||
|
|
||||||
private Expression filterExpression = null;
|
/**
 * Bundles one user-supplied filter: its name, the raw expression text, and the
 * compiled expression built from that text.
 *
 * NOTE(review): declared as a non-static inner class with mutable, package-visible
 * fields; nothing visible here appears to need the enclosing instance or to reassign
 * the fields -- consider a static nested class with final fields; confirm against the
 * full file.
 */
class FilterExp {
|
||||||
|
// Filter name; used as the VCF FILTER header id and as the label added to a record when the expression matches.
String name;
|
||||||
|
// Original expression text as given on the command line; used as the FILTER header description.
String expStr;
|
||||||
|
// Compiled form of expStr that is evaluated against each record's variables.
Expression exp;
|
||||||
|
|
||||||
|
/**
 * Stores the three values as given; performs no validation or copying.
 *
 * @param name filter name
 * @param str  raw expression text (stored in expStr)
 * @param exp  compiled expression (presumably compiled from str -- caller's responsibility)
 */
public FilterExp(String name, String str, Expression exp) {
|
||||||
|
this.name = name;
|
||||||
|
this.expStr = str;
|
||||||
|
this.exp = exp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<FilterExp> filterExpressions = new ArrayList<FilterExp>();
|
||||||
|
|
||||||
// the structures necessary to initialize and maintain a windowed context
|
// the structures necessary to initialize and maintain a windowed context
|
||||||
private VariantContextWindow variantContextWindow;
|
private VariantContextWindow variantContextWindow;
|
||||||
|
|
@ -54,8 +65,11 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
if ( clusterWindow > 0 )
|
if ( clusterWindow > 0 )
|
||||||
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
|
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
|
||||||
if ( filterExpression != null )
|
|
||||||
hInfo.add(new VCFFilterHeaderLine(FILTER_NAME, FILTER_STRING));
|
for ( FilterExp exp : filterExpressions ) {
|
||||||
|
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
|
||||||
|
}
|
||||||
|
|
||||||
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
|
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
|
||||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||||
if ( source.getReferenceOrderedData().getName().equals("mask") ) {
|
if ( source.getReferenceOrderedData().getName().equals("mask") ) {
|
||||||
|
|
@ -72,11 +86,18 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
if ( clusterWindow > 0 )
|
if ( clusterWindow > 0 )
|
||||||
clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow);
|
clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow);
|
||||||
|
|
||||||
|
if ( FILTER_NAMES.length != FILTER_STRINGS.length )
|
||||||
|
throw new StingException("Inconsistent number of provided filter names and expressions.");
|
||||||
|
|
||||||
|
for ( int i = 0; i < FILTER_NAMES.length; i++ ) {
|
||||||
|
if ( FILTER_STRINGS[i] != null ) {
|
||||||
try {
|
try {
|
||||||
if ( FILTER_STRING != null )
|
Expression filterExpression = ExpressionFactory.createExpression(FILTER_STRINGS[i]);
|
||||||
filterExpression = ExpressionFactory.createExpression(FILTER_STRING);
|
filterExpressions.add(new FilterExp(FILTER_NAMES[i], FILTER_STRINGS[i], filterExpression));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new StingException("Invalid expression used (" + FILTER_STRING + "). Please see the JEXL docs for correct syntax.");
|
throw new StingException("Invalid expression used (" + FILTER_STRINGS[i] + "). Please see the JEXL docs for correct syntax.");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -134,7 +155,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
|
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
|
||||||
addFilter(filterString, CLUSTERED_SNP_FILTER_NAME);
|
addFilter(filterString, CLUSTERED_SNP_FILTER_NAME);
|
||||||
|
|
||||||
if ( filterExpression != null ) {
|
for ( FilterExp exp : filterExpressions ) {
|
||||||
Map<String, String> infoMap = new HashMap<String, String>(context.second.mCurrentRecord.getInfoValues());
|
Map<String, String> infoMap = new HashMap<String, String>(context.second.mCurrentRecord.getInfoValues());
|
||||||
infoMap.put("QUAL", String.valueOf(context.second.mCurrentRecord.getQual()));
|
infoMap.put("QUAL", String.valueOf(context.second.mCurrentRecord.getQual()));
|
||||||
|
|
||||||
|
|
@ -142,8 +163,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
jContext.setVars(infoMap);
|
jContext.setVars(infoMap);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if ( (Boolean)filterExpression.evaluate(jContext) )
|
if ( (Boolean)exp.exp.evaluate(jContext) )
|
||||||
addFilter(filterString, FILTER_NAME);
|
addFilter(filterString, exp.name);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new StingException(e.getMessage());
|
throw new StingException(e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -51,4 +51,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
Arrays.asList("18ef67575a76c2be74af8bd8b9fdf86e"));
|
Arrays.asList("18ef67575a76c2be74af8bd8b9fdf86e"));
|
||||||
executeTest("test filter #2", spec);
|
executeTest("test filter #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Integration test: passes two (--filterName, -filter) pairs -- ABF with
 * 'AlleleBalance < 70.0' and FSF with 'FisherStrand == 1.4' -- against
 * vcfexample2.vcf restricted to 1:10,020,000-10,021,000, and compares the result
 * with the single expected checksum listed below.
 *
 * NOTE(review): the second argument (1) is presumably the number of output files and
 * the string an MD5 of the output -- confirm against WalkerTestSpec.
 * NOTE(review): the executeTest label ends in "#2" like the preceding test's label;
 * looks like a copy/paste leftover -- confirm.
 */
@Test
|
||||||
|
public void testFilterWithSeparateNames() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
|
Arrays.asList("7de66cac85cfe8a70219e51a5f6c251c"));
|
||||||
|
executeTest("test filter with separate names #2", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue