Just like VariantFiltration but lets you match info fields out of the VCF instead of annotating them.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2736 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-01-29 15:38:03 +00:00
parent 0a7426c29c
commit 1993472b38
2 changed files with 182 additions and 0 deletions

View File

@ -0,0 +1,136 @@
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.filters.ClusteredSnps;
import org.broadinstitute.sting.gatk.walkers.filters.VariantContextWindow;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.*;
import org.apache.commons.jexl.*;
/**
* Selects variant calls for output from a user-supplied VCF file using a number of user-selectable, parameterizable criteria.
*/
@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class))
public class VCFSelectWalker extends RodWalker<Integer, Integer> {
@Argument(fullName="match", shortName="match", doc="Expression used with INFO fields to select VCF records for inclusion in the output VCF(see wiki docs for more info)", required=false)
protected String[] MATCH_STRINGS = new String[]{null};
private VCFWriter writer = null;
class MatchExp {
String name;
String expStr;
Expression exp;
public MatchExp(String name, String str, Expression exp) {
this.name = name;
this.expStr = str;
this.exp = exp;
}
}
private List<MatchExp> matchExpressions = new ArrayList<MatchExp>();
private void initializeVcfWriter(RodVCF rod) {
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "VariantSelect"));
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
for ( MatchExp exp : matchExpressions ) {
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
}
writer = new VCFWriter(out);
writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()));
}
public void initialize() {
for ( int i = 0; i < MATCH_STRINGS.length; i++ ) {
if ( MATCH_STRINGS[i] != null ) {
try {
Expression filterExpression = ExpressionFactory.createExpression(MATCH_STRINGS[i]);
matchExpressions.add(new MatchExp(String.format("match-%d", i), MATCH_STRINGS[i], filterExpression));
} catch (Exception e) {
throw new StingException("Invalid expression used (" + MATCH_STRINGS[i] + "). Please see the JEXL docs for correct syntax.");
}
}
}
}
public Integer reduceInit() { return 0; }
/**
* For each site of interest, rescore the genotype likelihoods by applying the specified feature set.
*
* @param tracker the meta-data tracker
* @param ref the reference base
* @param context the context for the given locus
* @return 1 if the locus was successfully processed, 0 if otherwise
*/
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return 0;
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("variant", null);
// ignore places where we don't have a variant
if ( rods == null || rods.getRecords().size() == 0 )
return 0;
RodVCF variant = (RodVCF)rods.getRecords().get(0);
boolean someoneMatched = false;
for ( MatchExp exp : matchExpressions ) {
Map<String, String> infoMap = new HashMap<String, String>(variant.mCurrentRecord.getInfoValues());
infoMap.put("QUAL", String.valueOf(variant.mCurrentRecord.getQual()));
JexlContext jContext = JexlHelper.createContext();
jContext.setVars(infoMap);
try {
//System.out.printf("Matching %s vs. %s%n", infoMap, exp.expStr);
if ( (Boolean)exp.exp.evaluate(jContext) ) {
//System.out.printf(" => Matched%n");
someoneMatched = true;
break;
}
} catch (Exception e) {
throw new StingException(e.getMessage());
}
}
if ( someoneMatched )
writeVCF(variant);
return 1;
}
private void writeVCF(RodVCF variant) {
VCFRecord rec = variant.mCurrentRecord;
if ( writer == null )
initializeVcfWriter(variant);
writer.addRecord(rec);
}
public Integer reduce(Integer value, Integer sum) {
return sum + value;
}
/**
* Tell the user the number of loci processed and close out the new variants file.
*
* @param result the number of loci seen.
*/
public void onTraversalDone(Integer result) {
if ( writer != null )
writer.close();
}
}

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.Arrays;
public class VCFSelectIntegrationTest extends WalkerTest {
public static String baseTestString() {
return "-T VCFSelect -o %s -R " + oneKGLocation + "reference/human_b36_both.fasta";
}
@Test
public void testVCFSelect1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'AF == 0.50' -L 1:10001290-10048590 ", 1,
Arrays.asList("ff15451c6138c75ece99c3bac91a9d4f"));
executeTest("testVCFSelect1", spec);
}
@Test
public void testVCFSelect2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -L 1:10001290-10048590 ", 1,
Arrays.asList("226ce18354d56f69d9506e7ae70e4eb9"));
executeTest("testVCFSelect2", spec);
}
@Test
public void testVCFSelectOr() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -match 'AF == 0.50' -L 1:10001290-10048590 ", 1,
Arrays.asList("62f6341ac676a919497784f792d3e22f"));
executeTest("testVCFSelectOr", spec);
}
@Test
public void testVCFSelectAnd() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6 && AF == 0.50' -L 1:10001290-10048590 ", 1,
Arrays.asList("914845500749fbc1863d8226f31c96b3"));
executeTest("testVCFSelectAnd", spec);
}
}