diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectWalker.java new file mode 100755 index 000000000..ac1bcfb28 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectWalker.java @@ -0,0 +1,136 @@ +package org.broadinstitute.sting.playground.gatk.walkers.vcftools; + +import org.broadinstitute.sting.gatk.contexts.*; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.filters.ClusteredSnps; +import org.broadinstitute.sting.gatk.walkers.filters.VariantContextWindow; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.vcf.*; +import org.broadinstitute.sting.utils.cmdLine.Argument; + +import java.util.*; +import org.apache.commons.jexl.*; + +/** + * Selects variant calls for output from a user-supplied VCF file using a number of user-selectable, parameterizable criteria. + */ +@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class)) +public class VCFSelectWalker extends RodWalker { + @Argument(fullName="match", shortName="match", doc="Expression used with INFO fields to select VCF records for inclusion in the output VCF(see wiki docs for more info)", required=false) + protected String[] MATCH_STRINGS = new String[]{null}; + + private VCFWriter writer = null; + + class MatchExp { + String name; + String expStr; + Expression exp; + + public MatchExp(String name, String str, Expression exp) { + this.name = name; + this.expStr = str; + this.exp = exp; + } + } + + private List matchExpressions = new ArrayList(); + + private void initializeVcfWriter(RodVCF rod) { + // setup the header fields + Set hInfo = new HashSet(); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.add(new VCFHeaderLine("source", "VariantSelect")); + hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); + + for ( MatchExp exp : matchExpressions ) { + hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr)); + } + + writer = new VCFWriter(out); + writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples())); + } + + public void initialize() { + for ( int i = 0; i < MATCH_STRINGS.length; i++ ) { + if ( MATCH_STRINGS[i] != null ) { + try { + Expression filterExpression = ExpressionFactory.createExpression(MATCH_STRINGS[i]); + matchExpressions.add(new MatchExp(String.format("match-%d", i), MATCH_STRINGS[i], filterExpression)); + } catch (Exception e) { + throw new StingException("Invalid expression used (" + MATCH_STRINGS[i] + "). Please see the JEXL docs for correct syntax."); + } + } + } + } + + public Integer reduceInit() { return 0; } + + /** + * For each site of interest, rescore the genotype likelihoods by applying the specified feature set. + * + * @param tracker the meta-data tracker + * @param ref the reference base + * @param context the context for the given locus + * @return 1 if the locus was successfully processed, 0 if otherwise + */ + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) + return 0; + + RODRecordList rods = tracker.getTrackData("variant", null); + // ignore places where we don't have a variant + if ( rods == null || rods.getRecords().size() == 0 ) + return 0; + + RodVCF variant = (RodVCF)rods.getRecords().get(0); + boolean someoneMatched = false; + for ( MatchExp exp : matchExpressions ) { + Map infoMap = new HashMap(variant.mCurrentRecord.getInfoValues()); + infoMap.put("QUAL", String.valueOf(variant.mCurrentRecord.getQual())); + + JexlContext jContext = JexlHelper.createContext(); + jContext.setVars(infoMap); + + try { + //System.out.printf("Matching %s vs. %s%n", infoMap, exp.expStr); + if ( (Boolean)exp.exp.evaluate(jContext) ) { + //System.out.printf(" => Matched%n"); + someoneMatched = true; + break; + } + } catch (Exception e) { + throw new StingException(e.getMessage()); + } + } + + if ( someoneMatched ) + writeVCF(variant); + + return 1; + } + + private void writeVCF(RodVCF variant) { + VCFRecord rec = variant.mCurrentRecord; + + if ( writer == null ) + initializeVcfWriter(variant); + + writer.addRecord(rec); + } + + public Integer reduce(Integer value, Integer sum) { + return sum + value; + } + + /** + * Tell the user the number of loci processed and close out the new variants file. + * + * @param result the number of loci seen. + */ + public void onTraversalDone(Integer result) { + if ( writer != null ) + writer.close(); + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectIntegrationTest.java new file mode 100755 index 000000000..7b1e7b4ca --- /dev/null +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectIntegrationTest.java @@ -0,0 +1,46 @@ +package org.broadinstitute.sting.playground.gatk.walkers.vcftools; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.Arrays; + +public class VCFSelectIntegrationTest extends WalkerTest { + + public static String baseTestString() { + return "-T VCFSelect -o %s -R " + oneKGLocation + "reference/human_b36_both.fasta"; + } + + + @Test + public void testVCFSelect1() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'AF == 0.50' -L 1:10001290-10048590 ", 1, + Arrays.asList("ff15451c6138c75ece99c3bac91a9d4f")); + executeTest("testVCFSelect1", spec); + } + + @Test + public void testVCFSelect2() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -L 1:10001290-10048590 ", 1, + Arrays.asList("226ce18354d56f69d9506e7ae70e4eb9")); + executeTest("testVCFSelect2", spec); + } + + @Test + public void testVCFSelectOr() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -match 'AF == 0.50' -L 1:10001290-10048590 ", 1, + Arrays.asList("62f6341ac676a919497784f792d3e22f")); + executeTest("testVCFSelectOr", spec); + } + + @Test + public void testVCFSelectAnd() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6 && AF == 0.50' -L 1:10001290-10048590 ", 1, + Arrays.asList("914845500749fbc1863d8226f31c96b3")); + executeTest("testVCFSelectAnd", spec); + } +} \ No newline at end of file