From 526b524c3c95b7c3185291087debc936463efdbb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 7 Aug 2011 20:16:51 -0400 Subject: [PATCH] CombineVariants with new RodBinding. Bugfix -- CombineVariants now uses the new RodBinding syntax, -V / --variants. Passed all integration tests on first run -- Exposed gapping bug in the List> system now fixed. ParserEngine now has a addRodBinding() that is called by RodBindingArgumentTypeDescriptor when it encounters each RodBinding. This allows the system to work with collection types that are recursively parsed by the system. --- .../commandline/ArgumentTypeDescriptor.java | 1 + .../sting/commandline/ParsingEngine.java | 16 +++++-- .../walkers/variantutils/CombineVariants.java | 45 ++++++++++--------- .../CombineVariantsIntegrationTest.java | 21 +++++---- 4 files changed, 48 insertions(+), 35 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 0882f5385..622576747 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -330,6 +330,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { Class parameterType = getParameterizedTypeClass(type); RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); parsingEngine.addTags(result,tags); + parsingEngine.addRodBinding(result); return result; } catch (InvocationTargetException e) { throw new UserException.CommandLineException( diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index d85d45719..9b543142b 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.commandline; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -330,7 +331,17 @@ public class ParsingEngine { if(!tags.containsKey(key)) return new Tags(); return tags.get(key); - } + } + + /** + * Add a RodBinding type argument to this parser. Called during parsing to allow + * us to track all of the RodBindings discovered in the command line. + * @param rodBinding the rodbinding to add. Must not be added twice + */ + @Requires("rodBinding != null") + public void addRodBinding(final RodBinding rodBinding) { + rodBindings.add(rodBinding); + } /** * Notify the user that a deprecated command-line argument has been used. @@ -367,9 +378,6 @@ public class ParsingEngine { Object value = (argumentMatches.size() != 0) ? source.parse(this,argumentMatches) : source.createTypeDefault(this); JVMUtils.setFieldValue(source.field,target,value); - - if ( value instanceof RodBinding ) - rodBindings.add((RodBinding)value); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index e918d5ce8..7905c2c32 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; @@ -52,8 +50,23 @@ import java.util.*; * priority list (if provided), emits a single record instance at every position represented in the rods. */ @Reference(window=@Window(start=-50,stop=50)) -@Requires(value={}) public class CombineVariants extends RodWalker { + /** + * The VCF files to merge together + * + * variants can take any number of arguments on the command line. Each -V argument + * will be included in the final merged output VCF. If no explicit name is provided, + * the -V arguments will be named using the default algorithm: variants, variants2, variants3, etc. + * The user can override this by providing an explicit name -V:name,vcf for each -V argument, + * and each named argument will be labeled as such in the output (i.e., set=name rather than + * set=variants2). The order of arguments does not matter unless except for the naming, so + * if you provide an rod priority list and no explicit names than variants, variants2, etc + * are techincally order dependent. It is strongly recommended to provide explicit names when + * a rod priority list is provided. + */ + @Input(fullName = "variants", shortName = "V", doc="The VCF files to merge together", required=true) + public List> variantsToMerge; + @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; @@ -85,10 +98,6 @@ public class CombineVariants extends RodWalker { @Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if variant is present in at least N input files.", required=false) public int minimumN = 1; - @Hidden - @Argument(fullName="masterMerge", shortName="master", doc="Master merge mode -- experts only. You need to look at the code to understand it", required=false) - public boolean master = false; - @Hidden @Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false) public boolean MERGE_INFO_WITH_MAX_AC = false; @@ -148,7 +157,7 @@ public class CombineVariants extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getValues(VariantContext.class, context.getLocation()); + Collection vcs = tracker.getValues(variantsToMerge, context.getLocation()); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); @@ -172,17 +181,13 @@ public class CombineVariants extends RodWalker { return 0; List mergedVCs = new ArrayList(); - if ( master ) { - mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master")); - } else { - Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); - // iterate over the types so that it's deterministic - for ( VariantContext.Type type : VariantContext.Type.values() ) { - if ( VCsByType.containsKey(type) ) - mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), - priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, - SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); - } + Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); + // iterate over the types so that it's deterministic + for ( VariantContext.Type type : VariantContext.Type.values() ) { + if ( VCsByType.containsKey(type) ) + mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, + SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } for ( VariantContext mergedVC : mergedVCs ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 9b152bc71..d27ab34a0 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -44,7 +44,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { public void test1InOut(String file, String md5, String args, boolean vcf3) { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args), + baseTestString(" -priority v1 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args), 1, Arrays.asList(md5)); executeTest("testInOut1--" + file, spec); @@ -52,7 +52,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { public void combine2(String file1, String file2, String args, String md5, boolean vcf3) { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args), + baseTestString(" -priority v1,v2 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -V:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args), 1, Arrays.asList(md5)); executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); @@ -63,8 +63,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { String file2 = "hapmap_3.3.b37.sites.vcf"; WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference - + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1 - + " -B:hm3,VCF " + validationDataLocation + file2 + args, + + " -L 1:1-10,000,000 -V:omni,VCF " + validationDataLocation + file1 + + " -V:hm3,VCF " + validationDataLocation + file2 + args, 1, Arrays.asList(md5)); executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); @@ -91,10 +91,10 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + - " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + - " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + - " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + + baseTestString(" -V:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + + " -V:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + + " -V:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + + " -V:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + " -setKey centerSet" + " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + @@ -104,15 +104,14 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("threeWayWithRefs", spec); } - // complex examples with filtering, indels, and multiple alleles public void combineComplexSites(String args, String md5) { String file1 = "combine.1.vcf"; String file2 = "combine.2.vcf"; WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference - + " -B:one,VCF " + validationDataLocation + file1 - + " -B:two,VCF " + validationDataLocation + file2 + args, + + " -V:one,VCF " + validationDataLocation + file1 + + " -V:two,VCF " + validationDataLocation + file2 + args, 1, Arrays.asList(md5)); executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);