CombineVariants with new RodBinding. Bugfix

-- CombineVariants now uses the new RodBinding syntax, -V / --variants.  Passed all integration tests on first run
-- Exposed gapping bug in the List<RodBinding<T>> system now fixed.  ParserEngine now has a addRodBinding() that is called by RodBindingArgumentTypeDescriptor when it encounters each RodBinding.  This allows the system to work with collection types that are recursively parsed by the system.
This commit is contained in:
Mark DePristo 2011-08-07 20:16:51 -04:00
parent 1d8b1bae0a
commit 526b524c3c
4 changed files with 48 additions and 35 deletions

View File

@ -330,6 +330,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
Class parameterType = getParameterizedTypeClass(type);
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(result,tags);
parsingEngine.addRodBinding(result);
return result;
} catch (InvocationTargetException e) {
throw new UserException.CommandLineException(

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
@ -330,7 +331,17 @@ public class ParsingEngine {
if(!tags.containsKey(key))
return new Tags();
return tags.get(key);
}
}
/**
* Add a RodBinding type argument to this parser. Called during parsing to allow
* us to track all of the RodBindings discovered in the command line.
* @param rodBinding the rodbinding to add. Must not be added twice
*/
@Requires("rodBinding != null")
public void addRodBinding(final RodBinding rodBinding) {
rodBindings.add(rodBinding);
}
/**
* Notify the user that a deprecated command-line argument has been used.
@ -367,9 +378,6 @@ public class ParsingEngine {
Object value = (argumentMatches.size() != 0) ? source.parse(this,argumentMatches) : source.createTypeDefault(this);
JVMUtils.setFieldValue(source.field,target,value);
if ( value instanceof RodBinding )
rodBindings.add((RodBinding)value);
}
}

View File

@ -25,9 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
@ -52,8 +50,23 @@ import java.util.*;
* priority list (if provided), emits a single record instance at every position represented in the rods.
*/
@Reference(window=@Window(start=-50,stop=50))
@Requires(value={})
public class CombineVariants extends RodWalker<Integer, Integer> {
/**
* The VCF files to merge together
*
* variants can take any number of arguments on the command line. Each -V argument
* will be included in the final merged output VCF. If no explicit name is provided,
* the -V arguments will be named using the default algorithm: variants, variants2, variants3, etc.
* The user can override this by providing an explicit name -V:name,vcf for each -V argument,
* and each named argument will be labeled as such in the output (i.e., set=name rather than
* set=variants2). The order of arguments does not matter unless except for the naming, so
* if you provide an rod priority list and no explicit names than variants, variants2, etc
* are techincally order dependent. It is strongly recommended to provide explicit names when
* a rod priority list is provided.
*/
@Input(fullName = "variants", shortName = "V", doc="The VCF files to merge together", required=true)
public List<RodBinding<VariantContext>> variantsToMerge;
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter vcfWriter = null;
@ -85,10 +98,6 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if variant is present in at least N input files.", required=false)
public int minimumN = 1;
@Hidden
@Argument(fullName="masterMerge", shortName="master", doc="Master merge mode -- experts only. You need to look at the code to understand it", required=false)
public boolean master = false;
@Hidden
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
public boolean MERGE_INFO_WITH_MAX_AC = false;
@ -148,7 +157,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
// get all of the vcf rods at this locus
// Need to provide reference bases to simpleMerge starting at current locus
Collection<VariantContext> vcs = tracker.getValues(VariantContext.class, context.getLocation());
Collection<VariantContext> vcs = tracker.getValues(variantsToMerge, context.getLocation());
if ( sitesOnlyVCF ) {
vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs);
@ -172,17 +181,13 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
return 0;
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
if ( master ) {
mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master"));
} else {
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
// iterate over the types so that it's deterministic
for ( VariantContext.Type type : VariantContext.Type.values() ) {
if ( VCsByType.containsKey(type) )
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
// iterate over the types so that it's deterministic
for ( VariantContext.Type type : VariantContext.Type.values() ) {
if ( VCsByType.containsKey(type) )
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
for ( VariantContext mergedVC : mergedVCs ) {

View File

@ -44,7 +44,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
public void test1InOut(String file, String md5, String args, boolean vcf3) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
baseTestString(" -priority v1 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
1,
Arrays.asList(md5));
executeTest("testInOut1--" + file, spec);
@ -52,7 +52,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
public void combine2(String file1, String file2, String args, String md5, boolean vcf3) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
baseTestString(" -priority v1,v2 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -V:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
1,
Arrays.asList(md5));
executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
@ -63,8 +63,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
String file2 = "hapmap_3.3.b37.sites.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
+ " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1
+ " -B:hm3,VCF " + validationDataLocation + file2 + args,
+ " -L 1:1-10,000,000 -V:omni,VCF " + validationDataLocation + file1
+ " -V:hm3,VCF " + validationDataLocation + file2 + args,
1,
Arrays.asList(md5));
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
@ -91,10 +91,10 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void threeWayWithRefs() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
" -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
" -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
" -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
baseTestString(" -V:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
" -V:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
" -V:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
" -V:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
" -setKey centerSet" +
" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
@ -104,15 +104,14 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
executeTest("threeWayWithRefs", spec);
}
// complex examples with filtering, indels, and multiple alleles
public void combineComplexSites(String args, String md5) {
String file1 = "combine.1.vcf";
String file2 = "combine.2.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
+ " -B:one,VCF " + validationDataLocation + file1
+ " -B:two,VCF " + validationDataLocation + file2 + args,
+ " -V:one,VCF " + validationDataLocation + file1
+ " -V:two,VCF " + validationDataLocation + file2 + args,
1,
Arrays.asList(md5));
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);