From f172c349f614918bc238b7c22bc4449ba023a136 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 6 Jan 2014 16:12:37 -0500 Subject: [PATCH 1/2] Adding the functionality to enable users to input a file of VCFs for -V. To do this I have added a RodBindingCollection which can represent either a VCF or a file of VCFs. Note that e.g. SelectVariants allows a list of RodBindingCollections so that one can intermix VCFs and VCF lists. For VariantContext tags with a list, by default the tags for the -V argument are applied unless overridden by the individual line. In other words, any given line can have either one token (the file path) or two tokens (the new tags and the file path). For example: foo.vcf VCF,name=bar bar.vcf Note that a VCF list file name must end with '.list'. Added this functionality to CombineVariants, CombineReferenceCalculationVariants, and VariantRecalibrator. --- .../VariantRecalibrator.java | 9 +- .../CombineReferenceCalculationVariants.java | 9 +- .../commandline/ArgumentTypeDescriptor.java | 140 +++++++++++++++++- .../sting/commandline/IntervalBinding.java | 8 +- .../sting/commandline/ParsingEngine.java | 1 + .../sting/commandline/RodBinding.java | 4 +- .../commandline/RodBindingCollection.java | 89 +++++++++++ .../walkers/variantutils/CombineVariants.java | 7 +- .../RodBindingCollectionUnitTest.java | 126 ++++++++++++++++ 9 files changed, 378 insertions(+), 15 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/commandline/RodBindingCollection.java create mode 100644 public/java/test/org/broadinstitute/sting/commandline/RodBindingCollectionUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 5da7b4219..d43dc4a12 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; @@ -155,7 +154,8 @@ public class VariantRecalibrator extends RodWalker> input; + public List> inputCollections; + final private List> input = new ArrayList<>(); /** * These additional calls should be unfiltered and annotated with the error covariates that are intended to be used for modeling. @@ -272,7 +272,6 @@ public class VariantRecalibrator extends RodWalker hInfo = new HashSet<>(); ApplyRecalibration.addVQSRStandardHeaderLines(hInfo); recalWriter.writeHeader( new VCFHeader(hInfo) ); @@ -280,6 +279,10 @@ public class VariantRecalibrator extends RodWalker inputCollection : inputCollections ) + input.addAll(inputCollection.getRodBindings()); } //--------------------------------------------------------------------------------------------------------------- diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariants.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariants.java index a587b0250..2a004aaca 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariants.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariants.java @@ -111,13 +111,12 @@ import java.util.*; @Reference(window=@Window(start=-10,stop=10)) public class CombineReferenceCalculationVariants extends RodWalker implements AnnotatorCompatible, TreeReducible { - // TODO -- allow a file of VCF paths to be entered? - /** * The VCF files to merge together */ @Input(fullName="variant", shortName = "V", doc="One or more input VCF files", required=true) - public List> variants; + public List> variantCollections; + final private List> variants = new ArrayList<>(); @Output(doc="File to which variants should be written") protected VariantContextWriter vcfWriter = null; @@ -169,6 +168,10 @@ public class CombineReferenceCalculationVariants extends RodWalkeremptyList(), this, getToolkit()); + + // collect the actual rod bindings into a list for use later + for ( final RodBindingCollection variantCollection : variantCollections ) + variants.addAll(variantCollection.getRodBindings()); } public VariantContext map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index a70d6e706..14b5118ad 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.commandline; +import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; @@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; +import java.io.IOException; import java.lang.annotation.Annotation; import java.lang.reflect.*; import java.util.*; @@ -306,6 +308,7 @@ public abstract class ArgumentTypeDescriptor { * @param source source * @param type type to check * @param matches matches + * @param tags argument tags * @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding. */ protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) { @@ -409,6 +412,95 @@ public abstract class ArgumentTypeDescriptor { value, fieldName, e.getMessage())); } } + + /** + * Parse the source of a RodBindingCollection, which can be either a file of RodBindings or an actual RodBinding. + * + * @param parsingEngine the parsing engine used to validate this argument type descriptor + * @param source source + * @param type type + * @param matches matches + * @param tags argument tags + * @return the newly created binding object + */ + public Object parseRodBindingCollectionSource(final ParsingEngine parsingEngine, + final ArgumentSource source, + final Type type, + final ArgumentMatches matches, + final Tags tags) { + + final ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); + final ArgumentMatchValue value = getArgumentValue(defaultDefinition, matches); + @SuppressWarnings("unchecked") + Class parameterType = JVMUtils.getParameterizedTypeClass(type); + String name = defaultDefinition.fullName; + + // if this a list of files, get those bindings + final File file = value.asFile(); + try { + if (file.getAbsolutePath().endsWith(".list")) { + return getRodBindingsCollection(file, parsingEngine, parameterType, name, tags, source.field.getName()); + } + } catch (IOException e) { + throw new UserException.CouldNotReadInputFile(file, e); + } + + // otherwise, treat this as an individual binding + final RodBinding binding = (RodBinding)parseBinding(value, parameterType, RodBinding.class, name, tags, source.field.getName()); + parsingEngine.addTags(binding, tags); + parsingEngine.addRodBinding(binding); + return RodBindingCollection.createRodBindingCollectionOfType(parameterType, Arrays.asList(binding)); + } + + /** + * Retrieve and parse a collection of RodBindings from the given file. + * + * @param file the source file + * @param parsingEngine the engine responsible for parsing + * @param parameterType the Tribble Feature parameter type + * @param bindingName the name of the binding passed to the constructor. + * @param defaultTags general tags for the binding used for parsing and passed to the constructor. + * @param fieldName the name of the field that was parsed. Used for error reporting. + * @return the newly created collection of binding objects. + */ + public static Object getRodBindingsCollection(final File file, + final ParsingEngine parsingEngine, + final Class parameterType, + final String bindingName, + final Tags defaultTags, + final String fieldName) throws IOException { + final List bindings = new ArrayList<>(); + + // parse each line separately using the given Tags if none are provided on each line + for ( final String line: FileUtils.readLines(file) ) { + final String[] tokens = line.split("\\s+"); + final RodBinding binding; + + if ( tokens.length == 0 ) { + continue; // empty line, so do nothing + } + // use the default tags if none are provided for this binding + else if ( tokens.length == 1 ) { + final ArgumentMatchValue value = new ArgumentMatchStringValue(tokens[0]); + binding = (RodBinding)parseBinding(value, parameterType, RodBinding.class, bindingName, defaultTags, fieldName); + parsingEngine.addTags(binding, defaultTags); + } + // use the new tags if provided + else if ( tokens.length == 2 ) { + final Tags tags = ParsingMethod.parseTags(fieldName, tokens[0]); + final ArgumentMatchValue value = new ArgumentMatchStringValue(tokens[1]); + binding = (RodBinding)parseBinding(value, parameterType, RodBinding.class, bindingName, tags, fieldName); + parsingEngine.addTags(binding, tags); + } else { + throw new UserException.BadArgumentValue(fieldName, "data lines should consist of an optional set of tags along with a path to a file; too many tokens are present for line: " + line); + } + + bindings.add(binding); + parsingEngine.addRodBinding(binding); + } + + return RodBindingCollection.createRodBindingCollectionOfType(parameterType, bindings); + } } /** @@ -487,14 +579,60 @@ class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { } } +/** + * Parser for RodBindingCollection objects + */ +class RodBindingCollectionArgumentTypeDescriptor extends ArgumentTypeDescriptor { + /** + * We only want RodBindingCollection class objects + * @param type The type to check. + * @return true if the provided class is an RodBindingCollection.class + */ + @Override + public boolean supports( final Class type ) { + return isRodBindingCollection(type); + } + + public static boolean isRodBindingCollection( final Class type ) { + return RodBindingCollection.class.isAssignableFrom(type); + } + + /** + * See note from RodBindingArgumentTypeDescriptor.parse(). + * + * @param parsingEngine parsing engine + * @param source source + * @param type type to check + * @param matches matches + * @return the IntervalBinding object. + */ + @Override + public Object parse(final ParsingEngine parsingEngine, final ArgumentSource source, final Type type, final ArgumentMatches matches) { + final Tags tags = getArgumentTags(matches); + return parseRodBindingCollectionSource(parsingEngine, source, type, matches, tags); + } +} + /** * Parse simple argument types: java primitives, wrapper classes, and anything that has * a simple String constructor. */ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { + + /** + * @param type the class type + * @return true if this class is a binding type, false otherwise + */ + private boolean isBinding(final Class type) { + return RodBindingArgumentTypeDescriptor.isRodBinding(type) || + IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) || + RodBindingCollectionArgumentTypeDescriptor.isRodBindingCollection(type); + } + + @Override public boolean supports( Class type ) { - if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false; + if ( isBinding(type) ) return false; if ( type.isPrimitive() ) return true; if ( type.isEnum() ) return true; if ( primitiveToWrapperMap.containsValue(type) ) return true; diff --git a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java index 9253e1ee5..de57de871 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java @@ -57,7 +57,7 @@ public final class IntervalBinding { @Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"}) public IntervalBinding(Class type, final String rawName, final String source, final String tribbleType, final Tags tags) { - featureIntervals = new RodBinding(type, rawName, source, tribbleType, tags); + featureIntervals = new RodBinding<>(type, rawName, source, tribbleType, tags); } @Requires({"intervalArgument != null"}) @@ -66,9 +66,7 @@ public final class IntervalBinding { } public String getSource() { - if ( featureIntervals != null ) - return featureIntervals.getSource(); - return stringIntervals; + return ( featureIntervals != null ? featureIntervals.getSource() : stringIntervals ); } public List getIntervals(final GenomeAnalysisEngine toolkit) { @@ -79,7 +77,7 @@ public final class IntervalBinding { List intervals; if ( featureIntervals != null ) { - intervals = new ArrayList(); + intervals = new ArrayList<>(); // TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index aca20d5a1..ad64aaa1d 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -83,6 +83,7 @@ public class ParsingEngine { private static final Set STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet( Arrays.asList(new SimpleArgumentTypeDescriptor(), new IntervalBindingArgumentTypeDescriptor(), new RodBindingArgumentTypeDescriptor(), + new RodBindingCollectionArgumentTypeDescriptor(), new CompoundArgumentTypeDescriptor(), new MultiplexArgumentTypeDescriptor()) ); diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index ef8e01df4..87fa85858 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -32,7 +32,7 @@ import org.broad.tribble.Feature; import java.util.*; /** - * A RodBinding representing a walker argument that gets bound to a ROD track. + * A RodBinding represents a walker argument that gets bound to a ROD track. * * The RodBinding is a formal GATK argument that bridges between a walker and * the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding @@ -77,7 +77,7 @@ public final class RodBinding { final private String tribbleType; /** The command line tags associated with this RodBinding */ final private Tags tags; - /** The Java class expected for this RodBinding. Must correspond to the type emited by Tribble */ + /** The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble */ final private Class type; /** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */ final private boolean bound; diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBindingCollection.java b/public/java/src/org/broadinstitute/sting/commandline/RodBindingCollection.java new file mode 100644 index 000000000..d8306ea5a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBindingCollection.java @@ -0,0 +1,89 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.commandline; + +import com.google.java.contract.Ensures; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.util.*; + +/** + * A RodBindingCollection represents a collection of RodBindings. + * + * The RodBindingCollection is a formal GATK argument that is used to specify a file of RodBindings. + * + */ +public final class RodBindingCollection { + + /** The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble */ + final private Class type; + + private Collection> rodBindings; + + public RodBindingCollection(final Class type, final Collection> rodBindings) { + this.type = type; + this.rodBindings = Collections.unmodifiableCollection(rodBindings); + } + + /** + * @return the collection of RodBindings + */ + final public Collection> getRodBindings() { + return rodBindings; + } + + /** + * @return the string name of the tribble type, such as vcf, bed, etc. + */ + @Ensures({"result != null"}) + final public Class getType() { + return type; + } + + @Override + public String toString() { + return String.format("(RodBindingCollection %s)", getRodBindings()); + } + + /** + * Utility method to help construct a RodBindingCollection of the given Feature type + * + * @param type the Feature type + * @param rodBindings the rod bindings to put into the collection + * @return a new RodBindingCollection object + */ + public static Object createRodBindingCollectionOfType(final Class type, final Collection rodBindings) { + try { + final Constructor ctor = RodBindingCollection.class.getConstructor(Class.class, Collection.class); + return ctor.newInstance(type, rodBindings); + } catch (final Exception e) { + throw new IllegalStateException("Failed to create a RodBindingCollection for type " + type); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index e13252d49..152128022 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -131,7 +131,8 @@ public class CombineVariants extends RodWalker implements Tree * a rod priority list is provided. */ @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) - public List> variants; + public List> variantCollections; + final private List> variants = new ArrayList<>(); @Output(doc="File to which variants should be written") protected VariantContextWriter vcfWriter = null; @@ -230,6 +231,10 @@ public class CombineVariants extends RodWalker implements Tree VCFHeader vcfHeader = new VCFHeader(headerLines, samples); vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER); vcfWriter.writeHeader(vcfHeader); + + // collect the actual rod bindings into a list for use later + for ( final RodBindingCollection variantCollection : variantCollections ) + variants.addAll(variantCollection.getRodBindings()); } private void validateAnnotateUnionArguments() { diff --git a/public/java/test/org/broadinstitute/sting/commandline/RodBindingCollectionUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/RodBindingCollectionUnitTest.java new file mode 100644 index 000000000..29d38ec19 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/commandline/RodBindingCollectionUnitTest.java @@ -0,0 +1,126 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.commandline; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Collection; + +public class RodBindingCollectionUnitTest extends BaseTest { + + private ParsingEngine parsingEngine; + private Tags mytags; + + private static final String defaultTagString = "VCF"; + private static final String testVCFFileName = privateTestDir + "empty.vcf"; + private static final String testListFileName = privateTestDir + "oneVCF.list"; + + @BeforeMethod + public void setUp() { + parsingEngine = new ParsingEngine(null); + RodBinding.resetNameCounter(); + mytags = new Tags(); + mytags.addPositionalTag(defaultTagString); + } + + private class RodBindingCollectionArgProvider { + @Argument(fullName="input",doc="input",shortName="V") + public RodBindingCollection input; + } + + @Test + public void testStandardVCF() { + final String[] commandLine = new String[] {"-V", testVCFFileName}; + + parsingEngine.addArgumentSource( RodBindingCollectionArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + final RodBindingCollectionArgProvider argProvider = new RodBindingCollectionArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.input.getRodBindings().iterator().next().getSource(), testVCFFileName, "Argument is not correctly initialized"); + } + + @Test + public void testList() { + final String[] commandLine = new String[] {"-V", testListFileName}; + + parsingEngine.addArgumentSource(RodBindingCollectionArgProvider.class); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + final RodBindingCollectionArgProvider argProvider = new RodBindingCollectionArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.input.getRodBindings().iterator().next().getSource(), "private/testdata/empty.vcf", "Argument is not correctly initialized"); + } + + @Test + public void testDefaultTagsInFile() throws IOException { + + final File testFile = File.createTempFile("RodBindingCollectionUnitTest.defaultTags", ".list"); + testFile.deleteOnExit(); + final FileWriter writer = new FileWriter(testFile); + writer.write(testVCFFileName, 0, testVCFFileName.length()); + writer.close(); + + ArgumentTypeDescriptor.getRodBindingsCollection(testFile, parsingEngine, VariantContext.class, "foo", mytags, "input"); + + final Collection bindings = parsingEngine.getRodBindings(); + Assert.assertNotNull(bindings); + Assert.assertEquals(bindings.size(), 1); + + final RodBinding binding = bindings.iterator().next(); + Assert.assertEquals(parsingEngine.getTags(binding), mytags); + } + + @Test + public void testOverrideTagsInFile() throws IOException { + final File testFile = File.createTempFile("RodBindingCollectionUnitTest.overrideTags", ".list"); + testFile.deleteOnExit(); + final FileWriter writer = new FileWriter(testFile); + final String textToWrite = "foo " + testVCFFileName; + writer.write(textToWrite, 0, textToWrite.length()); + writer.close(); + + ArgumentTypeDescriptor.getRodBindingsCollection(testFile, parsingEngine, VariantContext.class, "foo", mytags, "input"); + + final Collection bindings = parsingEngine.getRodBindings(); + Assert.assertNotNull(bindings); + Assert.assertEquals(bindings.size(), 1); + + final RodBinding binding = bindings.iterator().next(); + Assert.assertNotEquals(parsingEngine.getTags(binding), mytags); + } +} From 0323caefc87b9f82ddaa0a114e7cd769971b5727 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 7 Jan 2014 15:50:21 -0500 Subject: [PATCH 2/2] Added some bug fixes to the gVCF merging code after finally getting some real data to play with. Still under construction, awaiting more test data from Valentin. --- .../variant/GATKVariantContextUtils.java | 38 +++++++++---------- .../GATKVariantContextUtilsUnitTest.java | 7 ++-- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index c36d7f888..7d4d66f7c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -33,6 +33,7 @@ import org.broad.tribble.TribbleException; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFConstants; @@ -1064,7 +1065,6 @@ public class GATKVariantContextUtils { final Set inconsistentAttributes = new HashSet<>(); final Set rsIDs = new LinkedHashSet<>(1); // most of the time there's one id - VariantContext longestVC = first; int depth = 0; final Map> annotationMap = new LinkedHashMap<>(); GenotypesContext genotypes = GenotypesContext.create(); @@ -1084,10 +1084,6 @@ public class GATKVariantContextUtils { if ( isSpanningEvent ) continue; - // keep track of the longest location that starts here - if ( VariantContextUtils.getSize(vc) > VariantContextUtils.getSize(longestVC) ) - longestVC = vc; - // special case ID (just preserve it) if ( vc.hasID() ) rsIDs.add(vc.getID()); @@ -1105,15 +1101,15 @@ public class GATKVariantContextUtils { if ( depth > 0 ) attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth)); + // remove stale AC and AF based attributes + removeStaleAttributesAfterMerge(attributes); + final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs); final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID).alleles(alleles) - .loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd()) + .chr(loc.getContig()).start(loc.getStart()).computeEndFromAlleles(alleles, loc.getStart()) .genotypes(genotypes).unfiltered().attributes(new TreeMap<>(attributes)).log10PError(CommonInfo.NO_LOG10_PERROR); // we will need to regenotype later - // remove stale AC and AF based attributes - removeStaleAttributesAfterMerge(builder); - return builder.make(); } @@ -1147,16 +1143,17 @@ public class GATKVariantContextUtils { } /** - * Remove the stale attributes from the merged VariantContext (builder) + * Remove the stale attributes from the merged set * - * @param builder the VC builder + * @param attributes the attribute map */ - private static void removeStaleAttributesAfterMerge(final VariantContextBuilder builder) { - builder.rmAttributes(Arrays.asList(VCFConstants.ALLELE_COUNT_KEY, - VCFConstants.ALLELE_FREQUENCY_KEY, - VCFConstants.ALLELE_NUMBER_KEY, - VCFConstants.MLE_ALLELE_COUNT_KEY, - VCFConstants.MLE_ALLELE_FREQUENCY_KEY)); + private static void removeStaleAttributesAfterMerge(final Map attributes) { + attributes.remove(VCFConstants.ALLELE_COUNT_KEY); + attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY); + attributes.remove(VCFConstants.ALLELE_NUMBER_KEY); + attributes.remove(VCFConstants.MLE_ALLELE_COUNT_KEY); + attributes.remove(VCFConstants.MLE_ALLELE_FREQUENCY_KEY); + attributes.remove(VCFConstants.END_KEY); } /** @@ -1544,7 +1541,7 @@ public class GATKVariantContextUtils { final String name = g.getSampleName(); if ( !mergedGenotypes.containsSample(name) ) { // we need to modify it even if it already contains all of the alleles because we need to purge the PLs out anyways - final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles); + final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart()); final int[] PLs = generatePLs(g, indexesOfRelevantAlleles); // note that we set the alleles to null here (as we expect it to be re-genotyped) final Genotype newG = new GenotypeBuilder(g).name(name).alleles(null).PL(PLs).noAD().noGQ().make(); @@ -1559,15 +1556,16 @@ public class GATKVariantContextUtils { * * @param remappedAlleles the list of alleles to evaluate * @param targetAlleles the target list of alleles + * @param position position to use for error messages * @return non-null array of ints representing indexes */ - protected static int[] getIndexesOfRelevantAlleles(final List remappedAlleles, final List targetAlleles) { + protected static int[] getIndexesOfRelevantAlleles(final List remappedAlleles, final List targetAlleles, final int position) { if ( remappedAlleles == null || remappedAlleles.size() == 0 ) throw new IllegalArgumentException("The list of input alleles must not be null or empty"); if ( targetAlleles == null || targetAlleles.size() == 0 ) throw new IllegalArgumentException("The list of target alleles must not be null or empty"); if ( !remappedAlleles.contains(NON_REF_SYMBOLIC_ALLELE) ) - throw new IllegalArgumentException("The list of input alleles must contain " + NON_REF_SYMBOLIC_ALLELE + " as an allele; please use the Haplotype Caller with gVCF output to generate appropriate records"); + throw new UserException("The list of input alleles must contain " + NON_REF_SYMBOLIC_ALLELE + " as an allele but that is not the case at position " + position + "; please use the Haplotype Caller with gVCF output to generate appropriate records"); final int indexOfGenericAlt = remappedAlleles.indexOf(NON_REF_SYMBOLIC_ALLELE); final int[] indexMapping = new int[targetAlleles.size()]; diff --git a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java index 23a24e180..6672e3264 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.variant.variantcontext.*; import org.testng.Assert; import org.testng.annotations.BeforeSuite; @@ -1463,14 +1464,14 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { } } - @Test(expectedExceptions = IllegalArgumentException.class) + @Test(expectedExceptions = UserException.class) public void testGetIndexesOfRelevantAllelesWithNoALT() { final List alleles1 = new ArrayList<>(1); alleles1.add(Allele.create("A", true)); final List alleles2 = new ArrayList<>(1); alleles2.add(Allele.create("A", true)); - GATKVariantContextUtils.getIndexesOfRelevantAlleles(alleles1, alleles2); + GATKVariantContextUtils.getIndexesOfRelevantAlleles(alleles1, alleles2, -1); Assert.fail("We should have thrown an exception because the allele was not present"); } @@ -1502,7 +1503,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { if ( allelesIndex > 0 ) myAlleles.add(allAlleles.get(allelesIndex)); - final int[] indexes = GATKVariantContextUtils.getIndexesOfRelevantAlleles(myAlleles, allAlleles); + final int[] indexes = GATKVariantContextUtils.getIndexesOfRelevantAlleles(myAlleles, allAlleles, -1); Assert.assertEquals(indexes.length, allAlleles.size());