From 3dc78855fd7c862b04c3ba431e998854520fdaee Mon Sep 17 00:00:00 2001 From: hanna Date: Wed, 25 Aug 2010 03:47:57 +0000 Subject: [PATCH] Command-line argument tagging is in, and the ROD system is hacked slightly to support the new syntax (-B:name,type file) as well as the old syntax. Also, a bonus feature: BAMs can now be tagged at the command-line, which should allow us to get rid of some of the hackier calls in GenomeAnalysisEngine. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4105 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/commandline/ArgumentMatch.java | 26 ++++-- .../sting/commandline/ArgumentMatches.java | 2 +- .../sting/commandline/ArgumentSource.java | 4 +- .../commandline/ArgumentTypeDescriptor.java | 87 +++++++++++++------ .../sting/commandline/CommandLineProgram.java | 13 +++ .../sting/commandline/ParsingEngine.java | 12 ++- .../sting/commandline/ParsingMethod.java | 24 ++++- .../sting/gatk/CommandLineExecutable.java | 53 ++--------- .../sting/gatk/GenomeAnalysisEngine.java | 67 +++++++++++++- .../sting/gatk/ReadProperties.java | 16 ++-- .../arguments/GATKArgumentCollection.java | 1 + .../simpleDataSources/SAMDataSource.java | 25 +++--- .../simpleDataSources/SAMReaderID.java | 19 +++- .../sting/gatk/executive/MicroScheduler.java | 3 +- .../OutputStreamArgumentTypeDescriptor.java | 8 +- .../SAMFileReaderArgumentTypeDescriptor.java | 4 +- .../SAMFileWriterArgumentTypeDescriptor.java | 6 +- .../VCFWriterArgumentTypeDescriptor.java | 4 +- .../gatk/refdata/tracks/RMDTrackManager.java | 35 +++++--- .../sting/gatk/walkers/PrintReadsWalker.java | 2 +- .../providers/LocusViewTemplate.java | 3 +- .../datasources/shards/MockLocusShard.java | 4 +- .../SAMBAMDataSourceUnitTest.java | 23 ++--- .../LocusIteratorByStateUnitTest.java | 9 +- .../tracks/RMDTrackManagerUnitTest.java | 4 +- .../traversals/TraverseReadsUnitTest.java | 7 +- .../VariantEvalIntegrationTest.java | 16 ++-- .../ScalaCompoundArgumentTypeDescriptor.scala | 10 +-- 28 
files changed, 324 insertions(+), 163 deletions(-) diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java index e894cc830..0710f6ad2 100755 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -48,6 +48,11 @@ public class ArgumentMatch implements Iterable { */ public final SortedMap> indices = new TreeMap>(); + /** + * An ordered, freeform collection of tags. + */ + public final List tags; + /** * Create a new argument match, defining its properties later. Used to create invalid arguments. */ @@ -63,6 +68,7 @@ public class ArgumentMatch implements Iterable { private ArgumentMatch(String label,ArgumentDefinition definition) { this.label = label; this.definition = definition; + this.tags = Collections.emptyList(); } /** @@ -70,13 +76,21 @@ public class ArgumentMatch implements Iterable { * @param label Label of the argument match. Must not be null. * @param definition The associated definition, if one exists. May be null. * @param index Position of the argument. Must not be null. + * @param tags ordered freeform text tags associated with this argument. */ - public ArgumentMatch( String label, ArgumentDefinition definition, int index ) { - this( label, definition, index, null ); + public ArgumentMatch( String label, ArgumentDefinition definition, int index, List tags ) { + this( label, definition, index, null, tags ); } - - private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) { + /** + * A simple way of indicating that an argument with the given label and definition exists at this index. + * @param label Label of the argument match. Must not be null. + * @param definition The associated definition, if one exists. May be null. + * @param index Position of the argument. Must not be null. + * @param value Value for the argument at this position. 
+ * @param tags ordered freeform text tags associated with this argument. + */ + private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value, List tags ) { this.label = label; this.definition = definition; @@ -84,6 +98,8 @@ public class ArgumentMatch implements Iterable { if( value != null ) values.add(value); indices.put(index,values ); + + this.tags = tags; } /** @@ -161,7 +177,7 @@ public class ArgumentMatch implements Iterable { if( nextIndex == null || nextToken == null ) throw new IllegalStateException( "No more ArgumentMatches are available" ); - ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken ); + ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken, tags ); prepareNext(); return match; } diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java b/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java index e1e240f51..0d97a3842 100755 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java @@ -185,7 +185,7 @@ public class ArgumentMatches implements Iterable { // Clone the list of argument matches to avoid ConcurrentModificationExceptions. 
for( ArgumentMatch argumentMatch: getUniqueMatches() ) { - if( argumentMatch.definition == match.definition ) { + if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) { argumentMatch.mergeInto( match ); for( int index: match.indices.keySet() ) argumentMatches.put( index, argumentMatch ); diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java b/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java index 9bf41016b..7ee086813 100644 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java @@ -149,8 +149,8 @@ public class ArgumentSource { * @param values String representation of all values passed. * @return the parsed value of the object. */ - public Object parse( ArgumentMatches values ) { - return typeDescriptor.parse( this, values ); + public Object parse( ParsingEngine parsingEngine, ArgumentMatches values ) { + return typeDescriptor.parse( parsingEngine, this, values ); } /** diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 06c220da6..8c2a454e3 100644 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -42,7 +42,7 @@ import java.util.*; * @author mhanna * @version 0.1 */ -public abstract class ArgumentTypeDescriptor { +public abstract class ArgumentTypeDescriptor { private static Class[] ARGUMENT_ANNOTATIONS = {Input.class, Output.class, Argument.class}; /** @@ -116,12 +116,16 @@ public abstract class ArgumentTypeDescriptor { /** * Parses an argument source to an object. + * WARNING! Mandatory side effect of parsing! Each parse routine should register the tags it finds with the proper CommandLineProgram. 
+ * TODO: Fix this, perhaps with an event model indicating that a new argument has been created. + * + * @param parsingEngine The engine responsible for parsing. * @param source The source used to find the matches. * @param matches The matches for the source. * @return The parsed object. */ - public Object parse( ArgumentSource source, ArgumentMatches matches ) { - return parse( source, source.field.getType(), matches ); + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) { + return parse(parsingEngine, source, source.field.getType(), matches); } /** @@ -174,7 +178,7 @@ public abstract class ArgumentTypeDescriptor { * @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection. * @return The individual parsed object matching the argument match with Class type. */ - public abstract Object parse( ArgumentSource source, Class type, ArgumentMatches matches ); + public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ); /** * If the argument source only accepts a small set of options, populate the returned list with @@ -219,6 +223,20 @@ public abstract class ArgumentTypeDescriptor { return argumentValues.size() > 0 ? argumentValues.iterator().next() : null; } + /** + * Gets the tags associated with a given command-line argument. + * If the argument matches multiple values, the tags of every match are merged in first-seen order with duplicates removed. + * @param matches The matches for the given argument. + * @return The ordered, duplicate-free list of tags; empty if no match carries a tag. + */ + protected List getArgumentTags(ArgumentMatches matches) { + Set tags = new LinkedHashSet(); + for( ArgumentMatch match: matches ) { + tags.addAll(match.tags); + } + return new ArrayList(tags); + } + /** * Gets the values of an argument with the given full name, from the collection of ArgumentMatches. 
* @param definition Definition of the argument for which to find matches. @@ -294,16 +312,19 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { if (source.isFlag()) return true; - String value = getArgumentValue( createDefaultArgumentDefinition(source), matches ); + ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); + String value = getArgumentValue( defaultDefinition, matches ); + Object result; + List tags = getArgumentTags( matches ); // lets go through the types we support try { if (type.isPrimitive()) { Method valueOf = primitiveToWrapperMap.get(type).getMethod("valueOf",String.class); - return valueOf.invoke(null,value.trim()); + result = valueOf.invoke(null,value.trim()); } else if (type.isEnum()) { Object[] vals = type.getEnumConstants(); Object defaultEnumeration = null; // as we look at options, record the default option if it exists @@ -314,15 +335,16 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { } // if their argument has no value (null), and there's a default, return that default for the enum value if (defaultEnumeration != null && value == null) - return defaultEnumeration; + result = defaultEnumeration; // if their argument has no value and there's no default, throw a missing argument value exception. // TODO: Clean this up so that null values never make it to this point. To fix this, we'll have to clean up the implementation of -U. 
- if (value == null) + else if (value == null) throw new MissingArgumentValueException(Collections.singleton(createDefaultArgumentDefinition(source))); - throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value); + else + throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value); } else { Constructor ctor = type.getConstructor(String.class); - return ctor.newInstance(value); + result = ctor.newInstance(value); } } catch (NoSuchMethodException e) { @@ -334,7 +356,10 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { } catch (InstantiationException e) { throw new StingException("constructFromString:InstantiationException: Failed conversion " + e.getMessage()); } + // WARNING: Side effect! + parsingEngine.addTags(result,tags); + return result; } @@ -367,9 +392,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override @SuppressWarnings("unchecked") - public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) - { + public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) { Class componentType; + Object result; + Set tags = new LinkedHashSet(); if( Collection.class.isAssignableFrom(type) ) { @@ -399,11 +425,13 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { } for( ArgumentMatch match: matches ) { - for( ArgumentMatch value: match ) - collection.add( componentArgumentParser.parse(source,componentType,new ArgumentMatches(value)) ); + for( ArgumentMatch value: match ) { + collection.add( componentArgumentParser.parse(parsingEngine,source,componentType,new ArgumentMatches(value)) ); + tags.addAll(value.tags); + } } - return collection; + result = collection; } else if( type.isArray() ) { @@ -412,21 +440,25 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { // Assemble a collection of individual values used in this computation. 
Collection values = new ArrayList(); - for( ArgumentMatch match: matches ) { + for( ArgumentMatch match: matches ) for( ArgumentMatch value: match ) values.add(value); - } - Object arr = Array.newInstance(componentType,values.size()); + result = Array.newInstance(componentType,values.size()); int i = 0; - for( ArgumentMatch value: values ) - Array.set( arr,i++,componentArgumentParser.parse(source,componentType,new ArgumentMatches(value))); - - return arr; + for( ArgumentMatch value: values ) { + Array.set( result,i++,componentArgumentParser.parse(parsingEngine,source,componentType,new ArgumentMatches(value))); + tags.addAll(value.tags); + } } else throw new StingException("Unsupported compound argument type: " + type); + + // WARNING: Side effect! + parsingEngine.addTags(result,new ArrayList(tags)); + + return result; } /** @@ -506,17 +538,22 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override - public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { if(multiplexedIds == null) throw new StingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first."); Map multiplexedMapping = new HashMap(); + Class componentType = getCollectionComponentType(source.field); + for(Object id: multiplexedIds) { - Object value = ArgumentTypeDescriptor.create(componentType).parse(source,componentType,matches.transform(multiplexer,id)); + Object value = ArgumentTypeDescriptor.create(componentType).parse(parsingEngine,source,componentType,matches.transform(multiplexer,id)); multiplexedMapping.put(id,value); } + + parsingEngine.addTags(multiplexedMapping,getArgumentTags(matches)); + return multiplexedMapping; } diff --git a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java 
index 3c31e5443..df3ea95b3 100644 --- a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -124,6 +124,19 @@ public abstract class CommandLineProgram { */ protected String getArgumentSourceName( Class source ) { return source.toString(); } + /** + * The command-line argument system allows free-form String tags to accompany each + * object. However, there's no way for the clp to push these tags into the fields + * themselves, so we just provide a callback so that the clp can push tags into the + * argument system. + * @param key Key to use, created by the command-line argument system. + * @param tags List of freeform tags. + */ + protected void addTags(Object key, List tags) { + // NO-OP by default. + } + + /** * this is the function that the inheriting class can expect to have called * when all the argument processing is done diff --git a/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index ecb350c15..1f4560317 100755 --- a/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -276,6 +276,15 @@ public class ParsingEngine { } } + /** + * Notify the user that tags have been created. + * @param key The key created. + * @param tags List of tags, or empty list if no tags are present. + */ + public void addTags(Object key, List tags) { + if(clp!=null) clp.addTags(key,tags); + } + /** * Notify the user that a deprecated command-line argument has been used. * @param argumentSource Deprecated argument source specified by user. @@ -308,7 +317,8 @@ public class ParsingEngine { throw new StingException("Internal command-line parser error: unable to find a home for argument matches " + argumentMatches); for( Object target: targets ) { - Object value = (argumentMatches.size() != 0) ? 
source.parse(argumentMatches) : source.createDefault(); + Object value = (argumentMatches.size() != 0) ? source.parse(this,argumentMatches) : source.createDefault(); + JVMUtils.setFieldValue(source.field,target,value); } } diff --git a/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java index d0dcc42eb..40ed66b71 100755 --- a/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java +++ b/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java @@ -25,8 +25,12 @@ package org.broadinstitute.sting.commandline; +import org.broadinstitute.sting.utils.Utils; + import java.util.regex.Pattern; import java.util.regex.Matcher; +import java.util.List; +import java.util.ArrayList; /** * Holds a pattern, along with how to get to the argument definitions that could match that pattern. @@ -76,18 +80,32 @@ public abstract class ParsingMethod { String argument = matcher.group(1).trim(); + List tags = new ArrayList(); + if(matcher.group(2) != null) + tags.addAll(Utils.split(matcher.group(2),",")); + // Find the most appropriate argument definition for the given argument. ArgumentDefinition argumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher ); // Try to find a matching argument. If found, label that as the match. If not found, add the argument // with a null definition. - ArgumentMatch argumentMatch = new ArgumentMatch( argument, argumentDefinition, position ); + ArgumentMatch argumentMatch = new ArgumentMatch( argument, argumentDefinition, position, tags ); return argumentMatch; } - public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile("\\s*--([A-Za-z_][\\w\\-\\.]*)\\s*"), + /** + * A command-line argument always starts with an alphabetical character or underscore followed by any word character. 
+ */ + private static final String ARGUMENT_TEXT = "[A-Za-z_][\\w\\-\\.]*"; + + /** + * Tags, on the other hand, can start with any word character. + */ + private static final String TAG_TEXT = "[\\w\\-\\.]*"; + + public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)), ArgumentDefinitions.FullNameDefinitionMatcher) {}; - public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile("\\s*-([A-Za-z_][\\w\\-]*)\\s*"), + public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)), ArgumentDefinitions.ShortNameDefinitionMatcher) {}; } diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 0042817fc..1dd1e532a 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -82,12 +82,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram { for(SamRecordFilter filter: filters) loadArgumentsIntoObject(filter); - // process any arguments that need a second pass - GATKArgumentCollection arguments = getArgumentCollection(); - processArguments(arguments); - // set the analysis name in the argument collection - return GATKEngine.execute(arguments, mWalker, filters); + return GATKEngine.execute(getArgumentCollection(), mWalker, filters); } /** @@ -133,50 +129,15 @@ public abstract class CommandLineExecutable extends CommandLineProgram { @Override protected String getArgumentSourceName( Class argumentSource ) { return GATKEngine.getWalkerName((Class)argumentSource); - } - - /** - * Preprocess the arguments before submitting them to the GATK engine. - * - * @param argCollection Collection of arguments to preprocess. 
- */ - private void processArguments( GATKArgumentCollection argCollection ) { - argCollection.samFiles = unpackBAMFileList( argCollection.samFiles ); } /** - * Unpack the bam files to be processed, given a list of files. That list of files can - * itself contain entries which are lists of other files to be read (note: you cannot have lists of lists of lists) - * - * @param inputFiles a list of files that represent either bam files themselves, or a file containing a list of bam files to process - * - * @return a flattened list of the bam files provided + * Supply command-line argument tags to the GATK engine. + * @param key Key to use, created by the command-line argument system. + * @param tags List of freeform tags. */ - public static List unpackBAMFileList( List inputFiles ) { - List unpackedReads = new ArrayList(); - for( File inputFile: inputFiles ) { - if (inputFile.getName().toLowerCase().endsWith(".list") ) { - try { - for(String fileName : new XReadLines(inputFile)) - unpackedReads.addAll(Collections.singletonList(new File(fileName))); - } - catch( FileNotFoundException ex ) { - throw new StingException("Unable to find file while unpacking reads", ex); - } - } - else if(inputFile.getName().toLowerCase().endsWith(".bam")) { - unpackedReads.add( inputFile ); - } - else if(inputFile.getName().equals("-")) { - unpackedReads.add( new File("/dev/stdin") ); - } - else { - Utils.scareUser(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " + - "with the .list extension, but the file %s has neither extension. 
Please ensure that your BAM file or list " + - "of BAM files is in the correct format, update the extension, and try again.",inputFile.getName())); - } - } - return unpackedReads; + @Override + protected void addTags(Object key, List tags) { + GATKEngine.addTags(key,tags); } - } diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 41fcd9ca8..e618c51b5 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -50,11 +50,13 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager; import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.commandline.ArgumentException; import org.broadinstitute.sting.commandline.ArgumentSource; import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; import java.io.File; +import java.io.FileNotFoundException; import java.util.*; public class GenomeAnalysisEngine { @@ -96,6 +98,11 @@ public class GenomeAnalysisEngine { */ private Collection> outputs = new ArrayList>(); + /** + * List of tags associated with the given instantiation of the command-line argument. + */ + private final Map> tags = new IdentityHashMap>(); + /** * Collection of the filters applied to the walker's input data. */ @@ -280,6 +287,27 @@ public class GenomeAnalysisEngine { outputs.add(stub); } + /** + * Adds an association between an object created by the + * command-line argument system and a freeform list of tags. + * @param key Object created by the command-line argument system. + * @param tags List of tags to use when reading arguments. 
+ */ + public void addTags(Object key, List tags) { + this.tags.put(key,tags); + } + + /** + * Gets the tags associated with a given object. + * @param key Key for which to find a tag. + * @return List of tags associated with this key. + */ + public List getTags(Object key) { + if(!tags.containsKey(key)) + return Collections.emptyList(); + return tags.get(key); + } + /** * Retrieves an instance of the walker based on the walker name. * @@ -348,7 +376,7 @@ public class GenomeAnalysisEngine { } RMDTrackManager manager = new RMDTrackManager(); - List tracks = manager.getReferenceMetaDataSources(argCollection.RODBindings); + List tracks = manager.getReferenceMetaDataSources(this,argCollection.RODBindings); validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, tracks); // validate all the sequence dictionaries against the reference @@ -547,7 +575,7 @@ public class GenomeAnalysisEngine { else method = new DownsamplingMethod(DownsampleType.NONE,null,null); - return new ReadProperties(argCollection.samFiles, + return new ReadProperties(unpackBAMFileList(argCollection.samFiles), argCollection.strictnessLevel, argCollection.readBufferSize, method, @@ -947,4 +975,39 @@ public class GenomeAnalysisEngine { public ReadMetrics getCumulativeMetrics() { return readsDataSource.getCumulativeReadMetrics(); } + + /** + * Unpack the bam files to be processed, given a list of files. 
That list of files can + * itself contain entries which are lists of other files to be read (note: you cannot have lists of lists of lists) + * + * @param inputFiles a list of files that represent either bam files themselves, or a file containing a list of bam files to process + * + * @return a flattened list of the bam files provided + */ + private List unpackBAMFileList( List inputFiles ) { + List unpackedReads = new ArrayList(); + for( File inputFile: inputFiles ) { + if (inputFile.getName().toLowerCase().endsWith(".list") ) { + try { + for(String fileName : new XReadLines(inputFile)) + unpackedReads.add(new SAMReaderID(new File(fileName),getTags(inputFile))); + } + catch( FileNotFoundException ex ) { + throw new StingException("Unable to find file while unpacking reads", ex); + } + } + else if(inputFile.getName().toLowerCase().endsWith(".bam")) { + unpackedReads.add( new SAMReaderID(inputFile,getTags(inputFile)) ); + } + else if(inputFile.getName().equals("-")) { + unpackedReads.add(new SAMReaderID(new File("/dev/stdin"),Collections.emptyList())); + } + else { + Utils.scareUser(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " + + "with the .list extension, but the file %s has neither extension. 
Please ensure that your BAM file or list " + + "of BAM files is in the correct format, update the extension, and try again.",inputFile.getName())); + } + } + return unpackedReads; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index d92053bfb..dbc76e070 100755 --- a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; -import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.Collection; @@ -26,7 +26,7 @@ import java.util.Collection; * information about how they should be downsampled, sorted, and filtered. */ public class ReadProperties { - private List readsFiles = null; + private List readers = null; private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private Integer readBufferSize = null; private DownsamplingMethod downsamplingMethod = null; @@ -63,8 +63,8 @@ public class ReadProperties { * Gets a list of the files acting as sources of reads. * @return A list of files storing reads data. */ - public List getReadsFiles() { - return readsFiles; + public List getSAMReaderIDs() { + return readers; } /** @@ -115,8 +115,8 @@ public class ReadProperties { * Simple constructor for unit testing. * @param readsFiles List of reads files to open. 
*/ - public ReadProperties( List readsFiles ) { - this.readsFiles = readsFiles; + public ReadProperties( List readsFiles ) { + this.readers = readsFiles; this.downsamplingMethod = new DownsamplingMethod(DownsampleType.NONE,null,null); this.supplementalFilters = new ArrayList(); this.exclusionList = new ValidationExclusion(); @@ -138,7 +138,7 @@ public class ReadProperties { * will explicitly list reads with deletion over the current reference base; otherwise, only observed * bases will be seen in the pileups, and the deletions will be skipped silently. */ - ReadProperties( List samFiles, + ReadProperties( List samFiles, SAMFileReader.ValidationStringency strictness, Integer readBufferSize, DownsamplingMethod downsamplingMethod, @@ -147,7 +147,7 @@ public class ReadProperties { int maximumReadsAtLocus, boolean includeReadsWithDeletionAtLoci, boolean generateExtendedEvents) { - this.readsFiles = samFiles; + this.readers = samFiles; this.readBufferSize = readBufferSize; this.validationStringency = strictness; this.downsamplingMethod = downsamplingMethod; diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 0f295089f..88f589f6d 100755 --- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.simpleframework.xml.*; import org.simpleframework.xml.core.Persister; diff --git 
a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 9212c6f2b..82be1ecb6 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -65,7 +65,7 @@ public class SAMDataSource implements SimpleDataSource { /** * Identifiers for the readers driving this data source. */ - protected final List readerIDs = new ArrayList(); + protected final List readerIDs; /** * How far along is each reader? @@ -109,11 +109,10 @@ public class SAMDataSource implements SimpleDataSource { this.readProperties = reads; this.readMetrics = new ReadMetrics(); - for (File smFile : reads.getReadsFiles()) { - if (!smFile.canRead()) { - throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName()); - } - readerIDs.add(new SAMReaderID(smFile)); + readerIDs = reads.getSAMReaderIDs(); + for (SAMReaderID readerID : reads.getSAMReaderIDs()) { + if (!readerID.samFile.canRead()) + throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + readerID.samFile.getName()); } resourcePool = new SAMResourcePool(Integer.MAX_VALUE); @@ -170,7 +169,7 @@ public class SAMDataSource implements SimpleDataSource { * @return True if no reads files are supplying data to the traversal; false otherwise. */ public boolean isEmpty() { - return readProperties.getReadsFiles().size() == 0; + return readProperties.getSAMReaderIDs().size() == 0; } /** @@ -550,8 +549,8 @@ public class SAMDataSource implements SimpleDataSource { * @param sourceInfo Metadata for the reads to load. 
*/ public SAMReaders(ReadProperties sourceInfo) { - for(File readsFile: sourceInfo.getReadsFiles()) { - SAMFileReader reader = new SAMFileReader(readsFile); + for(SAMReaderID readerID: sourceInfo.getSAMReaderIDs()) { + SAMFileReader reader = new SAMFileReader(readerID.samFile); reader.enableFileSource(true); reader.enableIndexCaching(true); reader.setValidationStringency(sourceInfo.getValidationStringency()); @@ -562,14 +561,14 @@ public class SAMDataSource implements SimpleDataSource { logger.debug(String.format("Sort order is: " + header.getSortOrder())); if (reader.getFileHeader().getReadGroups().size() < 1) { - SAMReadGroupRecord rec = new SAMReadGroupRecord(readsFile.getName()); - rec.setLibrary(readsFile.getName()); - rec.setSample(readsFile.getName()); + SAMReadGroupRecord rec = new SAMReadGroupRecord(readerID.samFile.getName()); + rec.setLibrary(readerID.samFile.getName()); + rec.setSample(readerID.samFile.getName()); reader.getFileHeader().addReadGroup(rec); } - readers.put(new SAMReaderID(readsFile),reader); + readers.put(readerID,reader); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java index f5bf9f64e..733190541 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import java.io.File; +import java.util.List; +import java.util.Collections; /** * Uniquely identifies a SAM file reader. @@ -15,12 +17,27 @@ public class SAMReaderID { */ protected final File samFile; + /** + * A list of tags associated with this BAM file. + */ + protected final List tags; + /** * Creates an identifier for a SAM file based on read. * @param samFile The source file for SAM data. 
+ * @param tags tags to use when creating a reader ID. */ - protected SAMReaderID(File samFile) { + public SAMReaderID(File samFile, List tags) { this.samFile = samFile; + this.tags = tags; + } + + /** + * Gets the tags associated with the given BAM file. + * @return A collection of the tags associated with this file. + */ + public List getTags() { + return Collections.unmodifiableList(tags); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index e59f083af..5c6850675 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.io.OutputTracker; @@ -147,7 +148,7 @@ public abstract class MicroScheduler { * @return an iterator over the reads specified in the shard. */ protected StingSAMIterator getReadIterator(Shard shard) { - return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator(new ReadProperties(new ArrayList())); + return (!reads.isEmpty()) ? 
reads.seek(shard) : new NullSAMIterator(new ReadProperties(Collections.emptyList())); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index 75b29e47e..62bbf405d 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -77,7 +77,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { ArgumentDefinition definition = createDefaultArgumentDefinition(source); String fileName = getArgumentValue( definition, matches ); @@ -85,7 +85,11 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { engine.addOutput(stub); - return createInstanceOfClass(type,stub); + Object result = createInstanceOfClass(type,stub); + // WARNING: Side effects required by engine! 
+ parsingEngine.addTags(result,getArgumentTags(matches)); + + return result; } /** diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java index 12363bc61..041bb045d 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.io.stubs; import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; import org.broadinstitute.sting.commandline.ArgumentSource; import org.broadinstitute.sting.commandline.ArgumentMatches; +import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -59,7 +60,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor } @Override - public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); @@ -69,6 +70,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor builder.setSAMFile(new File(readerFileName)); + // WARNING: Skipping required side-effect because stub is impossible to generate. engine.addInput(source, builder); // MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. 
Return null, then diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 5efdcbc4d..76c009172 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -91,7 +91,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor } @Override - public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { String writerFileName = getArgumentValue( createBAMArgumentDefinition(source), matches ); if( writerFileName == null ) throw new StingException("SAM file compression was supplied, but no associated writer was supplied with it."); @@ -103,6 +103,8 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor if( compressionLevel != null ) stub.setCompressionLevel(compressionLevel); + // WARNING: Side effects required by engine! 
+ parsingEngine.addTags(stub,getArgumentTags(matches)); engine.addOutput(stub); return stub; @@ -121,7 +123,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor DEFAULT_ARGUMENT_FULLNAME, DEFAULT_ARGUMENT_SHORTNAME, ArgumentDefinition.getDoc(annotation), - true, + false, false, source.isMultiValued(), source.isHidden(), diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index aa43d920b..5d5adda7c 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -111,7 +111,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @return Transform from the matches into the associated argument. */ @Override - public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { // Get the filename for the genotype file, if it exists. If not, we'll need to send output to out. String writerFileName = getArgumentValue(createDefaultArgumentDefinition(source),matches); File writerFile = writerFileName != null ? new File(writerFileName) : null; @@ -122,6 +122,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { // Create a stub for the given object. VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress) : new VCFWriterStub(engine, System.out, compress); + // WARNING: Side effects required by engine! 
+ parsingEngine.addTags(stub,getArgumentTags(matches)); engine.addOutput(stub); return stub; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java index 19a6607fb..91b51a117 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.refdata.tracks; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.StingException; @@ -62,13 +63,13 @@ public class RMDTrackManager extends PluginManager { /** * find the associated reference meta data * - * @param triplets the triplets of strings from the -B command line option + * @param bindings the bindings of strings from the -B command line option * * @return a list of RMDTracks, one for each -B option */ - public List getReferenceMetaDataSources(List triplets) { + public List getReferenceMetaDataSources(GenomeAnalysisEngine engine,List bindings) { initializeTrackTypes(); - initializeTriplets(triplets); + initializeBindings(engine,bindings); // try and make the tracks given their requests return createRequestedTrackObjects(); } @@ -91,18 +92,30 @@ public class RMDTrackManager extends PluginManager { } /** - * initialize our lists of triplets - * @param triplets the input to the GATK, as a list of strings passed in through the -B options + * initialize our lists of bindings + * @param engine The engine, used to populate tags. 
+ * @param bindings the input to the GATK, as a list of strings passed in through the -B options */ - private void initializeTriplets(List triplets) { + private void initializeBindings(GenomeAnalysisEngine engine,List bindings) { // NOTE: Method acts as a static. Once the inputs have been passed once they are locked in. - if (inputs.size() > 0 || triplets.size() == 0) + if (inputs.size() > 0 || bindings.size() == 0) return; - for (String value: triplets) { - String[] split = value.split(","); - if (split.length != 3) throw new IllegalArgumentException(value + " is not a valid reference metadata track description"); - inputs.add(new RMDTriplet(split[0], split[1], split[2])); + for (String binding: bindings) { + if(engine != null && engine.getTags(binding).size() == 2) { + // Assume that if tags are present, those tags are name and type. + // Name is always first, followed by type. + List parameters = engine.getTags(binding); + String name = parameters.get(0); + String type = parameters.get(1); + inputs.add(new RMDTriplet(name,type,binding)); + } + else { + // Otherwise, use old-format bindings. 
+ String[] split = binding.split(","); + if (split.length != 3) throw new IllegalArgumentException(binding + " is not a valid reference metadata track description"); + inputs.add(new RMDTriplet(split[0], split[1], split[2])); + } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 2b1d3abe9..f6c24a2bd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -44,7 +44,7 @@ import java.io.PrintStream; @Requires({DataSource.READS, DataSource.REFERENCE}) public class PrintReadsWalker extends ReadWalker { /** an optional argument to dump the reads out to a BAM file */ - @Output(doc="Write output to this BAM filename instead of STDOUT",required=false) + @Output(doc="Write output to this BAM filename instead of STDOUT") SAMFileWriter out; @Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false) String readGroup = null; diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index 88708efe4..bbab06168 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -19,7 +19,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.junit.BeforeClass; import org.junit.Test; -import java.io.File; import java.io.FileNotFoundException; import java.util.*; /** @@ -51,7 +50,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(); GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); - Shard shard = new LocusShard(new SAMDataSource(new 
ReadProperties(Collections.emptyList())),Collections.singletonList(shardBounds),Collections.emptyMap()); + Shard shard = new LocusShard(new SAMDataSource(new ReadProperties(Collections.emptyList())),Collections.singletonList(shardBounds),Collections.emptyMap()); WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java index ace85cece..ce466a995 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java @@ -27,10 +27,10 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import java.util.List; import java.util.Collections; -import java.io.File; /** * A mock locus shard, usable for infrastructure that requires a shard to behave properly. 
@@ -40,6 +40,6 @@ import java.io.File; */ public class MockLocusShard extends LocusShard { public MockLocusShard(final List intervals) { - super(new SAMDataSource(new ReadProperties(Collections.emptyList())),intervals,null); + super(new SAMDataSource(new ReadProperties(Collections.emptyList())),intervals,null); } } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java index e1280ad8e..085d60ae1 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java @@ -19,6 +19,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.List; +import java.util.Collections; /** * @@ -48,7 +49,7 @@ import java.util.List; */ public class SAMBAMDataSourceUnitTest extends BaseTest { - private List fl; + private List readers; private IndexedFastaSequenceFile seq; /** @@ -58,7 +59,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { */ @Before public void doForEachTest() throws FileNotFoundException { - fl = new ArrayList(); + readers = new ArrayList(); // sequence seq = new IndexedFastaSequenceFile(new File(hg18Reference)); @@ -73,7 +74,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { @After public void undoForEachTest() { seq = null; - fl.clear(); + readers.clear(); } @@ -83,8 +84,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { logger.warn("Executing testLinearBreakIterateAll"); // setup the data - fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); - ReadProperties reads = new ReadProperties(fl); + readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); + 
ReadProperties reads = new ReadProperties(readers); // the sharding strat. SAMDataSource data = new SAMDataSource(reads); @@ -128,8 +129,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { logger.warn("Executing testMergingTwoBAMFiles"); // setup the test files - fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); - ReadProperties reads = new ReadProperties(fl); + readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); + ReadProperties reads = new ReadProperties(readers); // the sharding strat. SAMDataSource data = new SAMDataSource(reads); @@ -168,10 +169,10 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { // setup the data and the counter before our second run - fl.clear(); - fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); - fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); - reads = new ReadProperties(fl); + readers.clear(); + readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); + readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); + reads = new ReadProperties(readers); count = 0; // the sharding strat. 
diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index bd9ba3439..ad29de786 100644 --- a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -6,6 +6,7 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; @@ -40,7 +41,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; // create a test version of the Reads object - ReadProperties readAttributes = new ReadProperties(new ArrayList()); + ReadProperties readAttributes = new ReadProperties(new ArrayList()); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); @@ -92,7 +93,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}; // create a test version of the Reads object - ReadProperties readAttributes = new ReadProperties(new ArrayList()); + ReadProperties readAttributes = new ReadProperties(new ArrayList()); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); @@ -140,7 +141,7 @@ public class LocusIteratorByStateUnitTest 
extends BaseTest { records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20)); // create a test version of the Reads object - ReadProperties reads = new ReadProperties(new ArrayList()); + ReadProperties reads = new ReadProperties(new ArrayList()); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS); // create the iterator by state with the fake reads and fake records @@ -166,7 +167,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20)); // create a test version of the Reads object - ReadProperties reads = new ReadProperties(new ArrayList()); + ReadProperties reads = new ReadProperties(new ArrayList()); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS); // create the iterator by state with the fake reads and fake records diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerUnitTest.java index 74a84cf9c..72fddea15 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerUnitTest.java @@ -108,8 +108,8 @@ public class RMDTrackManagerUnitTest extends BaseTest { triplets.add("db"); triplets.add("DBSNP"); triplets.add("../../GATK_Data/dbsnp_130_b36.rod"); - Assert.assertEquals(1, manager.getReferenceMetaDataSources(triplets).size()); - RMDTrack t = manager.getReferenceMetaDataSources(triplets).get(0); + Assert.assertEquals(1, manager.getReferenceMetaDataSources(null,triplets).size()); + RMDTrack t = manager.getReferenceMetaDataSources(null,triplets).get(0); // make sure we have a single track // lets test the first and 20th contigs of the human reference diff --git 
a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index b81db8853..782cd296d 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -56,9 +57,9 @@ import java.util.Collections; public class TraverseReadsUnitTest extends BaseTest { private ReferenceSequenceFile seq; - private File bam = new File(validationDataLocation + "index_test.bam"); // TCGA-06-0188.aligned.duplicates_marked.bam"); + private SAMReaderID bam = new SAMReaderID(new File(validationDataLocation + "index_test.bam"),Collections.emptyList()); // TCGA-06-0188.aligned.duplicates_marked.bam"); private File refFile = new File(validationDataLocation + "Homo_sapiens_assembly17.fasta"); - private List bamList; + private List bamList; private Walker countReadWalker; private File output; private long readSize = 100000; @@ -82,7 +83,7 @@ public class TraverseReadsUnitTest extends BaseTest { fail("Couldn't open the output file"); } - bamList = new ArrayList(); + bamList = new ArrayList(); bamList.add(bam); countReadWalker = new CountReadsWalker(); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java 
b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index bb3d378ce..0e7acc5b0 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -14,13 +14,13 @@ public class private static String root = cmdRoot + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -B eval,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + - " -B comp_genotypes,VCF," + validationDataLocation + "yri.trio.gatk.ug.head.vcf -reportType Grep"; + " -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + + " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf -reportType Grep"; private static String rootGZ = cmdRoot + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -B eval,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + - " -B comp_genotypes,VCF," + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz -reportType Grep"; + " -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + + " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz -reportType Grep"; private static String[] testsEnumerations = {root, rootGZ}; @@ -46,7 +46,7 @@ public class public void testVEGenotypeConcordance() { String vcfFiles[] = {"GenotypeConcordanceEval.vcf", "GenotypeConcordanceEval.vcf.gz"}; for (String vcfFile : vcfFiles) { - WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B eval,VCF," + validationDataLocation + vcfFile + " -B comp,VCF," + validationDataLocation + "GenotypeConcordanceComp.vcf -noStandard -E GenotypeConcordance -reportType CSV -o %s", + WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " 
-B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noStandard -E GenotypeConcordance -reportType CSV -o %s", 1, Arrays.asList("15d1075d384da2bb7445f7493f2b6a07")); executeTest("testVEGenotypeConcordance" + vcfFile, spec); @@ -76,8 +76,8 @@ public class public void testVEComplex() { HashMap expectations = new HashMap(); String extraArgs1 = "-L " + validationDataLocation + "chr1_b36_pilot3.interval_list -family NA19238+NA19239=NA19240 -MVQ 30 -E MendelianViolationEvaluator" + - " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + - " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; + " -B:dbsnp_130,dbSNP " + GATKDataLocation + "dbsnp_130_b36.rod" + + " -B:comp_hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; String matchingMD5 = "dd513bc72860133a58e9ee542782162b"; @@ -104,7 +104,7 @@ public class " -L 21" + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + " -E CountFunctionalClasses -noStandard" + - " -B eval,VCF," + validationDataLocation + "test.filtered.maf_annotated.vcf" + + " -B:eval,VCF " + validationDataLocation + "test.filtered.maf_annotated.vcf" + " -o %s"; String md5 = "d41d8cd98f00b204e9800998ecf8427e"; diff --git a/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala b/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala index f2c84649c..274535297 100644 --- a/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala +++ b/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala @@ -3,7 +3,7 @@ package org.broadinstitute.sting.queue.util import collection.JavaConversions._ import org.broadinstitute.sting.queue.QException import java.lang.Class -import org.broadinstitute.sting.commandline.{ArgumentMatches, ArgumentSource, ArgumentTypeDescriptor} +import org.broadinstitute.sting.commandline.{ArgumentMatches, ArgumentSource, ArgumentTypeDescriptor, 
ParsingEngine} /** * An ArgumentTypeDescriptor that can parse the scala collections. @@ -42,7 +42,7 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @param argumentMatches The argument match strings that were found for this argument source. * @return The parsed object. */ - def parse(source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = { + def parse(parsingEngine: ParsingEngine, source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = { val componentType = ReflectionUtils.getCollectionType(source.field) val componentArgumentParser = ArgumentTypeDescriptor.create(componentType) @@ -50,19 +50,19 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { var list = List.empty[Any] for (argumentMatch <- argumentMatches) for (value <- argumentMatch) - list :+= componentArgumentParser.parse(source, componentType, new ArgumentMatches(value)) + list :+= componentArgumentParser.parse(parsingEngine, source, componentType, new ArgumentMatches(value)) list } else if (classOf[Set[_]].isAssignableFrom(classType)) { var set = Set.empty[Any] for (argumentMatch <- argumentMatches) for (value <- argumentMatch) - set += componentArgumentParser.parse(source, componentType, new ArgumentMatches(value)) + set += componentArgumentParser.parse(parsingEngine, source, componentType, new ArgumentMatches(value)) set } else if (classOf[Option[_]].isAssignableFrom(classType)) { if (argumentMatches.size > 1) throw new QException("Unable to set Option to multiple values: " + argumentMatches.mkString(" ")) else if (argumentMatches.size == 1) - Some(componentArgumentParser.parse(source, componentType, argumentMatches)) + Some(componentArgumentParser.parse(parsingEngine, source, componentType, argumentMatches)) else None } else