Command-line argument tagging is in, and the ROD system is hacked slightly to support the new syntax
(-B:name,type file) as well as the old syntax.  Also, a bonus feature: BAMs can now be tagged on the
command line, which should allow us to get rid of some of the hackier calls in GenomeAnalysisEngine.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4105 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-08-25 03:47:57 +00:00
parent aa8cf25d08
commit 3dc78855fd
28 changed files with 324 additions and 163 deletions

View File

@ -48,6 +48,11 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
*/ */
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>(); public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
/**
* An ordered, freeform collection of tags.
*/
public final List<String> tags;
/** /**
* Create a new argument match, defining its properties later. Used to create invalid arguments. * Create a new argument match, defining its properties later. Used to create invalid arguments.
*/ */
@ -63,6 +68,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
private ArgumentMatch(String label,ArgumentDefinition definition) { private ArgumentMatch(String label,ArgumentDefinition definition) {
this.label = label; this.label = label;
this.definition = definition; this.definition = definition;
this.tags = Collections.emptyList();
} }
/** /**
@ -70,13 +76,21 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @param label Label of the argument match. Must not be null. * @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null. * @param definition The associated definition, if one exists. May be null.
* @param index Position of the argument. Must not be null. * @param index Position of the argument. Must not be null.
* @param tags ordered freeform text tags associated with this argument.
*/ */
public ArgumentMatch( String label, ArgumentDefinition definition, int index ) { public ArgumentMatch( String label, ArgumentDefinition definition, int index, List<String> tags ) {
this( label, definition, index, null ); this( label, definition, index, null, tags );
} }
/**
private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) { * A simple way of indicating that an argument with the given label and definition exists at this index.
* @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null.
* @param index Position of the argument. Must not be null.
* @param value Value for the argument at this position.
* @param tags ordered freeform text tags associated with this argument.
*/
private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value, List<String> tags ) {
this.label = label; this.label = label;
this.definition = definition; this.definition = definition;
@ -84,6 +98,8 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
if( value != null ) if( value != null )
values.add(value); values.add(value);
indices.put(index,values ); indices.put(index,values );
this.tags = tags;
} }
/** /**
@ -161,7 +177,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
if( nextIndex == null || nextToken == null ) if( nextIndex == null || nextToken == null )
throw new IllegalStateException( "No more ArgumentMatches are available" ); throw new IllegalStateException( "No more ArgumentMatches are available" );
ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken ); ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken, tags );
prepareNext(); prepareNext();
return match; return match;
} }

View File

@ -185,7 +185,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
// Clone the list of argument matches to avoid ConcurrentModificationExceptions. // Clone the list of argument matches to avoid ConcurrentModificationExceptions.
for( ArgumentMatch argumentMatch: getUniqueMatches() ) { for( ArgumentMatch argumentMatch: getUniqueMatches() ) {
if( argumentMatch.definition == match.definition ) { if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) {
argumentMatch.mergeInto( match ); argumentMatch.mergeInto( match );
for( int index: match.indices.keySet() ) for( int index: match.indices.keySet() )
argumentMatches.put( index, argumentMatch ); argumentMatches.put( index, argumentMatch );

View File

@ -149,8 +149,8 @@ public class ArgumentSource {
* @param values String representation of all values passed. * @param values String representation of all values passed.
* @return the parsed value of the object. * @return the parsed value of the object.
*/ */
public Object parse( ArgumentMatches values ) { public Object parse( ParsingEngine parsingEngine, ArgumentMatches values ) {
return typeDescriptor.parse( this, values ); return typeDescriptor.parse( parsingEngine, this, values );
} }
/** /**

View File

@ -42,7 +42,7 @@ import java.util.*;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public abstract class ArgumentTypeDescriptor { public abstract class ArgumentTypeDescriptor {
private static Class[] ARGUMENT_ANNOTATIONS = {Input.class, Output.class, Argument.class}; private static Class[] ARGUMENT_ANNOTATIONS = {Input.class, Output.class, Argument.class};
/** /**
@ -116,12 +116,16 @@ public abstract class ArgumentTypeDescriptor {
/** /**
* Parses an argument source to an object. * Parses an argument source to an object.
* WARNING! Mandatory side effect of parsing! Each parse routine should register the tags it finds with the proper CommandLineProgram.
* TODO: Fix this, perhaps with an event model indicating that a new argument has been created.
*
* @param parsingEngine The engine responsible for parsing.
* @param source The source used to find the matches. * @param source The source used to find the matches.
* @param matches The matches for the source. * @param matches The matches for the source.
* @return The parsed object. * @return The parsed object.
*/ */
public Object parse( ArgumentSource source, ArgumentMatches matches ) { public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) {
return parse( source, source.field.getType(), matches ); return parse(parsingEngine, source, source.field.getType(), matches);
} }
/** /**
@ -174,7 +178,7 @@ public abstract class ArgumentTypeDescriptor {
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection. * @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
* @return The individual parsed object matching the argument match with Class type. * @return The individual parsed object matching the argument match with Class type.
*/ */
public abstract Object parse( ArgumentSource source, Class type, ArgumentMatches matches ); public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches );
/** /**
* If the argument source only accepts a small set of options, populate the returned list with * If the argument source only accepts a small set of options, populate the returned list with
@ -219,6 +223,20 @@ public abstract class ArgumentTypeDescriptor {
return argumentValues.size() > 0 ? argumentValues.iterator().next() : null; return argumentValues.size() > 0 ? argumentValues.iterator().next() : null;
} }
/**
 * Collects the tags attached to every match of a command-line argument.
 * Tags are gathered in iteration order across all of the matches; a tag that
 * occurs more than once is kept only at the position where it was first seen.
 * @param matches The matches for the given argument.
 * @return A newly allocated list of the distinct tags in first-seen order; empty if no match carries a tag.
 */
protected List<String> getArgumentTags(ArgumentMatches matches) {
    // A LinkedHashSet discards duplicates while preserving insertion order.
    Set<String> distinctTags = new LinkedHashSet<String>();
    Iterator<ArgumentMatch> matchIterator = matches.iterator();
    while( matchIterator.hasNext() )
        distinctTags.addAll(matchIterator.next().tags);
    return new ArrayList<String>(distinctTags);
}
/** /**
* Gets the values of an argument with the given full name, from the collection of ArgumentMatches. * Gets the values of an argument with the given full name, from the collection of ArgumentMatches.
* @param definition Definition of the argument for which to find matches. * @param definition Definition of the argument for which to find matches.
@ -294,16 +312,19 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
@Override @Override
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
if (source.isFlag()) if (source.isFlag())
return true; return true;
String value = getArgumentValue( createDefaultArgumentDefinition(source), matches ); ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches );
Object result;
List<String> tags = getArgumentTags( matches );
// lets go through the types we support // lets go through the types we support
try { try {
if (type.isPrimitive()) { if (type.isPrimitive()) {
Method valueOf = primitiveToWrapperMap.get(type).getMethod("valueOf",String.class); Method valueOf = primitiveToWrapperMap.get(type).getMethod("valueOf",String.class);
return valueOf.invoke(null,value.trim()); result = valueOf.invoke(null,value.trim());
} else if (type.isEnum()) { } else if (type.isEnum()) {
Object[] vals = type.getEnumConstants(); Object[] vals = type.getEnumConstants();
Object defaultEnumeration = null; // as we look at options, record the default option if it exists Object defaultEnumeration = null; // as we look at options, record the default option if it exists
@ -314,15 +335,16 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
// if their argument has no value (null), and there's a default, return that default for the enum value // if their argument has no value (null), and there's a default, return that default for the enum value
if (defaultEnumeration != null && value == null) if (defaultEnumeration != null && value == null)
return defaultEnumeration; result = defaultEnumeration;
// if their argument has no value and there's no default, throw a missing argument value exception. // if their argument has no value and there's no default, throw a missing argument value exception.
// TODO: Clean this up so that null values never make it to this point. To fix this, we'll have to clean up the implementation of -U. // TODO: Clean this up so that null values never make it to this point. To fix this, we'll have to clean up the implementation of -U.
if (value == null) else if (value == null)
throw new MissingArgumentValueException(Collections.singleton(createDefaultArgumentDefinition(source))); throw new MissingArgumentValueException(Collections.singleton(createDefaultArgumentDefinition(source)));
throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value); else
throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value);
} else { } else {
Constructor ctor = type.getConstructor(String.class); Constructor ctor = type.getConstructor(String.class);
return ctor.newInstance(value); result = ctor.newInstance(value);
} }
} }
catch (NoSuchMethodException e) { catch (NoSuchMethodException e) {
@ -334,7 +356,10 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} catch (InstantiationException e) { } catch (InstantiationException e) {
throw new StingException("constructFromString:InstantiationException: Failed conversion " + e.getMessage()); throw new StingException("constructFromString:InstantiationException: Failed conversion " + e.getMessage());
} }
// WARNING: Side effect!
parsingEngine.addTags(result,tags);
return result;
} }
@ -367,9 +392,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) {
{
Class componentType; Class componentType;
Object result;
Set<String> tags = new LinkedHashSet<String>();
if( Collection.class.isAssignableFrom(type) ) { if( Collection.class.isAssignableFrom(type) ) {
@ -399,11 +425,13 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
for( ArgumentMatch match: matches ) { for( ArgumentMatch match: matches ) {
for( ArgumentMatch value: match ) for( ArgumentMatch value: match ) {
collection.add( componentArgumentParser.parse(source,componentType,new ArgumentMatches(value)) ); collection.add( componentArgumentParser.parse(parsingEngine,source,componentType,new ArgumentMatches(value)) );
tags.addAll(value.tags);
}
} }
return collection; result = collection;
} }
else if( type.isArray() ) { else if( type.isArray() ) {
@ -412,21 +440,25 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
// Assemble a collection of individual values used in this computation. // Assemble a collection of individual values used in this computation.
Collection<ArgumentMatch> values = new ArrayList<ArgumentMatch>(); Collection<ArgumentMatch> values = new ArrayList<ArgumentMatch>();
for( ArgumentMatch match: matches ) { for( ArgumentMatch match: matches )
for( ArgumentMatch value: match ) for( ArgumentMatch value: match )
values.add(value); values.add(value);
}
Object arr = Array.newInstance(componentType,values.size()); result = Array.newInstance(componentType,values.size());
int i = 0; int i = 0;
for( ArgumentMatch value: values ) for( ArgumentMatch value: values ) {
Array.set( arr,i++,componentArgumentParser.parse(source,componentType,new ArgumentMatches(value))); Array.set( result,i++,componentArgumentParser.parse(parsingEngine,source,componentType,new ArgumentMatches(value)));
tags.addAll(value.tags);
return arr; }
} }
else else
throw new StingException("Unsupported compound argument type: " + type); throw new StingException("Unsupported compound argument type: " + type);
// WARNING: Side effect!
parsingEngine.addTags(result,new ArrayList<String>(tags));
return result;
} }
/** /**
@ -506,17 +538,22 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
if(multiplexedIds == null) if(multiplexedIds == null)
throw new StingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first."); throw new StingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first.");
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>(); Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
Class componentType = getCollectionComponentType(source.field); Class componentType = getCollectionComponentType(source.field);
for(Object id: multiplexedIds) { for(Object id: multiplexedIds) {
Object value = ArgumentTypeDescriptor.create(componentType).parse(source,componentType,matches.transform(multiplexer,id)); Object value = ArgumentTypeDescriptor.create(componentType).parse(parsingEngine,source,componentType,matches.transform(multiplexer,id));
multiplexedMapping.put(id,value); multiplexedMapping.put(id,value);
} }
parsingEngine.addTags(multiplexedMapping,getArgumentTags(matches));
return multiplexedMapping; return multiplexedMapping;
} }

View File

@ -124,6 +124,19 @@ public abstract class CommandLineProgram {
*/ */
protected String getArgumentSourceName( Class source ) { return source.toString(); } protected String getArgumentSourceName( Class source ) { return source.toString(); }
/**
 * Callback through which the command-line argument system reports the
 * free-form String tags that accompanied an argument.  The tags cannot be
 * stored in the argument fields themselves, so they are handed over here,
 * associated with the object that was created for the argument.
 * Subclasses that care about tags should override this method.
 * @param key Key to use, created by the command-line argument system.
 * @param tags List of freeform tags.
 */
protected void addTags(Object key, List<String> tags) {
    // Deliberately empty: the base implementation ignores tags.
}
/** /**
* this is the function that the inheriting class can expect to have called * this is the function that the inheriting class can expect to have called
* when all the argument processing is done * when all the argument processing is done

View File

@ -276,6 +276,15 @@ public class ParsingEngine {
} }
} }
/**
 * Passes the tags found for a parsed argument on to the registered
 * command-line program, if one is present.
 * @param key The key created.
 * @param tags List of tags, or empty list if no tags are present.
 */
public void addTags(Object key, List<String> tags) {
    // With no command-line program attached there is nobody to notify.
    if( clp == null )
        return;
    clp.addTags(key,tags);
}
/** /**
* Notify the user that a deprecated command-line argument has been used. * Notify the user that a deprecated command-line argument has been used.
* @param argumentSource Deprecated argument source specified by user. * @param argumentSource Deprecated argument source specified by user.
@ -308,7 +317,8 @@ public class ParsingEngine {
throw new StingException("Internal command-line parser error: unable to find a home for argument matches " + argumentMatches); throw new StingException("Internal command-line parser error: unable to find a home for argument matches " + argumentMatches);
for( Object target: targets ) { for( Object target: targets ) {
Object value = (argumentMatches.size() != 0) ? source.parse(argumentMatches) : source.createDefault(); Object value = (argumentMatches.size() != 0) ? source.parse(this,argumentMatches) : source.createDefault();
JVMUtils.setFieldValue(source.field,target,value); JVMUtils.setFieldValue(source.field,target,value);
} }
} }

View File

@ -25,8 +25,12 @@
package org.broadinstitute.sting.commandline; package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.utils.Utils;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.List;
import java.util.ArrayList;
/** /**
* Holds a pattern, along with how to get to the argument definitions that could match that pattern. * Holds a pattern, along with how to get to the argument definitions that could match that pattern.
@ -76,18 +80,32 @@ public abstract class ParsingMethod {
String argument = matcher.group(1).trim(); String argument = matcher.group(1).trim();
List<String> tags = new ArrayList<String>();
if(matcher.group(2) != null)
tags.addAll(Utils.split(matcher.group(2),","));
// Find the most appropriate argument definition for the given argument. // Find the most appropriate argument definition for the given argument.
ArgumentDefinition argumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher ); ArgumentDefinition argumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher );
// Try to find a matching argument. If found, label that as the match. If not found, add the argument // Try to find a matching argument. If found, label that as the match. If not found, add the argument
// with a null definition. // with a null definition.
ArgumentMatch argumentMatch = new ArgumentMatch( argument, argumentDefinition, position ); ArgumentMatch argumentMatch = new ArgumentMatch( argument, argumentDefinition, position, tags );
return argumentMatch; return argumentMatch;
} }
public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile("\\s*--([A-Za-z_][\\w\\-\\.]*)\\s*"), /**
* A command-line argument name starts with a letter or underscore, followed by any number of word characters, hyphens, or periods.
*/
private static final String ARGUMENT_TEXT = "[A-Za-z_][\\w\\-\\.]*";
/**
* Tags, on the other hand, may begin with any word character, hyphen, or period (and, as written, may also be empty).
*/
private static final String TAG_TEXT = "[\\w\\-\\.]*";
public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
ArgumentDefinitions.FullNameDefinitionMatcher) {}; ArgumentDefinitions.FullNameDefinitionMatcher) {};
public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile("\\s*-([A-Za-z_][\\w\\-]*)\\s*"), public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
ArgumentDefinitions.ShortNameDefinitionMatcher) {}; ArgumentDefinitions.ShortNameDefinitionMatcher) {};
} }

View File

@ -82,12 +82,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
for(SamRecordFilter filter: filters) for(SamRecordFilter filter: filters)
loadArgumentsIntoObject(filter); loadArgumentsIntoObject(filter);
// process any arguments that need a second pass
GATKArgumentCollection arguments = getArgumentCollection();
processArguments(arguments);
// set the analysis name in the argument collection // set the analysis name in the argument collection
return GATKEngine.execute(arguments, mWalker, filters); return GATKEngine.execute(getArgumentCollection(), mWalker, filters);
} }
/** /**
@ -133,50 +129,15 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
@Override @Override
protected String getArgumentSourceName( Class argumentSource ) { protected String getArgumentSourceName( Class argumentSource ) {
return GATKEngine.getWalkerName((Class<Walker>)argumentSource); return GATKEngine.getWalkerName((Class<Walker>)argumentSource);
}
/**
* Preprocess the arguments before submitting them to the GATK engine.
*
* @param argCollection Collection of arguments to preprocess.
*/
private void processArguments( GATKArgumentCollection argCollection ) {
argCollection.samFiles = unpackBAMFileList( argCollection.samFiles );
} }
/** /**
* Unpack the bam files to be processed, given a list of files. That list of files can * Supply command-line argument tags to the GATK engine.
* itself contain entries which are lists of other files to be read (note: you cannot have lists of lists of lists) * @param key Key to use, created by the command-line argument system.
* * @param tags List of freeform tags.
* @param inputFiles a list of files that represent either bam files themselves, or a file containing a list of bam files to process
*
* @return a flattened list of the bam files provided
*/ */
public static List<File> unpackBAMFileList( List<File> inputFiles ) { @Override
List<File> unpackedReads = new ArrayList<File>(); protected void addTags(Object key, List<String> tags) {
for( File inputFile: inputFiles ) { GATKEngine.addTags(key,tags);
if (inputFile.getName().toLowerCase().endsWith(".list") ) {
try {
for(String fileName : new XReadLines(inputFile))
unpackedReads.addAll(Collections.singletonList(new File(fileName)));
}
catch( FileNotFoundException ex ) {
throw new StingException("Unable to find file while unpacking reads", ex);
}
}
else if(inputFile.getName().toLowerCase().endsWith(".bam")) {
unpackedReads.add( inputFile );
}
else if(inputFile.getName().equals("-")) {
unpackedReads.add( new File("/dev/stdin") );
}
else {
Utils.scareUser(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
"of BAM files is in the correct format, update the extension, and try again.",inputFile.getName()));
}
}
return unpackedReads;
} }
} }

View File

@ -50,11 +50,13 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.commandline.ArgumentException; import org.broadinstitute.sting.commandline.ArgumentException;
import org.broadinstitute.sting.commandline.ArgumentSource; import org.broadinstitute.sting.commandline.ArgumentSource;
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
public class GenomeAnalysisEngine { public class GenomeAnalysisEngine {
@ -96,6 +98,11 @@ public class GenomeAnalysisEngine {
*/ */
private Collection<Stub<?>> outputs = new ArrayList<Stub<?>>(); private Collection<Stub<?>> outputs = new ArrayList<Stub<?>>();
/**
* Tags supplied on the command line, keyed by the identity of the object that the argument system created for them.
*/
private final Map<Object,List<String>> tags = new IdentityHashMap<Object,List<String>>();
/** /**
* Collection of the filters applied to the walker's input data. * Collection of the filters applied to the walker's input data.
*/ */
@ -280,6 +287,27 @@ public class GenomeAnalysisEngine {
outputs.add(stub); outputs.add(stub);
} }
/**
 * Records the freeform tags that accompanied an object built by the
 * command-line argument system.  Registering the same object again
 * replaces the tags stored for it earlier.
 * @param key Object created by the command-line argument system.
 * @param tags List of tags to use when reading arguments.
 */
public void addTags(Object key, List<String> tags) {
    // 'this.' is required: the parameter shadows the field of the same name.
    this.tags.put(key, tags);
}
/**
 * Gets the tags associated with a given object.
 * @param key Key for which to find a tag.
 * @return List of tags associated with this key; never null.  An empty list
 *         is returned when nothing has been registered for the key.
 */
public List<String> getTags(Object key) {
    // One lookup instead of containsKey() followed by get(); this also keeps
    // a null from reaching callers should a null list ever have been registered.
    List<String> registered = tags.get(key);
    if( registered == null )
        return Collections.emptyList();
    return registered;
}
/** /**
* Retrieves an instance of the walker based on the walker name. * Retrieves an instance of the walker based on the walker name.
* *
@ -348,7 +376,7 @@ public class GenomeAnalysisEngine {
} }
RMDTrackManager manager = new RMDTrackManager(); RMDTrackManager manager = new RMDTrackManager();
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(argCollection.RODBindings); List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection.RODBindings);
validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, tracks); validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, tracks);
// validate all the sequence dictionaries against the reference // validate all the sequence dictionaries against the reference
@ -547,7 +575,7 @@ public class GenomeAnalysisEngine {
else else
method = new DownsamplingMethod(DownsampleType.NONE,null,null); method = new DownsamplingMethod(DownsampleType.NONE,null,null);
return new ReadProperties(argCollection.samFiles, return new ReadProperties(unpackBAMFileList(argCollection.samFiles),
argCollection.strictnessLevel, argCollection.strictnessLevel,
argCollection.readBufferSize, argCollection.readBufferSize,
method, method,
@ -947,4 +975,39 @@ public class GenomeAnalysisEngine {
public ReadMetrics getCumulativeMetrics() { public ReadMetrics getCumulativeMetrics() {
return readsDataSource.getCumulativeReadMetrics(); return readsDataSource.getCumulativeReadMetrics();
} }
/**
 * Unpack the bam files to be processed, given a list of files.  Each input is handled by name:
 * a file ending in .list contributes one reader per entry it contains, a file ending in .bam
 * contributes itself, and the special name "-" stands for /dev/stdin.  Entries read from a
 * .list file are used as given; they are not themselves expanded or checked for an extension,
 * so lists of lists are not supported.
 *
 * @param inputFiles a list of files that represent either bam files themselves, or a file containing a list of bam files to process
 *
 * @return a flattened list of the bam files provided, each paired with the command-line tags of the input it came from
 */
private List<SAMReaderID> unpackBAMFileList( List<File> inputFiles ) {
    List<SAMReaderID> unpackedReads = new ArrayList<SAMReaderID>();
    for( File inputFile: inputFiles ) {
        // Lower-case with a fixed locale.  The default-locale toLowerCase() turns 'I' into a
        // dotless 'ı' under e.g. a Turkish locale, so "READS.LIST" would not be recognised there.
        String lowerCaseName = inputFile.getName().toLowerCase(Locale.ENGLISH);
        if( lowerCaseName.endsWith(".list") ) {
            // Every entry of the list inherits the tags given to the list file itself.
            List<String> listTags = getTags(inputFile);
            try {
                for(String fileName : new XReadLines(inputFile))
                    unpackedReads.add(new SAMReaderID(new File(fileName),listTags));
            }
            catch( FileNotFoundException ex ) {
                throw new StingException("Unable to find file while unpacking reads", ex);
            }
        }
        else if( lowerCaseName.endsWith(".bam") ) {
            unpackedReads.add( new SAMReaderID(inputFile,getTags(inputFile)) );
        }
        else if(inputFile.getName().equals("-")) {
            // Standard input is registered without any tags.
            unpackedReads.add(new SAMReaderID(new File("/dev/stdin"),Collections.<String>emptyList()));
        }
        else {
            // NOTE(review): scareUser presumably aborts with this message; if it returns, the file is simply skipped.
            Utils.scareUser(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
                    "with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
                    "of BAM files is in the correct format, update the extension, and try again.",inputFile.getName()));
        }
    }
    return unpackedReads;
}
} }

View File

@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Collection; import java.util.Collection;
@ -26,7 +26,7 @@ import java.util.Collection;
* information about how they should be downsampled, sorted, and filtered. * information about how they should be downsampled, sorted, and filtered.
*/ */
public class ReadProperties { public class ReadProperties {
private List<File> readsFiles = null; private List<SAMReaderID> readers = null;
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
private Integer readBufferSize = null; private Integer readBufferSize = null;
private DownsamplingMethod downsamplingMethod = null; private DownsamplingMethod downsamplingMethod = null;
@ -63,8 +63,8 @@ public class ReadProperties {
* Gets a list of the files acting as sources of reads. * Gets a list of the files acting as sources of reads.
* @return A list of files storing reads data. * @return A list of files storing reads data.
*/ */
public List<File> getReadsFiles() { public List<SAMReaderID> getSAMReaderIDs() {
return readsFiles; return readers;
} }
/** /**
@ -115,8 +115,8 @@ public class ReadProperties {
* Simple constructor for unit testing. * Simple constructor for unit testing.
* @param readsFiles List of reads files to open. * @param readsFiles List of reads files to open.
*/ */
public ReadProperties( List<File> readsFiles ) { public ReadProperties( List<SAMReaderID> readsFiles ) {
this.readsFiles = readsFiles; this.readers = readsFiles;
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.NONE,null,null); this.downsamplingMethod = new DownsamplingMethod(DownsampleType.NONE,null,null);
this.supplementalFilters = new ArrayList<SamRecordFilter>(); this.supplementalFilters = new ArrayList<SamRecordFilter>();
this.exclusionList = new ValidationExclusion(); this.exclusionList = new ValidationExclusion();
@ -138,7 +138,7 @@ public class ReadProperties {
* will explicitly list reads with deletion over the current reference base; otherwise, only observed * will explicitly list reads with deletion over the current reference base; otherwise, only observed
* bases will be seen in the pileups, and the deletions will be skipped silently. * bases will be seen in the pileups, and the deletions will be skipped silently.
*/ */
ReadProperties( List<File> samFiles, ReadProperties( List<SAMReaderID> samFiles,
SAMFileReader.ValidationStringency strictness, SAMFileReader.ValidationStringency strictness,
Integer readBufferSize, Integer readBufferSize,
DownsamplingMethod downsamplingMethod, DownsamplingMethod downsamplingMethod,
@ -147,7 +147,7 @@ public class ReadProperties {
int maximumReadsAtLocus, int maximumReadsAtLocus,
boolean includeReadsWithDeletionAtLoci, boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents) { boolean generateExtendedEvents) {
this.readsFiles = samFiles; this.readers = samFiles;
this.readBufferSize = readBufferSize; this.readBufferSize = readBufferSize;
this.validationStringency = strictness; this.validationStringency = strictness;
this.downsamplingMethod = downsamplingMethod; this.downsamplingMethod = downsamplingMethod;

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.simpleframework.xml.*; import org.simpleframework.xml.*;
import org.simpleframework.xml.core.Persister; import org.simpleframework.xml.core.Persister;

View File

@ -65,7 +65,7 @@ public class SAMDataSource implements SimpleDataSource {
/** /**
* Identifiers for the readers driving this data source. * Identifiers for the readers driving this data source.
*/ */
protected final List<SAMReaderID> readerIDs = new ArrayList<SAMReaderID>(); protected final List<SAMReaderID> readerIDs;
/** /**
* How far along is each reader? * How far along is each reader?
@ -109,11 +109,10 @@ public class SAMDataSource implements SimpleDataSource {
this.readProperties = reads; this.readProperties = reads;
this.readMetrics = new ReadMetrics(); this.readMetrics = new ReadMetrics();
for (File smFile : reads.getReadsFiles()) { readerIDs = reads.getSAMReaderIDs();
if (!smFile.canRead()) { for (SAMReaderID readerID : reads.getSAMReaderIDs()) {
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName()); if (!readerID.samFile.canRead())
} throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + readerID.samFile.getName());
readerIDs.add(new SAMReaderID(smFile));
} }
resourcePool = new SAMResourcePool(Integer.MAX_VALUE); resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
@ -170,7 +169,7 @@ public class SAMDataSource implements SimpleDataSource {
* @return True if no reads files are supplying data to the traversal; false otherwise. * @return True if no reads files are supplying data to the traversal; false otherwise.
*/ */
public boolean isEmpty() { public boolean isEmpty() {
return readProperties.getReadsFiles().size() == 0; return readProperties.getSAMReaderIDs().size() == 0;
} }
/** /**
@ -550,8 +549,8 @@ public class SAMDataSource implements SimpleDataSource {
* @param sourceInfo Metadata for the reads to load. * @param sourceInfo Metadata for the reads to load.
*/ */
public SAMReaders(ReadProperties sourceInfo) { public SAMReaders(ReadProperties sourceInfo) {
for(File readsFile: sourceInfo.getReadsFiles()) { for(SAMReaderID readerID: sourceInfo.getSAMReaderIDs()) {
SAMFileReader reader = new SAMFileReader(readsFile); SAMFileReader reader = new SAMFileReader(readerID.samFile);
reader.enableFileSource(true); reader.enableFileSource(true);
reader.enableIndexCaching(true); reader.enableIndexCaching(true);
reader.setValidationStringency(sourceInfo.getValidationStringency()); reader.setValidationStringency(sourceInfo.getValidationStringency());
@ -562,14 +561,14 @@ public class SAMDataSource implements SimpleDataSource {
logger.debug(String.format("Sort order is: " + header.getSortOrder())); logger.debug(String.format("Sort order is: " + header.getSortOrder()));
if (reader.getFileHeader().getReadGroups().size() < 1) { if (reader.getFileHeader().getReadGroups().size() < 1) {
SAMReadGroupRecord rec = new SAMReadGroupRecord(readsFile.getName()); SAMReadGroupRecord rec = new SAMReadGroupRecord(readerID.samFile.getName());
rec.setLibrary(readsFile.getName()); rec.setLibrary(readerID.samFile.getName());
rec.setSample(readsFile.getName()); rec.setSample(readerID.samFile.getName());
reader.getFileHeader().addReadGroup(rec); reader.getFileHeader().addReadGroup(rec);
} }
readers.put(new SAMReaderID(readsFile),reader); readers.put(readerID,reader);
} }
} }

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources; package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import java.io.File; import java.io.File;
import java.util.List;
import java.util.Collections;
/** /**
* Uniquely identifies a SAM file reader. * Uniquely identifies a SAM file reader.
@ -15,12 +17,27 @@ public class SAMReaderID {
*/ */
protected final File samFile; protected final File samFile;
/**
* A list of tags associated with this BAM file.
*/
protected final List<String> tags;
/** /**
* Creates an identifier for a SAM file based on read. * Creates an identifier for a SAM file based on read.
* @param samFile The source file for SAM data. * @param samFile The source file for SAM data.
* @param tags tags to use when creating a reader ID.
*/ */
protected SAMReaderID(File samFile) { public SAMReaderID(File samFile, List<String> tags) {
this.samFile = samFile; this.samFile = samFile;
this.tags = tags;
}
/**
* Gets the tags associated with the given BAM file.
* @return A collection of the tags associated with this file.
*/
public List<String> getTags() {
return Collections.unmodifiableList(tags);
} }
/** /**

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker;
@ -147,7 +148,7 @@ public abstract class MicroScheduler {
* @return an iterator over the reads specified in the shard. * @return an iterator over the reads specified in the shard.
*/ */
protected StingSAMIterator getReadIterator(Shard shard) { protected StingSAMIterator getReadIterator(Shard shard) {
return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator(new ReadProperties(new ArrayList<File>())); return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator(new ReadProperties(Collections.<SAMReaderID>emptyList()));
} }
/** /**

View File

@ -77,7 +77,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
@Override @Override
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
ArgumentDefinition definition = createDefaultArgumentDefinition(source); ArgumentDefinition definition = createDefaultArgumentDefinition(source);
String fileName = getArgumentValue( definition, matches ); String fileName = getArgumentValue( definition, matches );
@ -85,7 +85,11 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
engine.addOutput(stub); engine.addOutput(stub);
return createInstanceOfClass(type,stub); Object result = createInstanceOfClass(type,stub);
// WARNING: Side effects required by engine!
parsingEngine.addTags(result,getArgumentTags(matches));
return result;
} }
/** /**

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
import org.broadinstitute.sting.commandline.ArgumentSource; import org.broadinstitute.sting.commandline.ArgumentSource;
import org.broadinstitute.sting.commandline.ArgumentMatches; import org.broadinstitute.sting.commandline.ArgumentMatches;
import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -59,7 +60,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
} }
@Override @Override
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
@ -69,6 +70,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
builder.setSAMFile(new File(readerFileName)); builder.setSAMFile(new File(readerFileName));
// WARNING: Skipping required side-effect because stub is impossible to generate.
engine.addInput(source, builder); engine.addInput(source, builder);
// MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. Return null, then // MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. Return null, then

View File

@ -91,7 +91,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
} }
@Override @Override
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
String writerFileName = getArgumentValue( createBAMArgumentDefinition(source), matches ); String writerFileName = getArgumentValue( createBAMArgumentDefinition(source), matches );
if( writerFileName == null ) if( writerFileName == null )
throw new StingException("SAM file compression was supplied, but no associated writer was supplied with it."); throw new StingException("SAM file compression was supplied, but no associated writer was supplied with it.");
@ -103,6 +103,8 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
if( compressionLevel != null ) if( compressionLevel != null )
stub.setCompressionLevel(compressionLevel); stub.setCompressionLevel(compressionLevel);
// WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches));
engine.addOutput(stub); engine.addOutput(stub);
return stub; return stub;
@ -121,7 +123,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
DEFAULT_ARGUMENT_FULLNAME, DEFAULT_ARGUMENT_FULLNAME,
DEFAULT_ARGUMENT_SHORTNAME, DEFAULT_ARGUMENT_SHORTNAME,
ArgumentDefinition.getDoc(annotation), ArgumentDefinition.getDoc(annotation),
true, false,
false, false,
source.isMultiValued(), source.isMultiValued(),
source.isHidden(), source.isHidden(),

View File

@ -111,7 +111,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @return Transform from the matches into the associated argument. * @return Transform from the matches into the associated argument.
*/ */
@Override @Override
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
// Get the filename for the genotype file, if it exists. If not, we'll need to send output to out. // Get the filename for the genotype file, if it exists. If not, we'll need to send output to out.
String writerFileName = getArgumentValue(createDefaultArgumentDefinition(source),matches); String writerFileName = getArgumentValue(createDefaultArgumentDefinition(source),matches);
File writerFile = writerFileName != null ? new File(writerFileName) : null; File writerFile = writerFileName != null ? new File(writerFileName) : null;
@ -122,6 +122,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
// Create a stub for the given object. // Create a stub for the given object.
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress) : new VCFWriterStub(engine, System.out, compress); VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress) : new VCFWriterStub(engine, System.out, compress);
// WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches));
engine.addOutput(stub); engine.addOutput(stub);
return stub; return stub;

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
@ -62,13 +63,13 @@ public class RMDTrackManager extends PluginManager<RMDTrackBuilder> {
/** /**
* find the associated reference meta data * find the associated reference meta data
* *
* @param triplets the triplets of strings from the -B command line option * @param bindings the bindings of strings from the -B command line option
* *
* @return a list of RMDTracks, one for each -B option * @return a list of RMDTracks, one for each -B option
*/ */
public List<RMDTrack> getReferenceMetaDataSources(List<String> triplets) { public List<RMDTrack> getReferenceMetaDataSources(GenomeAnalysisEngine engine,List<String> bindings) {
initializeTrackTypes(); initializeTrackTypes();
initializeTriplets(triplets); initializeBindings(engine,bindings);
// try and make the tracks given their requests // try and make the tracks given their requests
return createRequestedTrackObjects(); return createRequestedTrackObjects();
} }
@ -91,18 +92,30 @@ public class RMDTrackManager extends PluginManager<RMDTrackBuilder> {
} }
/** /**
* initialize our lists of triplets * initialize our lists of bindings
* @param triplets the input to the GATK, as a list of strings passed in through the -B options * @param engine The engine, used to populate tags.
* @param bindings the input to the GATK, as a list of strings passed in through the -B options
*/ */
private void initializeTriplets(List<String> triplets) { private void initializeBindings(GenomeAnalysisEngine engine,List<String> bindings) {
// NOTE: Method acts as a static. Once the inputs have been passed once they are locked in. // NOTE: Method acts as a static. Once the inputs have been passed once they are locked in.
if (inputs.size() > 0 || triplets.size() == 0) if (inputs.size() > 0 || bindings.size() == 0)
return; return;
for (String value: triplets) { for (String binding: bindings) {
String[] split = value.split(","); if(engine != null && engine.getTags(binding).size() == 2) {
if (split.length != 3) throw new IllegalArgumentException(value + " is not a valid reference metadata track description"); // Assume that if tags are present, those tags are name and type.
inputs.add(new RMDTriplet(split[0], split[1], split[2])); // Name is always first, followed by type.
List<String> parameters = engine.getTags(binding);
String name = parameters.get(0);
String type = parameters.get(1);
inputs.add(new RMDTriplet(name,type,binding));
}
else {
// Otherwise, use old-format bindings.
String[] split = binding.split(",");
if (split.length != 3) throw new IllegalArgumentException(binding + " is not a valid reference metadata track description");
inputs.add(new RMDTriplet(split[0], split[1], split[2]));
}
} }
} }

View File

@ -44,7 +44,7 @@ import java.io.PrintStream;
@Requires({DataSource.READS, DataSource.REFERENCE}) @Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> { public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** an optional argument to dump the reads out to a BAM file */ /** an optional argument to dump the reads out to a BAM file */
@Output(doc="Write output to this BAM filename instead of STDOUT",required=false) @Output(doc="Write output to this BAM filename instead of STDOUT")
SAMFileWriter out; SAMFileWriter out;
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false) @Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false)
String readGroup = null; String readGroup = null;

View File

@ -19,7 +19,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
/** /**
@ -51,7 +50,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(); SAMRecordIterator iterator = new SAMRecordIterator();
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(new SAMDataSource(new ReadProperties(Collections.<File>emptyList())),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap()); Shard shard = new LocusShard(new SAMDataSource(new ReadProperties(Collections.<SAMReaderID>emptyList())),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next(); WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null);

View File

@ -27,10 +27,10 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import java.util.List; import java.util.List;
import java.util.Collections; import java.util.Collections;
import java.io.File;
/** /**
* A mock locus shard, usable for infrastructure that requires a shard to behave properly. * A mock locus shard, usable for infrastructure that requires a shard to behave properly.
@ -40,6 +40,6 @@ import java.io.File;
*/ */
public class MockLocusShard extends LocusShard { public class MockLocusShard extends LocusShard {
public MockLocusShard(final List<GenomeLoc> intervals) { public MockLocusShard(final List<GenomeLoc> intervals) {
super(new SAMDataSource(new ReadProperties(Collections.<File>emptyList())),intervals,null); super(new SAMDataSource(new ReadProperties(Collections.<SAMReaderID>emptyList())),intervals,null);
} }
} }

View File

@ -19,6 +19,7 @@ import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Collections;
/** /**
* *
@ -48,7 +49,7 @@ import java.util.List;
*/ */
public class SAMBAMDataSourceUnitTest extends BaseTest { public class SAMBAMDataSourceUnitTest extends BaseTest {
private List<File> fl; private List<SAMReaderID> readers;
private IndexedFastaSequenceFile seq; private IndexedFastaSequenceFile seq;
/** /**
@ -58,7 +59,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
*/ */
@Before @Before
public void doForEachTest() throws FileNotFoundException { public void doForEachTest() throws FileNotFoundException {
fl = new ArrayList<File>(); readers = new ArrayList<SAMReaderID>();
// sequence // sequence
seq = new IndexedFastaSequenceFile(new File(hg18Reference)); seq = new IndexedFastaSequenceFile(new File(hg18Reference));
@ -73,7 +74,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
@After @After
public void undoForEachTest() { public void undoForEachTest() {
seq = null; seq = null;
fl.clear(); readers.clear();
} }
@ -83,8 +84,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
logger.warn("Executing testLinearBreakIterateAll"); logger.warn("Executing testLinearBreakIterateAll");
// setup the data // setup the data
fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
ReadProperties reads = new ReadProperties(fl); ReadProperties reads = new ReadProperties(readers);
// the sharding strat. // the sharding strat.
SAMDataSource data = new SAMDataSource(reads); SAMDataSource data = new SAMDataSource(reads);
@ -128,8 +129,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
logger.warn("Executing testMergingTwoBAMFiles"); logger.warn("Executing testMergingTwoBAMFiles");
// setup the test files // setup the test files
fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
ReadProperties reads = new ReadProperties(fl); ReadProperties reads = new ReadProperties(readers);
// the sharding strat. // the sharding strat.
SAMDataSource data = new SAMDataSource(reads); SAMDataSource data = new SAMDataSource(reads);
@ -168,10 +169,10 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
// setup the data and the counter before our second run // setup the data and the counter before our second run
fl.clear(); readers.clear();
fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
fl.add(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam")); readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
reads = new ReadProperties(fl); reads = new ReadProperties(readers);
count = 0; count = 0;
// the sharding strat. // the sharding strat.

View File

@ -6,6 +6,7 @@ import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@ -40,7 +41,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
// create a test version of the Reads object // create a test version of the Reads object
ReadProperties readAttributes = new ReadProperties(new ArrayList<File>()); ReadProperties readAttributes = new ReadProperties(new ArrayList<SAMReaderID>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
@ -92,7 +93,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}; final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20};
// create a test version of the Reads object // create a test version of the Reads object
ReadProperties readAttributes = new ReadProperties(new ArrayList<File>()); ReadProperties readAttributes = new ReadProperties(new ArrayList<SAMReaderID>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
@ -140,7 +141,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20)); records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, x, 20));
// create a test version of the Reads object // create a test version of the Reads object
ReadProperties reads = new ReadProperties(new ArrayList<File>()); ReadProperties reads = new ReadProperties(new ArrayList<SAMReaderID>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records // create the iterator by state with the fake reads and fake records
@ -166,7 +167,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20)); records.add(ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 100, 20));
// create a test version of the Reads object // create a test version of the Reads object
ReadProperties reads = new ReadProperties(new ArrayList<File>()); ReadProperties reads = new ReadProperties(new ArrayList<SAMReaderID>());
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"maximumReadsAtLocus"),reads,MAX_READS);
// create the iterator by state with the fake reads and fake records // create the iterator by state with the fake reads and fake records

View File

@ -108,8 +108,8 @@ public class RMDTrackManagerUnitTest extends BaseTest {
triplets.add("db"); triplets.add("db");
triplets.add("DBSNP"); triplets.add("DBSNP");
triplets.add("../../GATK_Data/dbsnp_130_b36.rod"); triplets.add("../../GATK_Data/dbsnp_130_b36.rod");
Assert.assertEquals(1, manager.getReferenceMetaDataSources(triplets).size()); Assert.assertEquals(1, manager.getReferenceMetaDataSources(null,triplets).size());
RMDTrack t = manager.getReferenceMetaDataSources(triplets).get(0); RMDTrack t = manager.getReferenceMetaDataSources(null,triplets).get(0);
// make sure we have a single track // make sure we have a single track
// lets test the first and 20th contigs of the human reference // lets test the first and 20th contigs of the human reference

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker; import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -56,9 +57,9 @@ import java.util.Collections;
public class TraverseReadsUnitTest extends BaseTest { public class TraverseReadsUnitTest extends BaseTest {
private ReferenceSequenceFile seq; private ReferenceSequenceFile seq;
private File bam = new File(validationDataLocation + "index_test.bam"); // TCGA-06-0188.aligned.duplicates_marked.bam"); private SAMReaderID bam = new SAMReaderID(new File(validationDataLocation + "index_test.bam"),Collections.<String>emptyList()); // TCGA-06-0188.aligned.duplicates_marked.bam");
private File refFile = new File(validationDataLocation + "Homo_sapiens_assembly17.fasta"); private File refFile = new File(validationDataLocation + "Homo_sapiens_assembly17.fasta");
private List<File> bamList; private List<SAMReaderID> bamList;
private Walker countReadWalker; private Walker countReadWalker;
private File output; private File output;
private long readSize = 100000; private long readSize = 100000;
@ -82,7 +83,7 @@ public class TraverseReadsUnitTest extends BaseTest {
fail("Couldn't open the output file"); fail("Couldn't open the output file");
} }
bamList = new ArrayList<File>(); bamList = new ArrayList<SAMReaderID>();
bamList.add(bam); bamList.add(bam);
countReadWalker = new CountReadsWalker(); countReadWalker = new CountReadsWalker();

View File

@ -14,13 +14,13 @@ public class
private static String root = cmdRoot + private static String root = cmdRoot +
" -D " + GATKDataLocation + "dbsnp_129_b36.rod" + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -B eval,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" -B comp_genotypes,VCF," + validationDataLocation + "yri.trio.gatk.ug.head.vcf -reportType Grep"; " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf -reportType Grep";
private static String rootGZ = cmdRoot + private static String rootGZ = cmdRoot +
" -D " + GATKDataLocation + "dbsnp_129_b36.rod" + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -B eval,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + " -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" +
" -B comp_genotypes,VCF," + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz -reportType Grep"; " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz -reportType Grep";
private static String[] testsEnumerations = {root, rootGZ}; private static String[] testsEnumerations = {root, rootGZ};
@ -46,7 +46,7 @@ public class
public void testVEGenotypeConcordance() { public void testVEGenotypeConcordance() {
String vcfFiles[] = {"GenotypeConcordanceEval.vcf", "GenotypeConcordanceEval.vcf.gz"}; String vcfFiles[] = {"GenotypeConcordanceEval.vcf", "GenotypeConcordanceEval.vcf.gz"};
for (String vcfFile : vcfFiles) { for (String vcfFile : vcfFiles) {
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B eval,VCF," + validationDataLocation + vcfFile + " -B comp,VCF," + validationDataLocation + "GenotypeConcordanceComp.vcf -noStandard -E GenotypeConcordance -reportType CSV -o %s", WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noStandard -E GenotypeConcordance -reportType CSV -o %s",
1, 1,
Arrays.asList("15d1075d384da2bb7445f7493f2b6a07")); Arrays.asList("15d1075d384da2bb7445f7493f2b6a07"));
executeTest("testVEGenotypeConcordance" + vcfFile, spec); executeTest("testVEGenotypeConcordance" + vcfFile, spec);
@ -76,8 +76,8 @@ public class
public void testVEComplex() { public void testVEComplex() {
HashMap<String, String> expectations = new HashMap<String, String>(); HashMap<String, String> expectations = new HashMap<String, String>();
String extraArgs1 = "-L " + validationDataLocation + "chr1_b36_pilot3.interval_list -family NA19238+NA19239=NA19240 -MVQ 30 -E MendelianViolationEvaluator" + String extraArgs1 = "-L " + validationDataLocation + "chr1_b36_pilot3.interval_list -family NA19238+NA19239=NA19240 -MVQ 30 -E MendelianViolationEvaluator" +
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + " -B:dbsnp_130,dbSNP " + GATKDataLocation + "dbsnp_130_b36.rod" +
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; " -B:comp_hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
String matchingMD5 = "dd513bc72860133a58e9ee542782162b"; String matchingMD5 = "dd513bc72860133a58e9ee542782162b";
@ -104,7 +104,7 @@ public class
" -L 21" + " -L 21" +
" -D " + GATKDataLocation + "dbsnp_129_b36.rod" + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -E CountFunctionalClasses -noStandard" + " -E CountFunctionalClasses -noStandard" +
" -B eval,VCF," + validationDataLocation + "test.filtered.maf_annotated.vcf" + " -B:eval,VCF " + validationDataLocation + "test.filtered.maf_annotated.vcf" +
" -o %s"; " -o %s";
String md5 = "d41d8cd98f00b204e9800998ecf8427e"; String md5 = "d41d8cd98f00b204e9800998ecf8427e";

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.queue.util
import collection.JavaConversions._ import collection.JavaConversions._
import org.broadinstitute.sting.queue.QException import org.broadinstitute.sting.queue.QException
import java.lang.Class import java.lang.Class
import org.broadinstitute.sting.commandline.{ArgumentMatches, ArgumentSource, ArgumentTypeDescriptor} import org.broadinstitute.sting.commandline.{ArgumentMatches, ArgumentSource, ArgumentTypeDescriptor, ParsingEngine}
/** /**
* An ArgumentTypeDescriptor that can parse the scala collections. * An ArgumentTypeDescriptor that can parse the scala collections.
@ -42,7 +42,7 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @param argumentMatches The argument match strings that were found for this argument source. * @param argumentMatches The argument match strings that were found for this argument source.
* @return The parsed object. * @return The parsed object.
*/ */
def parse(source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = { def parse(parsingEngine: ParsingEngine, source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = {
val componentType = ReflectionUtils.getCollectionType(source.field) val componentType = ReflectionUtils.getCollectionType(source.field)
val componentArgumentParser = ArgumentTypeDescriptor.create(componentType) val componentArgumentParser = ArgumentTypeDescriptor.create(componentType)
@ -50,19 +50,19 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
var list = List.empty[Any] var list = List.empty[Any]
for (argumentMatch <- argumentMatches) for (argumentMatch <- argumentMatches)
for (value <- argumentMatch) for (value <- argumentMatch)
list :+= componentArgumentParser.parse(source, componentType, new ArgumentMatches(value)) list :+= componentArgumentParser.parse(parsingEngine, source, componentType, new ArgumentMatches(value))
list list
} else if (classOf[Set[_]].isAssignableFrom(classType)) { } else if (classOf[Set[_]].isAssignableFrom(classType)) {
var set = Set.empty[Any] var set = Set.empty[Any]
for (argumentMatch <- argumentMatches) for (argumentMatch <- argumentMatches)
for (value <- argumentMatch) for (value <- argumentMatch)
set += componentArgumentParser.parse(source, componentType, new ArgumentMatches(value)) set += componentArgumentParser.parse(parsingEngine, source, componentType, new ArgumentMatches(value))
set set
} else if (classOf[Option[_]].isAssignableFrom(classType)) { } else if (classOf[Option[_]].isAssignableFrom(classType)) {
if (argumentMatches.size > 1) if (argumentMatches.size > 1)
throw new QException("Unable to set Option to multiple values: " + argumentMatches.mkString(" ")) throw new QException("Unable to set Option to multiple values: " + argumentMatches.mkString(" "))
else if (argumentMatches.size == 1) else if (argumentMatches.size == 1)
Some(componentArgumentParser.parse(source, componentType, argumentMatches)) Some(componentArgumentParser.parse(parsingEngine, source, componentType, argumentMatches))
else else
None None
} else } else