Another pass of command-line arguments. Revised parser supports all types

of arguments that the existing parser supports, but does a poor job with
validation.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@591 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-05-04 22:41:23 +00:00
parent 8925df2e1e
commit 6550fe6f97
4 changed files with 522 additions and 54 deletions

View File

@ -23,18 +23,10 @@ import java.util.Map;
*/
class ArgumentDefinitions {
/**
* Backing data set of argument stored by short name.
* Backing data set of argument stored by short name and long name.
*/
private Map<String,ArgumentDefinition> argumentsByShortName = new HashMap<String,ArgumentDefinition>();
/**
* Does this set of argument definitions specify an argument with the given short name?
* @param shortName The short name.
* @return True if it contains the definition. False otherwise.
*/
public boolean hasArgumentWithShortName( String shortName ) {
return argumentsByShortName.containsKey( shortName );
}
private Map<String,ArgumentDefinition> argumentsByLongName = new HashMap<String,ArgumentDefinition>();
/**
* Returns the argument with the given short name.
@ -45,6 +37,15 @@ class ArgumentDefinitions {
return argumentsByShortName.get( shortName );
}
/**
* Returns the argument with the given long name.
* @param longName Argument long name.
* @return The argument definition, or null if nothing matches.
*/
public ArgumentDefinition getArgumentWithLongName( String longName ) {
    // Map.get yields null when nothing was registered under this long name.
    final ArgumentDefinition definitionForLongName = argumentsByLongName.get( longName );
    return definitionForLongName;
}
/**
* Adds an argument to this argument definition list.
* @param argument The argument to add.
@ -52,8 +53,16 @@ class ArgumentDefinitions {
* @param sourceField Field in which the argument was defined.
*/
public void add( Argument argument, Class sourceClass, Field sourceField ) {
    // Registers one definition object under its long name and, when present, its short name.
    // Both names are trimmed first so that lookups are not defeated by stray whitespace.
    String fullName = argument.fullName().trim();
    String shortName = argument.shortName().trim();

    // The full name is mandatory; reject the argument before touching either index so a
    // failed add leaves no partial registration behind.
    if( fullName.length() == 0 )
        throw new IllegalArgumentException( "Argument cannot have 0-length fullname." );

    // A single shared definition instance backs both indexes, so a match found by short name
    // compares equal to one found by long name.
    ArgumentDefinition definition = new ArgumentDefinition( argument, sourceClass, sourceField );
    argumentsByLongName.put( fullName, definition );

    // The short name is optional; index it only when one was actually supplied.
    if( shortName.length() != 0 )
        argumentsByShortName.put( shortName, definition );
}
}
@ -77,5 +86,23 @@ class ArgumentDefinition {
this.sourceClass = sourceClass;
this.sourceField = sourceField;
}
}
/**
* A general purpose accessor interface for ArgumentDefinitions.
*/
interface DefinitionMatcher {
// Looks up the definition registered under key in argumentDefinitions. The two implementations
// in this file delegate to the long-name and short-name getters respectively, both of which
// return null when nothing matches.
ArgumentDefinition get( ArgumentDefinitions argumentDefinitions, String key );
}
class FullNameDefinitionMatcher implements DefinitionMatcher {
    // Resolves the key against the long-name index of the given definitions.
    public ArgumentDefinition get( ArgumentDefinitions argumentDefinitions, String key ) {
        final ArgumentDefinition byLongName = argumentDefinitions.getArgumentWithLongName( key );
        return byLongName;
    }
}
class ShortNameDefinitionMatcher implements DefinitionMatcher {
    // Resolves the key against the short-name index of the given definitions.
    public ArgumentDefinition get( ArgumentDefinitions argumentDefinitions, String key ) {
        final ArgumentDefinition byShortName = argumentDefinitions.getArgumentWithShortName( key );
        return byShortName;
    }
}

View File

@ -1,8 +1,13 @@
package org.broadinstitute.sting.utils.cmdLine;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import java.util.Iterator; /**
/**
* Created by IntelliJ IDEA.
* User: mhanna
* Date: May 3, 2009
@ -26,30 +31,105 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* Collection matches from argument definition to argument value.
* Package protected access is deliberate.
*/
Set<ArgumentMatch> argumentMatches = new HashSet<ArgumentMatch>();
Map<Integer,ArgumentMatch> argumentMatches = new TreeMap<Integer,ArgumentMatch>();
void add( ArgumentDefinition definition, String value ) {
argumentMatches.add( new ArgumentMatch( definition, value ) );
void mergeInto( ArgumentMatch match ) {
    // Folds the given match into an existing match for the same definition, or records it as
    // a new match when that definition has not been seen yet.
    int absorbedBy = 0;

    // Iterate over a snapshot: the site map is updated inside the loop.
    Set<ArgumentMatch> snapshot = getUniqueMatches();
    for( ArgumentMatch existing: snapshot ) {
        if( !existing.definition.equals(match.definition) )
            continue;
        existing.mergeInto( match );
        // Every site of the incoming match now resolves to the surviving, merged match.
        for( int site: match.indices )
            argumentMatches.put( site, existing );
        absorbedBy++;
    }

    if( absorbedBy > 0 )
        return;

    // First occurrence of this definition: register the match at each of its sites.
    for( int site: match.indices )
        argumentMatches.put( site, match );
}
/**
* Get an iterator cycling through command-line argument <-> definition matches.
* Get an iterator cycling through *unique* command-line argument <-> definition matches.
* @return Iterator over all argument matches.
*/
public Iterator<ArgumentMatch> iterator() {
    // The backing map holds one entry per command-line site, so a match that spans several
    // sites appears several times; iterate over the de-duplicated set instead.
    return getUniqueMatches().iterator();
}
/**
* Indicates whether the site contains a matched argument.
* @param site Site at which to check.
* @return True if the site has a match. False otherwise.
*/
public boolean hasMatch( int site ) {
    // A site is matched when the site map has an entry for it.
    final boolean siteIsMatched = argumentMatches.containsKey( site );
    return siteIsMatched;
}
/**
* Gets the match at a given site.
* @param site Site at which to look for a match.
* @return The match present at the given site.
* @throws IllegalArgumentException if site does not contain a match.
*/
public ArgumentMatch getMatch( int site ) {
    // Happy path first: hand back the match recorded for this site.
    if( argumentMatches.containsKey(site) )
        return argumentMatches.get(site);
    // Asking for an unmatched site is a caller error.
    throw new IllegalArgumentException( "Site does not contain an argument: " + site );
}
/**
* Determines, of the argument matches by position, which are unique and returns that list.
* @return A unique set of matches.
*/
private Set<ArgumentMatch> getUniqueMatches() {
    // Copying the per-site values into a set collapses a match that occupies several sites
    // into a single element; the result is a fresh set, independent of the backing map.
    Set<ArgumentMatch> distinctMatches = new HashSet<ArgumentMatch>( argumentMatches.values() );
    return distinctMatches;
}
}
/**
* An individual match from argument definition to argument value.
* A mapping of all the sites where an argument definition maps to a site on the command line.
*/
class ArgumentMatch {
/**
* The argument definition that's been matched.
*/
public final ArgumentDefinition definition;
public final String value;
public ArgumentMatch( ArgumentDefinition definition, String value ) {
/**
* Index into the string of arguments where this match was found.
*/
public final Set<Integer> indices = new HashSet<Integer>();
/**
* The values associated with this parameter.
*/
public final List<String> values = new ArrayList<String>();
public ArgumentMatch( ArgumentDefinition definition, int index ) {
this.definition = definition;
this.value = value;
indices.add(index);
}
/**
* Merge two ArgumentMatches, so that the values for all arguments go into the
* same data structure.
* @param other The other match to merge into.
*/
public void mergeInto( ArgumentMatch other ) {
    // Absorb the other match: its values are appended in order, and its sites join this one's.
    this.values.addAll( other.values );
    this.indices.addAll( other.indices );
}
/**
* Associate a value with this argument match.
* @param value Text representation of value to add.
*/
public void addValue( String value ) {
    // Values are kept in the order they are added.
    values.add( value );
}
}

View File

@ -1,9 +1,19 @@
package org.broadinstitute.sting.utils.cmdLine;
import org.broadinstitute.sting.utils.StingException;
import org.apache.log4j.Logger;
import java.lang.reflect.Field;
import java.util.Set;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Modifier;
import java.lang.reflect.Array;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
/**
* Created by IntelliJ IDEA.
@ -28,6 +38,22 @@ public class ParsingEngine {
*/
private ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
/**
* Techniques for parsing and for argument lookup.
*/
private List<ParsingMethod> parsingMethods = new ArrayList<ParsingMethod>();
/**
* our log, which we want to capture anything from org.broadinstitute.sting
*/
protected static Logger logger = Logger.getLogger(ArgumentParser.class);
public ParsingEngine() {
parsingMethods.add( new ParsingMethod(Pattern.compile("\\s*--([\\w\\.]+)\\s*"), new FullNameDefinitionMatcher()) );
parsingMethods.add( new ParsingMethod(Pattern.compile("\\s*-([\\w\\.]+)\\s*"), new ShortNameDefinitionMatcher()) );
parsingMethods.add( new ParsingMethod(Pattern.compile("\\s*-([\\w\\.])([\\w\\.]+)\\s*"), new ShortNameDefinitionMatcher()) );
}
/**
* Add an argument source. Argument sources are expected to have
* any number of fields with an @Argument annotation attached.
@ -50,24 +76,13 @@ public class ParsingEngine {
* an ArgumentMatches object describing the best fit of these
* command-line arguments to the arguments that are actually
* required.
* @param arguments Command-line arguments.
* @param tokens Tokens passed on the command line.
* @return A object indicating which matches are best. Might return
* an empty object, but will never return null.
*/
public ArgumentMatches parse( String[] arguments ) {
ArgumentMatches argumentMatches = new ArgumentMatches();
for( int i = 0; i < arguments.length; i++ ) {
String argument = arguments[i].trim();
if( argument.startsWith("-") ) {
String shortName = argument.substring(1);
if( argumentDefinitions.hasArgumentWithShortName(shortName) ) {
ArgumentDefinition definition = argumentDefinitions.getArgumentWithShortName(shortName);
argumentMatches.add( definition, arguments[i+1].trim() );
}
}
}
public ArgumentMatches parse( String[] tokens ) {
    // Pass one: locate the tokens that name known arguments.
    final ArgumentMatches matched = parseArguments( tokens );
    // Pass two: attach each remaining token as a value of the argument preceding it.
    fitValuesToArguments( matched, tokens );
    return matched;
}
@ -82,13 +97,17 @@ public class ParsingEngine {
/**
* Loads a set of matched command-line arguments into the given object.
* @param object Object into which to add arguments.
* @param argumentMatches List of matches.
* @param matches List of matches.
*/
public void loadArgumentsIntoObject( Object object, ArgumentMatches matches ) {
for( ArgumentMatch match: matches ) {
if( object.getClass().equals(match.definition.sourceClass) ) {
ArgumentDefinition definition = match.definition;
if( object.getClass().equals(definition.sourceClass) ) {
try {
match.definition.sourceField.set( object, match.value );
if( !isArgumentBoolean(definition) )
definition.sourceField.set( object, constructFromString( definition.sourceField, match.values ) );
else
definition.sourceField.set( object, true );
}
catch( IllegalAccessException ex ) {
//logger.fatal("processArgs: cannot convert field " + field.toString());
@ -97,4 +116,226 @@ public class ParsingEngine {
}
}
}
private boolean isArgumentBoolean( ArgumentDefinition definition ) {
    // True for both the primitive boolean and the boxed Boolean field types.
    final Class<?> fieldType = definition.sourceField.getType();
    if( fieldType == Boolean.TYPE )
        return true;
    return fieldType == Boolean.class;
}
/**
* Determines whether a token looks like the name of an argument.
* @param token Token to inspect. Can be surrounded by whitespace.
* @return True if the token matches any of the known argument forms.
*/
private boolean isArgumentForm( String token ) {
    // Stop at the first registered pattern that matches the entire token.
    boolean recognized = false;
    for( int i = 0; i < parsingMethods.size() && !recognized; i++ )
        recognized = parsingMethods.get(i).pattern.matcher(token).matches();
    return recognized;
}
/**
* Parse an argument token (long or short form) into an ArgumentMatch.
* @param token The token to parse. The token should pass the isArgumentForm test.
* @param position Index of the token within the command line.
* @return ArgumentMatch associated with this token, or null if no match exists.
*/
private ArgumentMatch parseArgument( String token, int position ) {
    if( isArgumentForm(token) ) {
        // Use the first parsing method whose captured name resolves to a known definition.
        for( ParsingMethod candidate: parsingMethods ) {
            if( !candidate.hasMatch( argumentDefinitions, token ) )
                continue;
            return candidate.findMatch( argumentDefinitions, token, position );
        }
        // Looks like an argument, but no definition is registered under that name.
        return null;
    }
    throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token );
}
/**
* Extracts the argument portions of the string and assemble them into a data structure.
* @param tokens List of tokens from which to find arguments.
* @return Set of argument matches.
*/
private ArgumentMatches parseArguments( String[] tokens ) {
    ArgumentMatches found = new ArgumentMatches();
    for( int site = 0; site < tokens.length; site++ ) {
        // Tokens that do not look like an argument name are values; they are handled later.
        if( !isArgumentForm(tokens[site]) )
            continue;
        ArgumentMatch candidate = parseArgument( tokens[site], site );
        // Argument-shaped tokens with no registered definition are left unmatched.
        if( candidate == null )
            continue;
        found.mergeInto( candidate );
    }
    return found;
}
/**
* Fit the options presented on the command line to the given arguments.
* @param argumentMatches List of arguments already matched to data.
* @param tokens The command-line input.
*/
private void fitValuesToArguments( ArgumentMatches argumentMatches, String[] tokens ) {
    // Walks the command line left to right, attaching every non-argument token as a value of
    // the most recently seen matched argument.
    ArgumentMatch lastMatched = null;
    for( int i = 0; i < tokens.length; i++ ) {
        if( argumentMatches.hasMatch(i) ) {
            lastMatched = argumentMatches.getMatch(i);
            continue;
        }
        // A token that appears before any matched argument has no owner. Skip it rather than
        // dereferencing null, so a command line with leading or only unmatched tokens still
        // yields a (possibly empty) set of matches.
        if( lastMatched == null )
            continue;
        lastMatched.addValue( tokens[i] );
    }
}
/**
* Constructs a command-line argument given a string and field.
* @param f Field type from which to infer the type.
* @param strs Collection of parameter strings to parse.
* @return Parsed object of the inferred type.
*/
private Object constructFromString(Field f, List<String> strs) {
    // Converts the raw command-line strings into a value assignable to field f: a collection,
    // an array, or a single element, depending on the field's declared type.
    Class type = f.getType();

    if( Collection.class.isAssignableFrom(type) ) {
        // Raw (unparameterized) collections are filled with plain strings.
        Class containedType = String.class;

        if( f.getGenericType() instanceof ParameterizedType ) {
            ParameterizedType parameterizedType = (ParameterizedType)f.getGenericType();
            Object[] typeArguments = parameterizedType.getActualTypeArguments();
            // Only a single, concrete element type (e.g. List<Integer>) can be built from a
            // string. Wildcards, type variables and nested generics are rejected explicitly
            // rather than surfacing as a ClassCastException.
            if( typeArguments.length != 1 || !(typeArguments[0] instanceof Class) )
                throw new IllegalArgumentException("Unable to determine collection type of field: " + f.toString());
            containedType = (Class)typeArguments[0];
        }

        // If this is an interface or abstract type, pick a concrete implementation to create.
        // Because of type erasure, the implementation's element type does not need to match.
        if( Modifier.isInterface(type.getModifiers()) || Modifier.isAbstract(type.getModifiers()) )
        {
            if( java.util.List.class.isAssignableFrom(type) ) type = ArrayList.class;
            else if( java.util.Queue.class.isAssignableFrom(type) ) type = java.util.ArrayDeque.class;
            else if( java.util.Set.class.isAssignableFrom(type) ) type = java.util.TreeSet.class;
            // Fields declared as a supertype of ArrayList (e.g. plain Collection) get an ArrayList.
            else if( type.isAssignableFrom(ArrayList.class) ) type = ArrayList.class;
        }

        Collection collection;
        try
        {
            collection = (Collection)type.newInstance();
        }
        catch( Exception ex ) {
            // Failing to instantiate a simple collection class is unexpected; keep the cause.
            throw new IllegalArgumentException(ex);
        }

        for( String str: strs )
            collection.add( constructSingleElement(f,containedType,str) );

        return collection;
    }
    else if( type.isArray() ) {
        Class containedType = type.getComponentType();

        // Array.set unwraps boxed values, so primitive component types work as well.
        Object arr = Array.newInstance(containedType,strs.size());
        for( int i = 0; i < strs.size(); i++ )
            Array.set( arr,i,constructSingleElement(f,containedType,strs.get(i)) );
        return arr;
    }
    else {
        // Scalar field: exactly one value is required. Report "none" and "too many" separately
        // so the message matches what actually happened.
        if( strs.isEmpty() )
            throw new IllegalArgumentException("No value was passed to an argument expecting a single value: " + f.toString());
        if( strs.size() != 1 )
            throw new IllegalArgumentException("Passed multiple arguments to an object expecting a single value.");
        return constructSingleElement(f,type,strs.get(0));
    }
}
/**
* Builds a single element of the given type.
* @param f Field being populated; used when reporting conversion failures.
* @param type Type of the element to construct.
* @param str String representation of data.
* @return parsed form of String.
*/
private Object constructSingleElement(Field f, Class type, String str) {
    // Builds one value of the requested type from its string form. Primitive types are parsed
    // through their wrapper classes (primitives have no constructors to reflect on); every
    // other type must expose a public constructor taking a single String.
    if (type == Boolean.TYPE) {
        // Anything other than "true" (case-insensitive) is treated as false.
        return Boolean.valueOf("true".equalsIgnoreCase(str));
    } else if (type == Integer.TYPE) {
        return Integer.valueOf(str);
    } else if (type == Long.TYPE) {
        return Long.valueOf(str);
    } else if (type == Short.TYPE) {
        return Short.valueOf(str);
    } else if (type == Byte.TYPE) {
        return Byte.valueOf(str);
    } else if (type == Float.TYPE) {
        return Float.valueOf(str);
    } else if (type == Double.TYPE) {
        return Double.valueOf(str);
    } else if (type == Character.TYPE) {
        if (str.length() != 1)
            throw new IllegalArgumentException("Expected a single character but received: " + str);
        return Character.valueOf(str.charAt(0));
    }
    else {
        try {
            Constructor ctor = type.getConstructor(String.class);
            return ctor.newInstance(str);
        } catch (NoSuchMethodException e) {
            throw conversionFailure("NoSuchMethodException", f, e);
        } catch (IllegalAccessException e) {
            throw conversionFailure("IllegalAccessException", f, e);
        } catch (InvocationTargetException e) {
            throw conversionFailure("InvocationTargetException", f, e);
        } catch (InstantiationException e) {
            throw conversionFailure("InstantiationException", f, e);
        }
    }
}

// Logs a failed reflective conversion and wraps it, keeping the original exception as the cause
// so the underlying stack trace is not lost.
private RuntimeException conversionFailure(String exceptionName, Field f, Exception cause) {
    logger.fatal("constructFromString:" + exceptionName + ": cannot convert field " + f.toString());
    return new RuntimeException("constructFromString:" + exceptionName + ": Failed conversion " + cause.getMessage(), cause);
}
/**
* Holds a pattern, along with how to get to the argument definitions that could match that pattern.
*/
private class ParsingMethod {
    // Form a token must take to be handled by this method; group 1 captures the argument name
    // and, when the pattern has one, group 2 captures an inline value.
    public final Pattern pattern;

    // How a captured name is resolved to a definition (by long name or by short name).
    public final DefinitionMatcher definitionMatcher;

    public ParsingMethod( Pattern pattern, DefinitionMatcher definitionMatcher ) {
        this.pattern = pattern;
        this.definitionMatcher = definitionMatcher;
    }

    // True when the token has this method's form and its captured name is a known definition.
    public boolean hasMatch( ArgumentDefinitions definitions, String token ) {
        Matcher tokenMatcher = pattern.matcher(token);
        if( !tokenMatcher.matches() )
            return false;
        return definitionMatcher.get( definitions, tokenMatcher.group(1) ) != null;
    }

    // Builds the match for a token at the given position; throws IllegalArgumentException when
    // the token does not fit the pattern or names no known definition.
    public ArgumentMatch findMatch( ArgumentDefinitions definitions, String token, int position ) {
        Matcher tokenMatcher = pattern.matcher(token);

        // Didn't match? Must be bad input.
        if( !tokenMatcher.matches() )
            throw new IllegalArgumentException( String.format("Unable to parse token %s with pattern %s", token, pattern.pattern()) );

        // Pull out the argument name and, for patterns with a second group, the inline value.
        String name = tokenMatcher.group(1);
        String inlineValue = null;
        if( tokenMatcher.groupCount() > 1 )
            inlineValue = tokenMatcher.group(2);

        ArgumentDefinition resolved = definitionMatcher.get( definitions, name );
        if( resolved == null )
            throw new IllegalArgumentException( String.format("Unable to find match for token %s", token) );

        ArgumentMatch result = new ArgumentMatch( resolved, position );
        if( inlineValue != null )
            result.addValue( inlineValue );
        return result;
    }
}
}

View File

@ -4,6 +4,8 @@ import org.broadinstitute.sting.BaseTest;
import org.junit.Test;
import org.junit.Before;
import org.junit.Assert;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: mhanna
@ -47,18 +49,10 @@ public class ParsingEngineTest extends BaseTest {
Assert.assertEquals("Argument is not correctly initialized", "na12878.bam", argProvider.inputFile );
}
// To test
// 'Composite' short names
// long names
// flags
// flags with arguments at every point on the line
// flags with arguments at the end of the line
/*
@Test
public void shortNameCompositeArgumentTest() {
final String[] commandLine = new String[] {"-I na12878.bam"};
final String[] commandLine = new String[] {"-Ina12878.bam"};
parsingEngine.addArgumentSources( InputFileArgProvider.class );
ArgumentMatches argumentMatches = parsingEngine.parse( commandLine );
@ -67,8 +61,134 @@ public class ParsingEngineTest extends BaseTest {
InputFileArgProvider argProvider = new InputFileArgProvider();
parsingEngine.loadArgumentsIntoObject( argProvider, argumentMatches);
Assert.assertEquals("Argument is not correctly initialized", "na12878.bam" );
Assert.assertEquals("Argument is not correctly initialized", "na12878.bam", argProvider.inputFile );
}
*/
@Test
public void longNameArgumentTest() {
    // Long-form name followed by its value as a separate token.
    parsingEngine.addArgumentSources( InputFileArgProvider.class );
    final String[] commandLine = new String[] {"--input_file", "na12878.bam"};

    final ArgumentMatches matches = parsingEngine.parse( commandLine );
    parsingEngine.validate(matches);

    final InputFileArgProvider provider = new InputFileArgProvider();
    parsingEngine.loadArgumentsIntoObject( provider, matches );

    Assert.assertEquals("Argument is not correctly initialized", "na12878.bam", provider.inputFile );
}
@Test
public void extraWhitespaceTest() {
    // Same as the long-name case, but the argument token is padded with spaces.
    parsingEngine.addArgumentSources( InputFileArgProvider.class );
    final String[] commandLine = new String[] {" --input_file ", "na12878.bam"};

    final ArgumentMatches matches = parsingEngine.parse( commandLine );
    parsingEngine.validate(matches);

    final InputFileArgProvider provider = new InputFileArgProvider();
    parsingEngine.loadArgumentsIntoObject( provider, matches );

    Assert.assertEquals("Argument is not correctly initialized", "na12878.bam", provider.inputFile );
}
@Test
public void flagTest() {
    // A boolean argument given with no value should be switched on.
    parsingEngine.addArgumentSources( AllLociArgProvider.class );
    final String[] commandLine = new String[] {"--all_loci"};

    final ArgumentMatches matches = parsingEngine.parse( commandLine );
    parsingEngine.validate(matches);

    final AllLociArgProvider provider = new AllLociArgProvider();
    parsingEngine.loadArgumentsIntoObject( provider, matches );

    Assert.assertTrue("Argument is not correctly initialized", provider.allLoci );
}
private class AllLociArgProvider {
// Boolean flag fixture used by flagTest: full name "all_loci", short name "A", off by default.
@Argument(fullName="all_loci",shortName="A")
public boolean allLoci = false;
}
@Test
public void arrayTest() {
    // The same argument given twice (short form with inline value, then long form with a
    // separate value) should accumulate into one array, in command-line order.
    parsingEngine.addArgumentSources( MultiValueArgProvider.class );
    final String[] commandLine = new String[] {"-Ifoo.txt", "--input_file", "bar.txt"};

    final ArgumentMatches matches = parsingEngine.parse( commandLine );
    parsingEngine.validate(matches);

    final MultiValueArgProvider provider = new MultiValueArgProvider();
    parsingEngine.loadArgumentsIntoObject( provider, matches );

    final String[] loaded = provider.inputFile;
    Assert.assertEquals("Argument array is of incorrect length", 2, loaded.length);
    Assert.assertEquals("1st filename is incorrect", "foo.txt", loaded[0] );
    Assert.assertEquals("2nd filename is incorrect", "bar.txt", loaded[1] );
}
private class MultiValueArgProvider {
// Array-valued fixture used by arrayTest: full name "input_file", short name "I".
@Argument(fullName="input_file",shortName="I")
public String[] inputFile;
}
@Test
public void typedCollectionTest() {
    // Repeated short-form arguments with inline values should load into a List<Integer>.
    parsingEngine.addArgumentSources( IntegerListArgProvider.class );
    final String[] commandLine = new String[] { "-N2", "-N4", "-N6", "-N8", "-N10" };

    final ArgumentMatches matches = parsingEngine.parse( commandLine );
    parsingEngine.validate(matches);

    final IntegerListArgProvider provider = new IntegerListArgProvider();
    parsingEngine.loadArgumentsIntoObject( provider, matches );

    Assert.assertNotNull("Argument array is null",provider.integers);
    Assert.assertEquals("Argument array is of incorrect length", 5, provider.integers.size());

    // Check each element in order against its expected value.
    final String[] ordinals = { "1st", "2nd", "3rd", "4th", "5th" };
    final int[] expected = { 2, 4, 6, 8, 10 };
    for( int i = 0; i < expected.length; i++ )
        Assert.assertEquals( ordinals[i] + " integer is incorrect", expected[i], provider.integers.get(i).intValue() );
}
private class IntegerListArgProvider {
// Parameterized-collection fixture used by typedCollectionTest: full name "integer_list", short name "N".
@Argument(fullName="integer_list",shortName="N")
public List<Integer> integers;
}
@Test
public void untypedCollectionTest() {
    // With a raw List field there is no element type to convert to, so values stay strings.
    parsingEngine.addArgumentSources( UntypedListArgProvider.class );
    final String[] commandLine = new String[] { "-N2", "-N4", "-N6", "-N8", "-N10" };

    final ArgumentMatches matches = parsingEngine.parse( commandLine );
    parsingEngine.validate(matches);

    final UntypedListArgProvider provider = new UntypedListArgProvider();
    parsingEngine.loadArgumentsIntoObject( provider, matches );

    Assert.assertNotNull("Argument array is null",provider.integers);
    Assert.assertEquals("Argument array is of incorrect length", 5, provider.integers.size());

    // Check each element in order against its expected string form.
    final String[] ordinals = { "1st", "2nd", "3rd", "4th", "5th" };
    final String[] expected = { "2", "4", "6", "8", "10" };
    for( int i = 0; i < expected.length; i++ )
        Assert.assertEquals( ordinals[i] + " integer is incorrect", expected[i], provider.integers.get(i) );
}
private class UntypedListArgProvider {
// Raw-collection fixture used by untypedCollectionTest: full name "untyped_list", short name "N".
@Argument(fullName="untyped_list",shortName="N")
public List integers;
}
// To test
// misc first element
// multiple trailing values
// differing input types
// spurious arguments in conjunction with immediate setters "-Ifoo.txt bar.txt"
// required but missing arguments
// invalid arguments
}