From 4f8506200407343b417344cb3af4f8768f8da47b Mon Sep 17 00:00:00 2001 From: hanna Date: Thu, 7 May 2009 16:21:17 +0000 Subject: [PATCH] Cleanup parsing method to make it less generic. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@619 348d0f76-0448-11de-a6fe-93d51630548a --- .../utils/cmdLine/ArgumentDefinitions.java | 49 -------- .../sting/utils/cmdLine/ParsingEngine.java | 85 +------------ .../sting/utils/cmdLine/ParsingMethod.java | 119 ++++++++++++++++++ 3 files changed, 123 insertions(+), 130 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/utils/cmdLine/ParsingMethod.java diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentDefinitions.java b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentDefinitions.java index 545c1caa4..5323bc1ab 100755 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentDefinitions.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentDefinitions.java @@ -142,34 +142,6 @@ class ArgumentDefinitions implements Iterable { } }; - static AliasProvider ShortNameAliasProvider = new AliasProvider() { - /** - * Short names can come in the form -Ofoo.txt, -O foo.txt, or -out (multi-character short name). - * Given the argument name and built-in provided, see if these can be formed into some other argument - * name. - * @param argument Name of the argument, as parsed. For a short name, will be a single letter. - * @param value Value of the argument, as parsed. - * @return Any potential aliases for the given shortname. - */ - public List getAliases( String argument, String value ) { - List aliases = new ArrayList(); - aliases.add(argument+value); - aliases.add(argument); - return aliases; - } - - /** - * Is the value part of the given alias, or something separate that should be treated as an argument value. - * @param alias The alias to use. - * @param argument The parsed argument. - * @param value The parsed value. 
- * @return True if this alias should be used instead of the given value. - */ - public boolean doesAliasConsumeValue( String alias, String argument, String value ) { - return alias.equals(argument + value); - } - }; - /** * Find all required definitions. */ @@ -289,24 +261,3 @@ interface DefinitionMatcher { */ boolean matches( ArgumentDefinition definition, Object key ); } - -/** - * A way to get alternate names for the argument given the recognized name and value. - */ -interface AliasProvider { - /** - * Give all alternate names for the given argument / value pair. The aliases should - * be returned in 'preferred order'. - * @param argument The argument. - * @param value The value. - * @return All possible names. - */ - List getAliases( String argument, String value ); - - /** - * True if this alias 'consumes' the value, meaning that the argument + value together - * represent some other alias. - * @return True if the value should still be used. False otherwise. - */ - boolean doesAliasConsumeValue( String alias, String argument, String value ); -} diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingEngine.java b/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingEngine.java index 09c511f1f..c47cb2a69 100755 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingEngine.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingEngine.java @@ -10,8 +10,6 @@ import java.lang.reflect.Modifier; import java.lang.reflect.Array; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; -import java.util.regex.Pattern; -import java.util.regex.Matcher; import java.util.ArrayList; import java.util.List; import java.util.Collection; @@ -60,10 +58,8 @@ public class ParsingEngine { protected static Logger logger = Logger.getLogger(ParsingEngine.class); public ParsingEngine() { - parsingMethods.add( new ParsingMethod(Pattern.compile("\\s*--([\\w\\.]+)\\s*"), ArgumentDefinitions.FullNameDefinitionMatcher) ); - 
parsingMethods.add( new ParsingMethod(Pattern.compile("\\s*-([\\w\\.])([\\w\\.\\/:\\-]*)\\s*"), - ArgumentDefinitions.ShortNameDefinitionMatcher, - ArgumentDefinitions.ShortNameAliasProvider) ); + parsingMethods.add( new FullNameParsingMethod() ); + parsingMethods.add( new ShortNameParsingMethod() ); } /** @@ -242,7 +238,7 @@ public class ParsingEngine { */ private boolean isArgumentForm( String token ) { for( ParsingMethod parsingMethod: parsingMethods ) { - if( parsingMethod.pattern.matcher(token).matches() ) + if( parsingMethod.matches(token) ) return true; } @@ -259,7 +255,7 @@ public class ParsingEngine { throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token ); for( ParsingMethod parsingMethod: parsingMethods ) { - if( parsingMethod.matches( argumentDefinitions, token ) ) + if( parsingMethod.matches( token ) ) return parsingMethod.match( argumentDefinitions, token, position ); } @@ -413,79 +409,6 @@ public class ParsingEngine { } } } - - - - /** - * Holds a pattern, along with how to get to the argument definitions that could match that pattern. - */ - private class ParsingMethod { - public final Pattern pattern; - public final DefinitionMatcher definitionMatcher; - public final AliasProvider aliasProvider; - - public ParsingMethod( Pattern pattern, DefinitionMatcher definitionMatcher ) { - this( pattern, definitionMatcher, null ); - } - - public ParsingMethod( Pattern pattern, DefinitionMatcher definitionMatcher, AliasProvider aliasProvider ) { - this.pattern = pattern; - this.definitionMatcher = definitionMatcher; - this.aliasProvider = aliasProvider; - } - - public boolean matches( ArgumentDefinitions definitions, String token ) { - Matcher matcher = pattern.matcher(token); - return matcher.matches(); - } - - public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) { - Matcher matcher = pattern.matcher(token); - - // Didn't match? Must be bad input. 
- if( !matcher.matches() ) - throw new IllegalArgumentException( String.format("Unable to parse token %s with pattern %s", token, pattern.pattern()) ); - - // If the argument is valid, parse out the argument and value (if present). - String argument = matcher.group(1); - String value = null; - if( matcher.groupCount() > 1 && matcher.group(2).trim().length() > 0) - value = matcher.group(2).trim(); - - // If an alias provider has been provided, determine the possible list of argument names that this - // argument / value pair can represent. - ArgumentDefinition bestMatchArgumentDefinition = null; - if( aliasProvider != null ) { - List aliases = aliasProvider.getAliases( argument, value ); - String bestAlias = null; - - for( String alias: aliases ) { - if( definitions.findArgumentDefinition(alias,definitionMatcher) != null ) { - bestAlias = alias; - bestMatchArgumentDefinition = definitions.findArgumentDefinition(alias,definitionMatcher); - break; - } - } - - // Couldn't find anything appropriate? The aliases should be in best-to-worst order, so - if( bestAlias == null ) { - bestAlias = aliases.get(0); - } - - if( aliasProvider.doesAliasConsumeValue(bestAlias,argument,value) ) value = null; - argument = bestAlias; - } - else - bestMatchArgumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher ); - - // Try to find a matching argument. If found, label that as the match. If not found, add the argument - // with a null definition. 
- ArgumentMatch argumentMatch = new ArgumentMatch( argument, bestMatchArgumentDefinition, position ); - if( value != null ) - argumentMatch.addValue( position, value ); - return argumentMatch; - } - } } /** diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingMethod.java b/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingMethod.java new file mode 100755 index 000000000..de1faf8c5 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingMethod.java @@ -0,0 +1,119 @@ +package org.broadinstitute.sting.utils.cmdLine; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; +import java.util.List; +import java.util.ArrayList; + +/** + * Holds a pattern, along with how to get to the argument definitions that could match that pattern. + */ +interface ParsingMethod { + /** + * Can the given token be parsed by this parsing method? + * @param token Token to validate. + * @return True if the given token matches. + */ + public abstract boolean matches( String token ); + + /** + * Find the best match for a given token at a given position from among the provided + * argument definitions. + * @param definitions List of argument definitions. + * @param token The token from the command line to match. Should be validated using + * ParsingMethod's matches() tester. + * @param position Position at which this command-line argument occurs. Will be used + * for validation later. + * @return An argument match. Definition field will be populated if a match was found or + * empty if no appropriate definition could be found. + */ + public abstract ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ); +} + +/** + * Instructions for how to parse a command-line argument passed by full name into a match. 
+ */
+class FullNameParsingMethod implements ParsingMethod {
+    /** Accepts tokens of the form --name; the name may contain word characters, dots and dashes. */
+    private static final Pattern pattern = Pattern.compile("\\s*--([\\w\\.\\-]+)\\s*");
+    private static final DefinitionMatcher definitionMatcher = ArgumentDefinitions.FullNameDefinitionMatcher;
+
+    /** {@inheritDoc} */
+    public boolean matches( String token ) {
+        return pattern.matcher(token).matches();
+    }
+
+    /**
+     * Build a match for a token given by full name, looking up its definition by that name.
+     * @param definitions Argument definitions to search.
+     * @param token Token of the form --name.
+     * @param position Position at which the token occurs on the command line.
+     * @return A match carrying no value; its definition is null if no definition was found.
+     * @throws IllegalArgumentException if the token is not of the form --name.
+     */
+    public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) {
+        // Didn't match?  Must be bad input.
+        Matcher matcher = pattern.matcher(token);
+        if( !matcher.matches() )
+            throw new IllegalArgumentException( String.format("Unable to parse token %s with pattern %s", token, pattern.pattern()) );
+
+        String argument = matcher.group(1).trim();
+        ArgumentDefinition argumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher );
+        return new ArgumentMatch( argument, argumentDefinition, position );
+    }
+}
+
+/**
+ * Instructions for how to parse a command-line argument passed by short name into a match.
+ * A token is either a standalone name (-out) or a one-character name with an embedded value (-Ofoo.txt).
+ */
+class ShortNameParsingMethod implements ParsingMethod {
+    private static final Pattern standalonePattern = Pattern.compile("\\s*-([\\w\\-]+)\\s*");
+    private static final Pattern embeddedValuePattern = Pattern.compile("\\s*-([\\w\\.])([\\w/:\\.\\-]+)\\s*");
+    private static final DefinitionMatcher definitionMatcher = ArgumentDefinitions.ShortNameDefinitionMatcher;
+
+    /** {@inheritDoc} */
+    public boolean matches( String token ) {
+        return standalonePattern.matcher(token).matches() || embeddedValuePattern.matcher(token).matches();
+    }
+
+    /**
+     * Build a match for a token given by short name, trying both the standalone and embedded value forms.
+     * @param definitions Argument definitions to search.
+     * @param token Token of the form -name or -Xvalue.
+     * @param position Position at which the token occurs on the command line.
+     * @return The standalone match, or the embedded value match when the token has no standalone
+     *         reading or only the embedded value reading finds a definition.  Never null.
+     * @throws IllegalArgumentException if the token matches neither short name pattern.
+     */
+    public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) {
+        Matcher standaloneMatcher = standalonePattern.matcher(token);
+        Matcher embeddedValueMatcher = embeddedValuePattern.matcher(token);
+        boolean isStandalone = standaloneMatcher.matches();
+        boolean hasEmbeddedValue = embeddedValueMatcher.matches();
+        // Didn't match?  Must be bad input.
+        if( !isStandalone && !hasEmbeddedValue )
+            throw new IllegalArgumentException( String.format("Unable to parse token %s with pattern %s", token, embeddedValuePattern.pattern()) );
+
+        // Build the best possible standalone match given the available data.
+        ArgumentMatch standaloneMatch = null;
+        if( isStandalone ) {
+            String argument = standaloneMatcher.group(1).trim();
+            standaloneMatch = new ArgumentMatch( argument, definitions.findArgumentDefinition(argument,definitionMatcher), position );
+        }
+
+        // Build the best possible embedded value match given the available data.
+        ArgumentMatch embeddedValueMatch = null;
+        if( hasEmbeddedValue ) {
+            String argument = embeddedValueMatcher.group(1).trim();
+            embeddedValueMatch = new ArgumentMatch( argument, definitions.findArgumentDefinition(argument,definitionMatcher), position );
+            embeddedValueMatch.addValue( position, embeddedValueMatcher.group(2).trim() );
+        }
+
+        // Prefer the standalone match.  Fall back to the embedded value match when the token has no
+        // standalone reading at all (e.g. -Ofoo.txt), so that an undefined name never yields null.
+        boolean preferEmbeddedValue = standaloneMatch == null ||
+            (standaloneMatch.definition == null && embeddedValueMatch != null && embeddedValueMatch.definition != null);
+        return preferEmbeddedValue ? embeddedValueMatch : standaloneMatch;
+    }
+}