Bring new command-line argument parsing system live.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@603 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-05-06 18:16:11 +00:00
parent 98f4920739
commit 521aa40baa
10 changed files with 232 additions and 119 deletions

View File

@ -121,45 +121,6 @@ public class GenomeAnalysisTK extends CommandLineProgram {
public static ArrayList<String> ROD_BINDINGS = new ArrayList<String>();
/**
* setup our arguments, both required and optional
* <p/>
* Flags don't take an argument, the associated Boolean gets set to true if the flag appears on the command line.
*/
protected void setupArgs() {
m_parser.addOptionalArgList("input_file", "I", "SAM or BAM file", "INPUT_FILES");
m_parser.addOptionalArg("maximum_reads", "M", "Maximum number of reads to process before exiting", "MAX_READS_ARG");
m_parser.addOptionalArg("validation_strictness", "S", "How strict should we be with validation (LENIENT|SILENT|STRICT)", "STRICTNESS_ARG");
m_parser.addOptionalArg("reference_sequence", "R", "Reference sequence file", "REF_FILE_ARG");
m_parser.addOptionalArg("genome_region", "L", "Genome region to operation on: from chr:start-end", "REGION_STR");
m_parser.addRequiredArg("analysis_type", "T", "Type of analysis to run", "Analysis_Name");
m_parser.addOptionalArg("DBSNP", "D", "DBSNP file", "DBSNP_FILE");
m_parser.addOptionalArg("hapmap", "H", "Hapmap file", "HAPMAP_FILE");
m_parser.addOptionalArg("hapmap_chip", "hc", "Hapmap chip file", "HAPMAP_CHIP_FILE");
m_parser.addOptionalFlag("threaded_IO", "P", "If set, enables threaded I/O operations", "ENABLED_THREADED_IO");
m_parser.addOptionalFlag("unsafe", "U", "If set, enables unsafe operations, nothing will be checked at runtime.", "UNSAFE");
m_parser.addOptionalArg("sort_on_the_fly", "sort", "Maximum number of reads to sort on the fly", "MAX_ON_FLY_SORTS");
m_parser.addOptionalArg("downsample_to_fraction", "dfrac", "Fraction [0.0-1.0] of reads to downsample to", "DOWNSAMPLE_FRACTION");
m_parser.addOptionalArg("downsample_to_coverage", "dcov", "Coverage [integer] to downsample to", "DOWNSAMPLE_COVERAGE");
m_parser.addOptionalArg("intervals_file", "V", "File containing list of genomic intervals to operate on. line := <contig> <start> <end>", "INTERVALS_FILE");
m_parser.addOptionalFlag("all_loci", "A", "Should we process all loci, not just those covered by reads", "WALK_ALL_LOCI");
m_parser.addOptionalArg("out", "o", "An output file presented to the walker. Will overwrite contents if file exists.", "outFileName" );
m_parser.addOptionalArg("err", "e", "An error output file presented to the walker. Will overwrite contents if file exists.", "errFileName" );
m_parser.addOptionalArg("outerr", "oe", "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", "outErrFileName");
m_parser.addOptionalArg("numthreads", "nt", "How many threads should be allocated to running this analysis.", "numThreads");
m_parser.addOptionalFlag("disablethreading", "dt", "Disable experimental threading support.", "DISABLE_THREADING");
// --rodBind <name> <type> <file>
//m_parser.addOptionalArg("rods", "B", "Bind rod with <name> and <type> to <file>", "ROD_BINDINGS");
Option rodBinder = OptionBuilder.withArgName("rodBind")
.hasArgs()
.withDescription( "" )
.create("B");
m_parser.addOptionalArg(rodBinder, "ROD_BINDINGS");
}
/**
* GATK can add arguments dynamically based on analysis type.
* @return true

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.fourbasecaller;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.QualityUtils;
import java.io.*;
@ -14,8 +15,11 @@ import net.sf.samtools.SAMFileWriterFactory;
public class CombineSamAndFourProbs extends CommandLineProgram {
public static CombineSamAndFourProbs Instance = null;
@Argument(fullName="sam", shortName="S", doc="Input SAM file")
public File SAM;
@Argument(fullName="fourprob", shortName="F", doc="Input text file := read_name sq_field")
public File FOURPROBS;
@Argument(fullName="sam_out", shortName="O", doc="Output SAM file")
public File SAM_OUT;
public static void main(String[] argv) {
@ -23,12 +27,6 @@ public class CombineSamAndFourProbs extends CommandLineProgram {
start(Instance, argv);
}
protected void setupArgs() {
m_parser.addRequiredArg("sam", "S", "Input SAM file", "SAM");
m_parser.addRequiredArg("fourprob", "F", "Input text file := read_name sq_field", "FOURPROBS");
m_parser.addRequiredArg("sam_out", "O", "Output SAM file", "SAM_OUT");
}
protected int execute() {
BufferedReader fpreader = null;

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.playground.illumina.FirecrestReadData;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.io.File;
import java.io.IOException;
@ -21,15 +22,25 @@ import java.io.PrintWriter;
public class FourBaseRecaller extends CommandLineProgram {
public static FourBaseRecaller Instance = null;
@Argument(fullName="dir", shortName="D", doc="Illumina Bustard directory")
public File DIR;
@Argument(fullName="lane", shortName="L", doc="Illumina flowcell lane")
public int LANE;
@Argument(fullName="run_barcode", shortName="B", doc="Illumina Run Barcode (e.g. 305PJAAXX080716)")
public String RUN_BARCODE;
@Argument(fullName="out", shortName="O", doc="Output path for sam file")
public File OUT;
@Argument(fullName="end", shortName="E", doc="End of read to process (0 = whole read, i.e. unpaired; 1 = first end; 2 = second end)", required=false)
public int END = 0;
@Argument(fullName="tlim", shortName="T", doc="Number of reads to use for parameter initialization", required=false)
public int TRAINING_LIMIT = 1000000000;
@Argument(fullName="clim", shortName="C", doc="Number of reads to basecall", required=false)
public int CALLING_LIMIT = 1000000000;
@Argument(fullName="raw", shortName="R", doc="Use raw intensities?", required=false)
public Boolean RAW = false;
@Argument(fullName="old", shortName="1", doc="Old Bustard 1.1 mode?", required=false)
public Boolean OLD = false;
@Argument(fullName="context", shortName="X", doc="Correct for context?", required=false)
public Boolean CONTEXT = false;
public static void main(String[] argv) {
@ -37,19 +48,6 @@ public class FourBaseRecaller extends CommandLineProgram {
start(Instance, argv);
}
protected void setupArgs() {
m_parser.addRequiredArg("dir", "D", "Illumina Bustard directory", "DIR");
m_parser.addRequiredArg("lane", "L", "Illumina flowcell lane", "LANE");
m_parser.addOptionalArg("end", "E", "End of read to process (0 = whole read, i.e. unpaired; 1 = first end; 2 = second end)", "END");
m_parser.addRequiredArg("run_barcode", "B", "Illumina Run Barcode (e.g. 305PJAAXX080716)", "RUN_BARCODE");
m_parser.addRequiredArg("out", "O", "Output path for sam file", "OUT");
m_parser.addOptionalArg("tlim", "T", "Number of reads to use for parameter initialization", "TRAINING_LIMIT");
m_parser.addOptionalArg("clim", "C", "Number of reads to basecall", "CALLING_LIMIT");
m_parser.addOptionalFlag("raw", "R", "Use raw intensities?", "RAW");
m_parser.addOptionalFlag("old", "1", "Old Bustard 1.1 mode?", "OLD");
m_parser.addOptionalFlag("context", "X", "Correct for context?", "CONTEXT");
}
protected int execute() {
boolean isPaired = (END > 0);

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.fourbasecaller;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.QualityUtils;
import java.io.File;
@ -13,7 +14,9 @@ import net.sf.samtools.SAMRecord;
public class MatchSQTagToStrand extends CommandLineProgram {
public static MatchSQTagToStrand Instance = null;
@Argument(fullName="sam_in", shortName="I", doc="Input SAM file")
public File SAM_IN;
@Argument(fullName="sam_out", shortName="O", doc="Output SAM file")
public File SAM_OUT;
public static void main(String[] argv) {
@ -21,11 +24,6 @@ public class MatchSQTagToStrand extends CommandLineProgram {
start(Instance, argv);
}
protected void setupArgs() {
m_parser.addRequiredArg("sam_in", "I", "Input SAM file", "SAM_IN");
m_parser.addRequiredArg("sam_out", "O", "Output SAM file", "SAM_OUT");
}
protected int execute() {
SAMFileReader sf = new SAMFileReader(SAM_IN);
sf.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);

View File

@ -8,6 +8,7 @@ import java.util.HashSet;
import java.util.Collection;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
/**
* Created by IntelliJ IDEA.
@ -26,7 +27,7 @@ import java.util.ArrayList;
/**
* A collection of argument definitions.
*/
class ArgumentDefinitions {
class ArgumentDefinitions implements Iterable<ArgumentDefinition> {
/**
* Backing data set of argument stored by short name and long name.
*/
@ -95,6 +96,14 @@ class ArgumentDefinitions {
return selectedArgumentDefinitions;
}
/**
* Iterates through all argument definitions held by this collection.
* Implements {@link Iterable}, which lets callers use this collection directly in a for-each loop.
* @return an iterator obtained from the backing argumentDefinitions collection.
*/
public Iterator<ArgumentDefinition> iterator() {
return argumentDefinitions.iterator();
}
/**
* Match the full name of a definition.
*/

View File

@ -65,7 +65,7 @@ public class ArgumentParser {
*/
public void printHelp() {
// automatically generate the help statement
HelpFormatter formatter = new HelpFormatter();
org.apache.commons.cli.HelpFormatter formatter = new org.apache.commons.cli.HelpFormatter();
formatter.printHelp(100,
"java -Xmx4096m -jar dist/GenomeAnalysisTK.jar",
"",

View File

@ -5,6 +5,7 @@ import org.apache.log4j.*;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.EnumSet;
/**
* User: aaron
@ -26,9 +27,10 @@ import java.text.SimpleDateFormat;
public abstract class CommandLineProgram {
/**
* Our Argument parser, which handles parsing the command line in GNU format
* The parsing engine used for this program's command line, and the argument matches it produced.
*/
protected ArgumentParser m_parser;
private ParsingEngine parser = null;
private ArgumentMatches parameters = null;
/**
* our log, which we want to capture anything from org.broadinstitute.sting
@ -79,12 +81,6 @@ public abstract class CommandLineProgram {
private static String patternString = "%p %m %n";
private static String debugPatternString = "%n[level] %p%n[date]\t\t %d{dd MMM yyyy HH:mm:ss,SSS} %n[class]\t\t %C %n[location]\t %l %n[line number]\t %L %n[message]\t %m %n";
/**
* the contract for the inheriting class is that they have a setupArgs()
* function which sets up the args to the specific program.
*/
protected abstract void setupArgs();
/**
* Will this application want to vary its argument list dynamically?
* If so, parse the command-line options and then prompt the subclass to return
@ -137,34 +133,25 @@ public abstract class CommandLineProgram {
// setup our log layout
PatternLayout layout = new PatternLayout();
// setup the parser
clp.m_parser = new ArgumentParser(clp.getClass().getName(), clp);
// setup the default help and logging args controlled by the base class
clp.setupDefaultArgs();
// setup the args
clp.setupArgs();
ParsingEngine parser = clp.parser = new ParsingEngine();
parser.addArgumentSources( clp.getClass() );
// process the args
if( clp.canAddArgumentsDynamically() ) {
// if the command-line program can toss in extra args, fetch them and reparse the arguments.
clp.m_parser.processArgs(args, true);
clp.m_parser.loadArgumentsIntoObject( clp );
clp.parameters = parser.parse(args);
parser.validate( clp.parameters, EnumSet.of(ParsingEngine.ValidationType.InvalidArgument) );
parser.loadArgumentsIntoObject( clp, clp.parameters );
Class[] argumentSources = clp.getArgumentSources();
for( Class argumentSource: argumentSources )
clp.addArgumentSource( argumentSource );
clp.m_parser.processArgs(args, false);
// HACK: Load arguments into object again. Apache CLI always stops processing when an option
// is unrecognized, so if core arguments were intermixed with walker arguments, stop processing.
clp.m_parser.loadArgumentsIntoObject( clp );
parser.addArgumentSources( argumentSources );
clp.parameters = parser.parse(args);
parser.validate( clp.parameters );
}
else {
clp.m_parser.processArgs(args, false);
clp.m_parser.loadArgumentsIntoObject( clp );
clp.parameters = parser.parse(args);
parser.validate( clp.parameters );
}
// if we're in debug mode, set the mode up
@ -190,7 +177,7 @@ public abstract class CommandLineProgram {
// they asked for help, give it to them
if (clp.help) {
clp.m_parser.printHelp();
parser.printHelp();
System.exit(1);
}
@ -219,7 +206,7 @@ public abstract class CommandLineProgram {
}
catch (org.apache.commons.cli.ParseException e) {
logger.fatal("Unable to pass command line arguments: " + e.getMessage() );
clp.m_parser.printHelp();
clp.parser.printHelp();
}
catch (Exception e) {
// we catch all exceptions here. if it makes it to this level, we're in trouble. Let's bail!
@ -237,7 +224,7 @@ public abstract class CommandLineProgram {
* @param obj Object to inspect for command line arguments.
*/
public void loadArgumentsIntoObject( Object obj ) {
m_parser.loadArgumentsIntoObject( obj );
parser.loadArgumentsIntoObject( obj, parameters );
}
/**
@ -298,27 +285,4 @@ public abstract class CommandLineProgram {
logger.setLevel(par);
}
/**
* Pass along a new set of valid command line arguments. In this case,
* probably a class with @argument annotations.
* @param source
*/
private void addArgumentSource( Class source ) {
m_parser.addArgumentSource( this, source );
}
/**
* we have some default options that should always get checked for in the
* arguments provided to the program
*/
private void setupDefaultArgs() {
m_parser.addOptionalFlag("help", "h", "Generate this help message", "help");
m_parser.addOptionalArg("logging_level", "l", "Set the minimum level of logging, i.e. setting INFO get's you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging. (DEBUG, INFO, WARN, ERROR, FATAL, OFF). ", "logging_level");
m_parser.addOptionalArg("log_to_file", "log", "Set the logging location", "toFile");
m_parser.addOptionalFlag("quiet_output_mode", "quiet", "Set the logging to quiet mode, no output to stdout", "quietMode");
m_parser.addOptionalFlag("debug_mode", "debug", "Set the logging file string to include a lot of debugging information (SLOW!)", "debugMode");
}
}

View File

@ -0,0 +1,170 @@
package org.broadinstitute.sting.utils.cmdLine;
import java.util.Formatter;
import java.util.Locale;
import java.util.Formattable;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
* User: hanna
* Date: May 6, 2009
* Time: 10:16:43 AM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
 * Prints out help for Sting command-line applications.
 *
 * The help text consists of a word-wrapped "usage:" synopsis followed by a two-column
 * listing (calling information on the left, documentation on the right) with one entry
 * per argument definition.
 */
public class HelpFormatter {
    /**
     * Target this line width.
     */
    private static final int LINE_WIDTH = 100;

    /**
     * Number of blank columns separating the argument column from the documentation column.
     */
    private static final int ARG_DOC_SEPARATION_WIDTH = 3;

    /**
     * Prints the help to standard output, given a collection of argument definitions.
     * @param argumentDefinitions Argument definitions for which help should be printed.
     */
    public void printHelp( ArgumentDefinitions argumentDefinitions ) {
        System.out.printf("%s%n%n%s%n", getSynopsis(argumentDefinitions), getDetailed(argumentDefinitions) );
    }

    /**
     * Gets the synopsis: the actual command to run.
     * Optional arguments are wrapped in square brackets; the short name is preferred
     * over the full name when one is available.
     * @param argumentDefinitions Argument definitions for which help should be printed.
     * @return A synopsis, word-wrapped to LINE_WIDTH, with every line terminated by a newline.
     */
    private String getSynopsis( ArgumentDefinitions argumentDefinitions ) {
        // Build out the synopsis all as one long line.
        StringBuilder lineBuilder = new StringBuilder();
        Formatter lineFormatter = new Formatter( lineBuilder );

        lineFormatter.format("java -jar dist/GenomeAnalysisTK.jar");

        for( ArgumentDefinition argumentDefinition: argumentDefinitions ) {
            lineFormatter.format(" ");
            if( !argumentDefinition.required ) lineFormatter.format("[");
            if( argumentDefinition.shortName != null )
                lineFormatter.format("-%s", argumentDefinition.shortName);
            else
                lineFormatter.format("--%s", argumentDefinition.fullName);
            if( !argumentDefinition.isFlag() )
                lineFormatter.format(" <%s>", argumentDefinition.fullName);
            if( !argumentDefinition.required ) lineFormatter.format("]");
        }

        // Word wrap the synopsis.
        List<String> wrappedSynopsis = wordWrap( lineBuilder.toString(), LINE_WIDTH );

        // The header is printed on the first line only; continuation lines are padded
        // with blanks of the same width so the wrapped text stays aligned.
        String header = "usage: ";
        int headerLength = header.length();
        String lineFormat = "%" + headerLength + "s%s%n";

        StringBuilder synopsisBuilder = new StringBuilder();
        Formatter synopsisFormatter = new Formatter(synopsisBuilder);
        for( String synopsisLine: wrappedSynopsis ) {
            synopsisFormatter.format(lineFormat, header, synopsisLine);
            header = "";
        }

        return synopsisBuilder.toString();
    }

    /**
     * Gets detailed output about each argument type.
     * @param argumentDefinitions Argument definitions for which help should be printed.
     * @return Detailed text about all arguments.
     */
    private String getDetailed( ArgumentDefinitions argumentDefinitions ) {
        StringBuilder builder = new StringBuilder();
        Formatter formatter = new Formatter( builder );

        // Try to fit the entire argument definition across the screen, but impose an arbitrary cap of 3/4 *
        // LINE_WIDTH in case the length of the arguments gets out of control.
        int argWidth = Math.min( findLongestArgumentCallingInfo(argumentDefinitions), (LINE_WIDTH*3)/4 - ARG_DOC_SEPARATION_WIDTH );
        int docWidth = LINE_WIDTH - argWidth - ARG_DOC_SEPARATION_WIDTH;

        // Left-justified argument column, fixed-width separator, then the documentation.
        String formatString = "%-" + argWidth + "s%" + ARG_DOC_SEPARATION_WIDTH + "s%s%n";

        for( ArgumentDefinition argumentDefinition: argumentDefinitions ) {
            Iterator<String> wordWrappedArgs = wordWrap( getArgumentCallingInfo(argumentDefinition), argWidth ).iterator();
            Iterator<String> wordWrappedDoc  = wordWrap( argumentDefinition.doc, docWidth ).iterator();

            // Emit rows until both columns are exhausted; the shorter column is padded with blanks.
            while( wordWrappedArgs.hasNext() || wordWrappedDoc.hasNext() ) {
                String arg = wordWrappedArgs.hasNext() ? wordWrappedArgs.next() : "";
                String doc = wordWrappedDoc.hasNext() ? wordWrappedDoc.next() : "";
                formatter.format( formatString, arg, "", doc );
            }
        }

        return builder.toString();
    }

    /**
     * Gets a string indicating how this argument should be passed to the application.
     * @param argumentDefinition Argument definition for which help should be printed.
     * @return Calling information for this argument.
     */
    private String getArgumentCallingInfo( ArgumentDefinition argumentDefinition ) {
        StringBuilder builder = new StringBuilder();
        Formatter formatter = new Formatter( builder );

        // Leading blank indents the entry; wordWrap deliberately preserves it.
        formatter.format(" ");
        if( argumentDefinition.shortName != null )
            formatter.format("-%s,", argumentDefinition.shortName);
        formatter.format("--%s", argumentDefinition.fullName);
        if( !argumentDefinition.isFlag() )
            formatter.format(" <%s>", argumentDefinition.fullName);

        return builder.toString();
    }

    /**
     * Crude implementation which finds the longest argument portion
     * given a set of arguments.
     * @param argumentDefinitions argument definitions to inspect.
     * @return longest argument length, or 0 if there are no definitions.
     */
    private int findLongestArgumentCallingInfo( ArgumentDefinitions argumentDefinitions ) {
        int longest = 0;
        for( ArgumentDefinition argumentDefinition: argumentDefinitions )
            longest = Math.max( longest, getArgumentCallingInfo(argumentDefinition).length() );
        return longest;
    }

    /**
     * Simple implementation of word-wrap for a line of text.  Idea and
     * regexp shamelessly stolen from http://joust.kano.net/weblog/archives/000060.html.
     * Regexp can probably be simplified for our application.
     *
     * Each returned line keeps its leading whitespace but has trailing whitespace removed, so no
     * line produced by the regexp is longer than width.  Text that the regexp cannot break at a
     * blank (for example a single token longer than width) is not dropped; it is emitted
     * in pieces of at most width characters.
     *
     * @param text Text to wrap.  Null or empty text yields an empty list.
     * @param width Maximum line width; must be at least 1.
     * @return A list of word-wrapped lines; blank lines are omitted.
     * @throws IllegalArgumentException if width is less than 1.
     */
    private List<String> wordWrap( String text, int width ) {
        if( width < 1 )
            throw new IllegalArgumentException("Word-wrap width must be at least 1, but was " + width);

        List<String> wrapped = new ArrayList<String>();
        if( text == null || text.length() == 0 )
            return wrapped;

        // Built by concatenation rather than String.format so the quantifier never contains localized digits.
        Pattern wrapper = Pattern.compile( ".{0," + (width-1) + "}(?:\\S(?: |$)|$)" );
        Matcher matcher = wrapper.matcher( text );

        int consumed = 0;
        while( matcher.find() ) {
            // find() skips over text it cannot match from the current position.
            // Recover that text instead of silently losing it.
            if( matcher.start() > consumed )
                addHardWrapped( text.substring(consumed, matcher.start()), width, wrapped );
            consumed = matcher.end();
            addLine( matcher.group(), wrapped );
        }
        if( consumed < text.length() )
            addHardWrapped( text.substring(consumed), width, wrapped );

        return wrapped;
    }

    /**
     * Splits text with no usable break point into pieces of at most width characters
     * and appends each non-blank piece to the output.
     */
    private static void addHardWrapped( String run, int width, List<String> wrapped ) {
        for( int start = 0; start < run.length(); start += width )
            addLine( run.substring(start, Math.min(run.length(), start + width)), wrapped );
    }

    /**
     * Appends a candidate line, minus trailing whitespace, unless it is blank.
     */
    private static void addLine( String candidate, List<String> wrapped ) {
        String line = stripTrailingWhitespace( candidate );
        // Regular expression is supersensitive to whitespace.
        // Assert that content is present before adding the line.
        if( line.trim().length() > 0 )
            wrapped.add( line );
    }

    /**
     * Removes trailing whitespace only; leading whitespace is significant (indentation).
     */
    private static String stripTrailingWhitespace( String text ) {
        int end = text.length();
        while( end > 0 && Character.isWhitespace(text.charAt(end-1)) )
            end--;
        return text.substring(0, end);
    }
}

View File

@ -66,7 +66,7 @@ public class ParsingEngine {
*/
public void addArgumentSources( Class... sources ) {
for( Class source: sources ) {
Field[] fields = source.getDeclaredFields();
Field[] fields = source.getFields();
for( Field field: fields ) {
Argument argument = field.getAnnotation(Argument.class);
if(argument != null)
@ -161,6 +161,12 @@ public class ParsingEngine {
public void loadArgumentsIntoObject( Object object, ArgumentMatches matches ) {
for( ArgumentMatch match: matches ) {
ArgumentDefinition definition = match.definition;
// A null definition might be in the list if some invalid arguments were passed in but we
// want to load in a subset of data for better error reporting. Ignore null definitions.
if( definition == null )
continue;
if( object.getClass().equals(definition.sourceClass) ) {
try {
if( !isArgumentFlag(definition) )
@ -176,6 +182,13 @@ public class ParsingEngine {
}
}
/**
* Prints out the help associated with these command-line argument definitions.
* Delegates to a newly created HelpFormatter, passing it this engine's argumentDefinitions.
*/
public void printHelp() {
new HelpFormatter().printHelp(argumentDefinitions);
}
/**
* Returns true if the argument is a flag (a 0-valued argument).
* @param definition Argument definition.

View File

@ -327,6 +327,7 @@ public class ParsingEngineTest extends BaseTest {
parsingEngine.validate( argumentMatches );
}
/*
@Test
public void packageProtectedArgTest() {
final String[] commandLine = new String[] {"--foo", "1"};
@ -345,6 +346,7 @@ public class ParsingEngineTest extends BaseTest {
@Argument(doc="foo")
Integer foo;
}
*/
@Test
public void correctDefaultArgNameTest() {