Make command-line arguments available in the walker constructor, provide a back door from the walker into GATK itself, clean up output messages, and fix some bugs.
Command-line arguments in walkers are now feature-complete, but still a bit messy.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@203 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-03-26 20:45:27 +00:00
parent 1d95f06ad5
commit 53fe9acf65
11 changed files with 165 additions and 92 deletions

View File

@ -24,7 +24,7 @@ import java.util.HashMap;
import java.util.List;
public class GenomeAnalysisTK extends CommandLineProgram {
public static GenomeAnalysisTK Instance = null;
// parameters and their defaults
public File INPUT_FILE;
@ -43,7 +43,6 @@ public class GenomeAnalysisTK extends CommandLineProgram {
// our walker manager
private WalkerManager walkerManager = null;
private Walker my_walker = null;
public String pluginPathName = null;
private TraversalEngine engine = null;
@ -88,7 +87,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
* @return List of walkers to load dynamically.
*/
@Override
protected Object[] getArgumentSources() {
protected Class[] getArgumentSources() {
if( Analysis_Name == null )
throw new IllegalArgumentException("Must provide analysis name");
@ -97,16 +96,20 @@ public class GenomeAnalysisTK extends CommandLineProgram {
if( !walkerManager.doesWalkerExist(Analysis_Name) )
throw new IllegalArgumentException("Invalid analysis name");
my_walker = walkerManager.getWalkerByName(Analysis_Name);
return new Class[] { walkerManager.getWalkerClassByName(Analysis_Name) };
}
return new Object[] { my_walker };
/**
 * Name an argument source after the walker it represents.
 * @param argumentSource Class supplying command-line arguments. It is cast to a
 *                       walker class; the generic cast is not checked at runtime.
 * @return The name WalkerManager.getWalkerName derives for that class.
 */
@Override
protected String getArgumentSourceName( Class argumentSource ) {
    Class<Walker> walkerType = (Class<Walker>)argumentSource;
    return WalkerManager.getWalkerName( walkerType );
}
/**
* Required main method implementation.
*/
public static void main(String[] argv) {
start(new GenomeAnalysisTK(), argv);
Instance = new GenomeAnalysisTK();
start(Instance, argv);
}
protected int execute() {
@ -175,12 +178,17 @@ public class GenomeAnalysisTK extends CommandLineProgram {
engine.setSortOnFly(ENABLED_SORT_ON_FLY);
engine.setThreadedIO(ENABLED_THREADED_IO);
engine.initialize();
//engine.testReference();
//LocusWalker<Integer,Integer> walker = new PileupWalker();
if( my_walker == null )
throw new RuntimeException( "Sanity check failed -- no walker present." );
Walker my_walker = null;
try {
my_walker = walkerManager.createWalkerByName( Analysis_Name );
}
catch( InstantiationException ex ) {
throw new RuntimeException( "Unable to instantiate walker.", ex );
}
catch( IllegalAccessException ex ) {
throw new RuntimeException( "Unable to access walker", ex );
}
// Try to get the walker specified
try {

View File

@ -21,6 +21,7 @@ import java.util.jar.JarEntry;
import java.util.jar.JarInputStream;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.cmdLine.Argument;
/**
@ -31,16 +32,16 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
* To change this template use File | Settings | File Templates.
*/
public class WalkerManager {
private Map<String,Walker> walkers = null;
private Map<String,Class> walkers;
public WalkerManager( String pluginDirectory ) {
try {
List<Class> walkerClasses = new ArrayList<Class>();
List<Class> walkerCandidates = new ArrayList<Class>();
// Load all classes that live in this jar.
final File location = getThisLocation();
walkerClasses.addAll( loadClassesFromLocation( location ) );
walkerCandidates.addAll( loadClassesFromLocation( location ) );
// Load all classes that live in the extension path.
if(pluginDirectory == null)
@ -50,29 +51,21 @@ public class WalkerManager {
File extensionPath = new File( pluginDirectory );
if(extensionPath.exists()) {
List<String> filesInPath = findFilesInPath( extensionPath, "", "class", false );
walkerClasses.addAll( loadExternalClasses( extensionPath, filesInPath ) );
walkerCandidates.addAll( loadExternalClasses( extensionPath, filesInPath ) );
}
walkerClasses = filterWalkers(walkerClasses);
walkerCandidates = filterWalkers(walkerCandidates);
if(walkerClasses.isEmpty())
if(walkerCandidates.isEmpty())
throw new RuntimeException("No walkers were found.");
walkers = instantiateWalkers( walkerClasses );
walkers = createWalkerDatabase( walkerCandidates );
}
// IOExceptions here are suspect; they indicate that the WalkerManager can't open its containing jar.
// Wrap in a RuntimeException.
catch(IOException ex) {
throw new RuntimeException(ex);
}
// The following two catches are more 'expected'; someone might add a walker that can't be instantiated.
// TODO: Should these exceptions be handled differently? Handling them like IOExceptions for the moment.
catch(InstantiationException ex) {
throw new RuntimeException(ex);
}
catch(IllegalAccessException ex) {
throw new RuntimeException(ex);
}
}
/**
@ -89,9 +82,14 @@ public class WalkerManager {
* @param walkerName Name of the walker to retrieve.
* @return The walker object if found; null otherwise.
*/
public Walker getWalkerByName(String walkerName) {
Walker walker = walkers.get(walkerName);
return walker;
/**
 * Create a fresh instance of the walker registered under the given name.
 * @param walkerName Name of the walker to create.
 * @return A new walker instance, or null if no walker is registered under that name.
 * @throws InstantiationException if the walker class cannot be instantiated.
 * @throws IllegalAccessException if the walker class or its no-arg constructor is not accessible.
 */
public Walker createWalkerByName(String walkerName)
        throws InstantiationException, IllegalAccessException {
    Class walkerClass = walkers.get(walkerName);
    // An unknown name yields null rather than a NullPointerException from newInstance().
    if( walkerClass == null )
        return null;
    return (Walker)walkerClass.newInstance();
}
/**
 * Look up the class registered for a walker name.
 * @param walkerName Name of the walker to look up.
 * @return The class stored under that name in the walker map, or null if the map has no entry for it.
 */
public Class getWalkerClassByName( String walkerName ) {
    Class registered = walkers.get(walkerName);
    return registered;
}
/**
@ -224,7 +222,7 @@ public class WalkerManager {
* TODO: Test recursive traversal in the presence of a symlink.
*/
private List<String> findFilesInPath(final File basePath, final String relativePrefix, final String extension, boolean recursive) {
List<String> filesInPath = new ArrayList();
List<String> filesInPath = new ArrayList<String>();
File[] contents = basePath.listFiles( new OrFilenameFilter( new DirectoryFilter(), new ExtensionFilter( extension ) ) );
for( File content: contents ) {
@ -302,21 +300,37 @@ public class WalkerManager {
/**
* Instantiate the list of walker classes. Add them to the walker hashmap.
* @param walkerClasses Classes to instantiate.
* @throws InstantiationException
* @throws IllegalAccessException
* @return map of walker name to walker.
*/
private Map<String,Walker> instantiateWalkers(List<Class> walkerClasses)
throws InstantiationException, IllegalAccessException {
Map<String,Walker> walkers = new HashMap<String,Walker>();
for(Class walkerClass : walkerClasses) {
Walker walker = (Walker)walkerClass.newInstance();
String walkerName = walker.getName();
private Map<String,Class> createWalkerDatabase(List<Class> walkerClasses) {
Map<String,Class> walkers = new HashMap<String,Class>();
for(Class<Walker> walkerClass : walkerClasses) {
String walkerName = getWalkerName( walkerClass );
System.out.printf("* Adding module %s%n", walkerName);
walkers.put(walkerName,walker);
walkers.put(walkerName,walkerClass);
}
return walkers;
}
/**
 * Derive the name for a type of walker.
 * A non-blank WalkerName annotation value takes precedence; otherwise the
 * simple class name is used, with a trailing "Walker" removed.
 * @param walkerType The type of walker.
 * @return A name for this type of walker.
 */
public static String getWalkerName( Class<Walker> walkerType ) {
    WalkerName annotation = walkerType.getAnnotation( WalkerName.class );
    String annotatedName = (annotation != null) ? annotation.value().trim() : "";
    if( annotatedName.length() != 0 )
        return annotatedName;

    // No usable annotation: fall back to the class name minus the conventional suffix.
    final String suffix = "Walker";
    String simpleName = walkerType.getSimpleName();
    if( simpleName.endsWith(suffix) )
        return simpleName.substring( 0, simpleName.length() - suffix.length() );
    return simpleName;
}
}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.GenomeAnalysisTK;
/**
* Created by IntelliJ IDEA.
* User: hanna
@ -9,13 +11,19 @@ package org.broadinstitute.sting.gatk.walkers;
*/
public abstract class Walker {
// TODO: Can a walker be templatized so that map and reduce live here?
public String getName() {
// Return name of class, trimming 'Walker' from the end if present.
String className = getClass().getSimpleName();
if(className.endsWith(Walker.class.getSimpleName()))
return className.substring(0,className.lastIndexOf(Walker.class.getSimpleName()));
else
return className;
/**
 * Load command-line arguments into this walker's fields as it is constructed.
 * Requires GenomeAnalysisTK.Instance to have been assigned already; if it is
 * still null, a NullPointerException is thrown here.
 * NOTE(review): this runs before subclass field initializers execute, so a
 * subclass field declared with an initializer would overwrite a value loaded
 * here -- confirm walker argument fields are declared without initializers.
 */
protected Walker() {
    GenomeAnalysisTK toolkit = GenomeAnalysisTK.Instance;
    toolkit.loadArgumentsIntoObject(this);
}
/**
* Retrieve the toolkit, for peering into internal structures that can't
* otherwise be read. Use sparingly, and discuss uses with software engineering
* team.
* Returns the static GenomeAnalysisTK.Instance reference as-is, without a null check.
* @return The genome analysis toolkit.
*/
protected GenomeAnalysisTK getToolkit() {
return GenomeAnalysisTK.Instance;
}
/**
* Initialization hook. The default implementation does nothing; subclasses
* may override it to set up their own state.
*/
public void initialize() { }

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.gatk.walkers;
import java.lang.annotation.Documented;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Type-level annotation that gives a walker class an explicit name.
 * It is retained at runtime so the name can be read reflectively. The default
 * value is the empty string, meaning no explicit name was supplied.
 *
 * Created by IntelliJ IDEA.
 * User: hanna
 * Date: Mar 26, 2009
 * Time: 3:00:16 PM
 */
@Documented
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface WalkerName {
    /** The explicit walker name; empty when none is given. */
    String value() default "";
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.gatk.LocusContext;
/**
@ -11,13 +12,10 @@ import org.broadinstitute.sting.gatk.LocusContext;
* Time: 3:22:14 PM
* To change this template use File | Settings | File Templates.
*/
@WalkerName("Aligned_Reads_Histogram")
public class AlignedReadsHistoWalker extends ReadWalker<Integer, Integer> {
long[] alignCounts = new long[51];
public String getName() {
return "Aligned_Reads_Histogram";
}
public void initialize() {
for ( int i = 0; i < alignCounts.length; i++ ) {
alignCounts[i] = 0;

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.Utils;
import edu.mit.broad.picard.reference.ReferenceSequence;
@ -10,15 +11,12 @@ import java.util.Iterator;
import java.util.List;
import static java.lang.reflect.Array.*;
@WalkerName("Base_Quality_Dump")
public class BaseQualityDumpWalker extends ReadWalker<Integer, Integer> {
protected final int MIN_TARGET_EDIT_DISTANCE = 0; //5;
protected final int MAX_TARGET_EDIT_DISTANCE = 4; //10;
public String getName() {
return "Base_Quality_Dump";
}
// Do we actually want to operate on the context?
public boolean filter(LocusContext context, SAMRecord read) {
// we only want aligned reads

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.gatk.LocusContext;
/**
@ -11,13 +12,10 @@ import org.broadinstitute.sting.gatk.LocusContext;
* Time: 3:22:14 PM
* To change this template use File | Settings | File Templates.
*/
@WalkerName("Base_Quality_Histogram")
public class BaseQualityHistoWalker extends ReadWalker<Integer, Integer> {
long[] qualCounts = new long[100];
public String getName() {
return "Base_Quality_Histogram";
}
public void initialize() {
for ( int i = 0; i < this.qualCounts.length; i++ ) {
this.qualCounts[i] = 0;

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.Utils;
import edu.mit.broad.picard.reference.ReferenceSequence;
@ -10,11 +11,8 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@WalkerName("CountMismatches")
public class MismatchCounterWalker extends ReadWalker<Integer, Integer> {
public String getName() {
return "CountMismatches";
}
public Integer map(LocusContext context, SAMRecord read) {
int nMismatches = 0;

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.Utils;
import edu.mit.broad.picard.reference.ReferenceSequence;
@ -10,16 +11,13 @@ import java.util.Iterator;
import java.util.List;
import static java.lang.reflect.Array.*;
@WalkerName("Mismatch_Histogram")
public class MismatchHistoWalker extends ReadWalker<Integer, Integer> {
protected long[] mismatchCounts = new long[0];
protected final int MIN_TARGET_EDIT_DISTANCE = 5;
protected final int MAX_TARGET_EDIT_DISTANCE = 10;
public String getName() {
return "Mismatch_Histogram";
}
// Do we actually want to operate on the context?
public boolean filter(LocusContext context, SAMRecord read) {
// we only want aligned reads

View File

@ -9,7 +9,7 @@ import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Modifier;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -37,8 +37,11 @@ public class ArgumentParser {
// what program are we parsing for
private String programName;
// the command-line options received.
private CommandLine cmd;
// where we eventually want the values to land
private HashMap<String, Pair<Object,Field>> m_storageLocations = new HashMap<String, Pair<Object,Field>>();
private HashMap<String,Field> m_storageLocations = new HashMap<String,Field>();
// create Options object
protected Options m_options = new Options();
@ -112,7 +115,7 @@ public class ArgumentParser {
* @param opt the option
* @param field what field it should be stuck into on the calling class
*/
private void AddToOptionStorage(Option opt, Pair<Object,Field> field ) {
private void AddToOptionStorage(Option opt, Field field ) {
// first check to see if we've already added an option with the same name
if (m_options.hasOption( opt.getOpt() ))
throw new IllegalArgumentException(opt.getOpt() + " was already added as an option");
@ -132,10 +135,10 @@ public class ArgumentParser {
* Used locally to add a group of mutually exclusive options to options storage.
* @param options A list of pairs of param, field to add.
*/
private void AddToOptionStorage( List<Pair<Option,Pair<Object,Field>>> options ) {
private void AddToOptionStorage( List<Pair<Option,Field>> options ) {
OptionGroup optionGroup = new OptionGroup();
for( Pair<Option,Pair<Object,Field>> option: options ) {
for( Pair<Option,Field> option: options ) {
if (m_options.hasOption(option.first.getOpt()) )
throw new IllegalArgumentException(option.first.getOpt() + " was already added as an option");
@ -147,9 +150,9 @@ public class ArgumentParser {
m_options.addOptionGroup(optionGroup);
}
private Pair<Object,Field> getField( Object obj, String fieldName ) {
private Field getField( Object obj, String fieldName ) {
try {
return new Pair<Object,Field>( obj, obj.getClass().getField(fieldName) );
return obj.getClass().getField(fieldName);
} catch (NoSuchFieldException e) {
logger.fatal("Failed to find the field specified by the fieldname parameter.");
throw new RuntimeException(e.getMessage());
@ -283,14 +286,24 @@ public class ArgumentParser {
// you can't get to the unparsed args. Override PosixParser with a class
// that can reach in and extract the protected command line.
// TODO: Holy crap this is wacky. Find a cleaner way.
CommandLine cmd = parser.getCmd();
this.cmd = parser.getCmd();
}
public void loadArgumentsIntoObject( Object obj ) {
Collection<Option> opts = m_options.getOptions();
// logger.info("We have " + opts.size() + " options");
for (Option opt : opts) {
if (cmd.hasOption(opt.getOpt())) {
//logger.info("looking at " + m_storageLocations.get(opt.getLongOpt()));
Object obj = m_storageLocations.get(opt.getLongOpt()).first;
Field field = m_storageLocations.get(opt.getLongOpt()).second;
Field field = m_storageLocations.get(opt.getLongOpt());
// Check to see if the object contains the specified field. Iterate through
// the array rather than doing a name lookup in case field names overlap between
// multiple classes in the application.
List<Field> fieldsInObj = Arrays.asList(obj.getClass().getFields());
if( !fieldsInObj.contains(field) )
continue;
try {
if (opt.hasArg())
@ -316,19 +329,19 @@ public class ArgumentParser {
* Extract arguments stored in annotations from fields of a given class.
* @param source Source of arguments, probably provided through Argument annotation.
*/
public void addArgumentSource( Object source ) {
Field[] fields = source.getClass().getFields();
public void addArgumentSource( CommandLineProgram clp, Class source ) {
Field[] fields = source.getFields();
for( Set<Field> optionGroup: groupExclusiveOptions(fields) ) {
List<Pair<Option,Pair<Object,Field>>> options = new ArrayList<Pair<Option,Pair<Object,Field>>>();
List<Pair<Option,Field>> options = new ArrayList<Pair<Option,Field>>();
for( Field field: optionGroup ) {
Argument argument = field.getAnnotation(Argument.class);
Option option = createOptionFromField( source, field, argument );
options.add( new Pair<Option,Pair<Object,Field>>( option, new Pair<Object,Field>( source,field) ) );
Option option = createOptionFromField( clp.getArgumentSourceName( source ), field, argument );
options.add( new Pair<Option,Field>( option, field ) );
}
if( options.size() == 1 )
AddToOptionStorage( options.get(0).first, new Pair<Object,Field>( source, options.get(0).second.second ) );
AddToOptionStorage( options.get(0).first, options.get(0).second );
else {
AddToOptionStorage( options );
}
@ -392,7 +405,7 @@ public class ArgumentParser {
* @param field Field
* @return Option representing the field options.
*/
private Option createOptionFromField( Object source, Field field, Argument argument ) {
private Option createOptionFromField( String sourceName, Field field, Argument argument ) {
String fullName = (argument.fullName().length() != 0) ? argument.fullName() : field.getName().trim().toLowerCase();
String shortName = (argument.shortName().length() != 0) ? argument.shortName() : fullName.substring(0,1);
@ -404,7 +417,7 @@ public class ArgumentParser {
if( isFlag && isCollection )
throw new IllegalArgumentException("Can't have an array of flags.");
String description = String.format("[%s] %s", source, argument.doc());
String description = String.format("[%s] %s", sourceName, argument.doc());
OptionBuilder ob = OptionBuilder.withLongOpt(fullName);
if( !isFlag ) {
@ -413,7 +426,7 @@ public class ArgumentParser {
}
if( argument.required() ) {
ob = ob.isRequired();
description = String.format("[%s] (Required Option) %s", source, argument.doc());
description = String.format("[%s] (Required Option) %s", sourceName, argument.doc());
}
if( description.length() != 0 ) ob = ob.withDescription( description );

View File

@ -81,7 +81,14 @@ public abstract class CommandLineProgram {
* Provide a list of object to inspect, looking for additional command-line arguments.
* @return A list of objects to inspect.
*/
protected Object[] getArgumentSources() { return new Object[] {}; }
protected Class[] getArgumentSources() { return new Class[] {}; }
/**
 * Name this argument source. Provides the fully-qualified class name as a default.
 * Uses Class.getName() rather than Class.toString(), because toString() prefixes
 * the name with "class " or "interface ".
 * @param source The argument source.
 * @return a name for the argument source.
 */
protected String getArgumentSourceName( Class source ) { return source.getName(); }
/**
* this is the function that the inheriting class can expect to have called
@ -128,13 +135,16 @@ public abstract class CommandLineProgram {
if( clp.canAddArgumentsDynamically() ) {
// if the command-line program can toss in extra args, fetch them and reparse the arguments.
clp.m_parser.processArgs(args, true);
Object[] argumentSources = clp.getArgumentSources();
for( Object argumentSource: argumentSources )
clp.m_parser.loadArgumentsIntoObject( clp );
Class[] argumentSources = clp.getArgumentSources();
for( Class argumentSource: argumentSources )
clp.addArgumentSource( argumentSource );
clp.m_parser.processArgs(args, false);
}
else {
clp.m_parser.processArgs(args, false);
clp.m_parser.loadArgumentsIntoObject( clp );
}
// if we're in debug mode, set the mode up
@ -201,6 +211,15 @@ public abstract class CommandLineProgram {
}
}
/**
* Find fields in the object obj that look like command-line arguments, and put command-line
* arguments into them. This is a public pass-through to the argument parser (m_parser),
* which does the actual work.
* @param obj Object to inspect for command line arguments.
*/
public void loadArgumentsIntoObject( Object obj ) {
m_parser.loadArgumentsIntoObject( obj );
}
/**
* generateHeaderInformation
* <p/>
@ -261,8 +280,8 @@ public abstract class CommandLineProgram {
* probably a class with @argument annotations.
* @param source
*/
private void addArgumentSource( Object source ) {
m_parser.addArgumentSource(source);
private void addArgumentSource( Class source ) {
m_parser.addArgumentSource( this, source );
}
/**