From 898f65547e90463a6a2a3a7f93e80857fa8b74ca Mon Sep 17 00:00:00 2001 From: aaron Date: Mon, 11 May 2009 02:07:20 +0000 Subject: [PATCH] Added code to split GenomeAnalysisTK.java into an object concerned with loading command line args, and one that runs the engines. This will allow us to run the GATK from other tools (like Matlab). Also some cleanup to seperate out the legacy traversals and the new style traversals. This is not live yet, and any modifications you need should be made to GenomeAnalysisTK.java for now. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@650 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/CommandLineGATK.java | 131 ++++++++ .../sting/gatk/GATKArgumentCollection.java | 275 ++++++++++++++++ .../sting/gatk/GenomeAnalysisEngine.java | 302 ++++++++++++++++++ .../utils/cmdLine/CommandLineProgram.java | 51 +++ .../gatk/GATKArgumentCollectionTest.java | 126 ++++++++ 5 files changed, 885 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java create mode 100755 java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java create mode 100755 java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java create mode 100755 java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java new file mode 100755 index 000000000..e3cb441b2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -0,0 +1,131 @@ +package org.broadinstitute.sting.gatk; + +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection; +import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; +import org.broadinstitute.sting.gatk.GATKArgumentCollection; + +/** + * + * User: aaron + * Date: May 8, 2009 + * Time: 10:50:58 
AM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + + +/** + * @author aaron + * @version 1.0 + * @date May 8, 2009 + *

+ * Class CommandLineGATK + *

+ * We run command line GATK programs using this class. It gets the command line args, parses them, and hands the + * gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here, + * the gatk engine should deal with any data related information. + */ +public class CommandLineGATK extends CommandLineProgram { + + @ArgumentCollection // our argument collection, the collection of command line args we accept + public GATKArgumentCollection argCollection = new GATKArgumentCollection(); + + public String pluginPathName = null; + + // our genome analysis engine + GenomeAnalysisEngine GATKEngine = null; + + // our walker manager + private WalkerManager walkerManager = null; + + + /** Required main method implementation. */ + public static void main(String[] argv) { + try { + CommandLineGATK instance = new CommandLineGATK(); + start(instance, argv); + } catch (Exception e) { + exitSystemWithError(e); + } + } + + + /** + * this is the function that the inheriting class can expect to have called + * when the command line system has initialized. + * + * @return the return code to exit the program with + */ + protected int execute() { + Walker mWalker = null; + try { + mWalker = walkerManager.createWalkerByName(argCollection.analysisName); + } catch (InstantiationException ex) { + throw new RuntimeException("Unable to instantiate walker.", ex); + } + catch (IllegalAccessException ex) { + throw new RuntimeException("Unable to access walker", ex); + } + loadArgumentsIntoObject(mWalker); + try { + GATKEngine = new GenomeAnalysisEngine(argCollection, mWalker); + } catch (StingException exp) { + System.err.println("Caught StingException. It's message is " + exp.getMessage()); + exp.printStackTrace(); + return -1; + } + return 0; + } + + /** + * GATK can add arguments dynamically based on analysis type. 
+ * + * @return true + */ + @Override + protected boolean canAddArgumentsDynamically() { + return true; + } + + /** + * GATK provides the walker as an argument source. As a side-effect, initializes the walker variable. + * + * @return List of walkers to load dynamically. + */ + @Override + protected Class[] getArgumentSources() { + loadArgumentsIntoObject(this.argCollection); + if (argCollection.analysisName == null) + throw new IllegalArgumentException("Must provide analysis name"); + + walkerManager = new WalkerManager(pluginPathName); + + if (!walkerManager.doesWalkerExist(argCollection.analysisName)) + throw new IllegalArgumentException("Invalid analysis name"); + + return new Class[]{walkerManager.getWalkerClassByName(argCollection.analysisName)}; + } + + @Override + protected String getArgumentSourceName(Class argumentSource) { + return WalkerManager.getWalkerName((Class) argumentSource); + } + + public GATKArgumentCollection getArgCollection() { + return argCollection; + } + + public void setArgCollection(GATKArgumentCollection argCollection) { + this.argCollection = argCollection; + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java new file mode 100755 index 000000000..c61724003 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java @@ -0,0 +1,275 @@ +package org.broadinstitute.sting.gatk; + +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.simpleframework.xml.*; +import org.simpleframework.xml.core.Persister; +import org.simpleframework.xml.stream.Format; +import org.simpleframework.xml.stream.HyphenStyle; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * + * User: aaron + * Date: May 7, 2009 + * Time: 11:46:21 AM + * + * The Broad Institute + * SOFTWARE 
COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + + +/** + * @author aaron + * @version 1.0 + * @date May 7, 2009 + *

+ * Class GATKArgumentCollection + *

+ * Encapsolute the massively large list of possible parameters we take in the Genome Analysis tool + */ +@Root +public class GATKArgumentCollection { + + /* our version number */ + private float versionNumber = 1; + private String description = "GATK Arguments"; + + /** the constructor */ + public GATKArgumentCollection() { + } + + // parameters and their defaults + @ElementList + @Argument(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) + public List samFiles = new ArrayList(); + + @Element(required=false) + @Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of reads to process before exiting", required = false) + public String maximumReads = "-1"; + + @Element(required=false) + @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation (LENIENT|SILENT|STRICT)", required = false) + public String strictnessLevel = "strict"; + + @Element(required=false) + @Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) + public File referenceFile = null; + + @Element(required=false) + @Argument(fullName = "genome_region", shortName = "L", doc = "Genome region to operation on: from chr:start-end", required = false) + public String genomeRegion = null; + + @Element(required=false) + @Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run") + public String analysisName = null; + + // parameters and their defaults + @ElementMap(entry = "analysis_argument", key = "key", attribute = true, inline = true, required=false) + public Map walkerArgs = new HashMap(); + + @Element(required=false) + @Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false) + public String DBSNPFile = null; + + @Element(required=false) + @Argument(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false) + public String HAPMAPFile = null; + + 
@Element(required=false) + @Argument(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false) + public String HAPMAPChipFile = null; + + @Element(required=false) + @Argument(fullName = "threaded_IO", shortName = "P", doc = "If set, enables threaded I/O operations", required = false) + public Boolean enabledThreadedIO = false; + + @Element(required=false) + @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false) + public Boolean unsafe = false; + + @Element(required=false) + @Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false) + public String maximumReadSorts = null; + + @Element(required=false) + @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false) + public String downsampleFraction = null; + + @Element(required=false) + @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to", required = false) + public String downsampleCoverage = null; + + @Element(required=false) + @Argument(fullName = "intervals_file", shortName = "V", doc = "File containing list of genomic intervals to operate on. line := ", required = false) + public String intervalsFile = null; + + @Element(required=false) + @Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false) + public Boolean walkAllLoci = false; + + @Element(required=false) + @Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false) + public Boolean disableThreading = false; + + /** An output file presented to the walker. */ + @Element(required=false) + @Argument(fullName = "out", shortName = "o", doc = "An output file presented to the walker. 
Will overwrite contents if file exists.", required = false) + public String outFileName = null; + + /** An error output file presented to the walker. */ + @Element(required=false) + @Argument(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false) + public String errFileName = null; + + /** A joint file for both 'normal' and error output presented to the walker. */ + @Element(required=false) + @Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false) + public String outErrFileName = null; + + /** How many threads should be allocated to this analysis. */ + @Element(required=false) + @Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false) + public int numberOfThreads = 1; + + @ElementList(required=false) + @Argument(fullName = "rodBind", shortName = "B", doc = "", required = false) + public ArrayList RODBindings = new ArrayList(); + + /** + * marshal the data out to a object + * + * @param collection the GATKArgumentCollection to load into + * @param outputFile the file to write to + */ + public static void marshal(GATKArgumentCollection collection, String outputFile) { + Serializer serializer = new Persister(new Format(new HyphenStyle())); + File result = new File(outputFile); + try { + serializer.write(collection, result); + } catch (Exception e) { + throw new StingException("Failed to marshal the data from the file " + outputFile, e); + } + } + + /** + * unmashall the object from a configuration file + * + * @param filename the filename to marshal from + */ + public static GATKArgumentCollection unmarshal(String filename) { + Serializer serializer = new Persister(new Format(new HyphenStyle())); + File source = new File(filename); + try { + GATKArgumentCollection 
example = serializer.read(GATKArgumentCollection.class, source); + return example; + } catch (Exception e) { + throw new StingException("Failed to marshal the data to file " + filename, e); + } + } + + /** + * test equality between two arg collections. This function defines the statement: + * "not fun to write" + * @param other the other collection + * @return true if they're equal + */ + public boolean equals(GATKArgumentCollection other) { + if (other.samFiles.size() != samFiles.size()) { + return false; + } + for (int x = 0; x < samFiles.size(); x++) { + if (!samFiles.get(x).equals(other.samFiles.get(x))) { + return false; + } + } + if (other.walkerArgs.size() != walkerArgs.size()) { + return false; + } + for (String s : walkerArgs.keySet()) { + if (!other.walkerArgs.containsKey(s)) { + return false; + } + } + if (other.RODBindings.size() != RODBindings.size()) { + return false; + } + for (int x = 0; x < RODBindings.size(); x++) { + if (!RODBindings.get(x).equals(other.RODBindings.get(x))) { + return false; + } + } + if (!other.samFiles.equals(this.samFiles)) { + return false; + } + if (!other.maximumReads.equals(this.maximumReads)) { + return false; + } + if (!other.strictnessLevel.equals(this.strictnessLevel)) { + return false; + } + if (!other.referenceFile.equals(this.referenceFile)) { + return false; + } + if (!other.genomeRegion.equals(this.genomeRegion)) { + return false; + } + if (!other.analysisName.equals(this.analysisName)) { + return false; + } + if (!other.DBSNPFile.equals(this.DBSNPFile)) { + return false; + } + if (!other.HAPMAPFile.equals(this.HAPMAPFile)) { + return false; + } + if (!other.HAPMAPChipFile.equals(this.HAPMAPChipFile)) { + return false; + } + if (!other.enabledThreadedIO.equals(this.enabledThreadedIO)) { + return false; + } + if (!other.unsafe.equals(this.unsafe)) { + return false; + } + if (!other.maximumReadSorts.equals(this.maximumReadSorts)) { + return false; + } + if (!other.downsampleFraction.equals(this.downsampleFraction)) 
{ + return false; + } + if (!other.downsampleCoverage.equals(this.downsampleCoverage)) { + return false; + } + if (!other.intervalsFile.equals(this.intervalsFile)) { + return false; + } + if (!other.walkAllLoci.equals(this.walkAllLoci)) { + return false; + } + if (!other.outFileName.equals(this.outFileName)) { + return false; + } + if (!other.errFileName.equals(this.errFileName)) { + return false; + } + if (!other.outErrFileName.equals(this.outErrFileName)) { + return false; + } + if (other.numberOfThreads != this.numberOfThreads) { + return false; + } + return true; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java new file mode 100755 index 000000000..148381dee --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -0,0 +1,302 @@ +package org.broadinstitute.sting.gatk; + +import edu.mit.broad.picard.reference.ReferenceSequenceFile; +import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMFileReader.ValidationStringency; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.executive.MicroScheduler; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.traversals.*; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.gatk.GATKArgumentCollection; + +import java.util.ArrayList; +import java.util.List; + +public class GenomeAnalysisEngine { + + // our instance of this genome analysis toolkit; it's used by other classes to extract the traversal engine + // TODO: public static without final tends to indicate we're thinking about this the wrong way 
+ public static GenomeAnalysisEngine instance; + + // our traversal engine + private TraversalEngine engine = null; + + // the level of debugging we're using + public boolean DEBUGGING = false; + + // our argument collection + private final GATKArgumentCollection argCollection; + + /** Collection of output streams used by the walker. */ + private OutputTracker outputTracker = null; + + /** our log, which we want to capture anything from this class */ + private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class); + + /** + * our constructor, where all the work is done + *

+ * legacy traversal types are sent to legacyTraversal function; as we move more of the traversals to the + * new MicroScheduler class we'll be able to delete that function. + * + * @param args the argument collection, where we get all our setup information from + * @param my_walker the walker we're running with + */ + protected GenomeAnalysisEngine(GATKArgumentCollection args, Walker my_walker) { + + // validate our parameters + if (args == null || my_walker == null) { + throw new StingException("Neither the GATKArgumentCollection or the Walker passed to GenomeAnalysisEngine can be null."); + } + + // save our argument parameter + this.argCollection = args; + + // make sure our instance variable points to this analysis engine + instance = this; + + // our reference ordered data collection + List> rods = new ArrayList>(); + + // + // please don't use these in the future, use the new syntax <- if we're not using these please remove them + // + if (argCollection.DBSNPFile != null) bindConvenienceRods("dbSNP", "dbsnp", argCollection.DBSNPFile); + if (argCollection.HAPMAPFile != null) + bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", argCollection.HAPMAPFile); + if (argCollection.HAPMAPChipFile != null) + bindConvenienceRods("hapmap-chip", "GFF", argCollection.HAPMAPChipFile); + + // parse out the rod bindings + ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods); + + // create the output streams + initializeOutputStreams(); + + // our microscheduler, which is in charge of running everything + MicroScheduler microScheduler = null; + + // if we're a read or a locus walker, we use the new system. 
Right now we have complicated + // branching based on the input data, but this should disapear when all the traversals are switched over + if ((my_walker instanceof LocusWalker && argCollection.walkAllLoci && !(argCollection.samFiles == null || argCollection.samFiles.size() == 0)) || + my_walker instanceof ReadWalker) { + microScheduler = createMicroscheduler(my_walker, rods); + } else { // we have an old style traversal, once we're done return + legacyTraversal(my_walker, rods); + return; + } + + // Prepare the sort ordering w.r.t. the sequence dictionary + if (argCollection.referenceFile != null) { + final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile); + GenomeLoc.setupRefContigOrdering(refFile); + } + + // Determine the validation stringency. Default to ValidationStringency.STRICT. + ValidationStringency strictness = getValidationStringency(); + + logger.info("Strictness is " + strictness); + + // perform validation steps that are common to all the engines + genericEngineSetup(strictness); + + // parse out any genomic location they've provided + List locs = setupIntervalRegion(); + + // excute the microscheduler + microScheduler.execute(my_walker, locs); + } + + + /** + * this is to accomdate the older style traversals, that haven't been converted over to the new system. Putting them + * into their own function allows us to deviate in the two behaviors so the new style traversals aren't limited to what + * the old style does. As traversals are converted, this function should disappear. 
+ * + * @param my_walker + * @param rods + */ + private void legacyTraversal(Walker my_walker, List> rods) { + if (my_walker instanceof LocusWindowWalker) { + this.engine = new TraverseByLocusWindows(argCollection.samFiles, argCollection.referenceFile, rods); + } else if (my_walker instanceof LocusWalker) { + if (argCollection.referenceFile == null) + Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given")); + if (argCollection.samFiles == null || argCollection.samFiles.size() == 0) { + if (((LocusWalker) my_walker).requiresReads()) + Utils.scareUser(String.format("Analysis %s requires reads, but none were given", argCollection.analysisName)); + this.engine = new TraverseByReference(null, argCollection.referenceFile, rods); + } else { + if (((LocusWalker) my_walker).cannotHandleReads()) + Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", argCollection.analysisName, argCollection.samFiles)); + this.engine = new TraverseByLoci(argCollection.samFiles, argCollection.referenceFile, rods); + } + } else if (my_walker instanceof DuplicateWalker) { + // we're a duplicate walker + this.engine = new TraverseDuplicates(argCollection.samFiles, argCollection.referenceFile, rods); + } else { + throw new RuntimeException("Unexpected walker type: " + my_walker); + } + + // Prepare the sort ordering w.r.t. the sequence dictionary + if (argCollection.referenceFile != null) { + final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile); + GenomeLoc.setupRefContigOrdering(refFile); + } + + // Determine the validation stringency. Default to ValidationStringency.STRICT. 
+ ValidationStringency strictness = getValidationStringency(); + + logger.info("Strictness is " + strictness); + genericEngineSetup(strictness); + + + engine.traverse(my_walker); + + } + + /** + * setup a microscheduler + * + * @param my_walker our walker of type LocusWalker + * @param rods the reference order data + * @return a new microscheduler + */ + private MicroScheduler createMicroscheduler(Walker my_walker, List> rods) { + // the mircoscheduler to return + MicroScheduler microScheduler = null; + + // we need to verify different parameter based on the walker type + if (my_walker instanceof LocusWalker) { + // some warnings + if (argCollection.referenceFile == null) + Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given")); + if (((LocusWalker) my_walker).cannotHandleReads()) + Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", argCollection.analysisName, argCollection.samFiles)); + + // create the MicroScheduler + microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads); + engine = microScheduler.getTraversalEngine(); + } + else if (my_walker instanceof ReadWalker) + { + if (argCollection.referenceFile == null) + Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given")); + microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads); + engine = microScheduler.getTraversalEngine(); + } + + return microScheduler; + } + + + /** + * commands that get executed for each engine, regardless of the type + * + * @param strictness our current strictness level + */ + private void genericEngineSetup(ValidationStringency strictness) { + engine.setStrictness(strictness); + + engine.setMaxReads(Integer.parseInt(argCollection.maximumReads)); + + if 
(argCollection.genomeRegion != null) { + engine.setLocation(argCollection.genomeRegion); + } + // we default interval files over the genome region strin + if (argCollection.intervalsFile != null) { + engine.setLocationFromFile(argCollection.intervalsFile); + } + // hmm... + if (argCollection.maximumReadSorts != null) { + engine.setSortOnFly(Integer.parseInt(argCollection.maximumReadSorts)); + } + + if (argCollection.downsampleFraction != null) { + engine.setDownsampleByFraction(Double.parseDouble(argCollection.downsampleFraction)); + } + + if (argCollection.downsampleCoverage != null) { + engine.setDownsampleByCoverage(Integer.parseInt(argCollection.downsampleCoverage)); + } + + engine.setSafetyChecking(!argCollection.unsafe); + engine.setThreadedIO(argCollection.enabledThreadedIO); + engine.setWalkOverAllSites(argCollection.walkAllLoci); + engine.initialize(); + } + + + /** + * setup the interval regions, from either the interval file of the genome region string + * + * @return a list of genomeLoc representing the interval file + */ + private List setupIntervalRegion() { + List locs; + if (argCollection.intervalsFile != null) + locs = GenomeLoc.IntervalFileToList(argCollection.intervalsFile); + else + locs = GenomeLoc.parseGenomeLocs(argCollection.genomeRegion); + return locs; + } + + /** + * Default to ValidationStringency.STRICT. + * + * @return the validation stringency + */ + private ValidationStringency getValidationStringency() { + ValidationStringency strictness; + try { + strictness = Enum.valueOf(ValidationStringency.class, argCollection.strictnessLevel); + } + catch (IllegalArgumentException ex) { + strictness = ValidationStringency.STRICT; + } + return strictness; + } + + /** + * Convenience function that binds RODs using the old-style command line parser to the new style list for + * a uniform processing. 
+ * + * @param name + * @param type + * @param file + */ + private void bindConvenienceRods(final String name, final String type, final String file) { + argCollection.RODBindings.add(Utils.join(",", new String[]{name, type, file})); + } + + + /** Initialize the output streams as specified by the user. */ + private void initializeOutputStreams() { + outputTracker = (argCollection.outErrFileName != null) ? new OutputTracker(argCollection.outErrFileName, argCollection.outErrFileName) + : new OutputTracker(argCollection.outFileName, argCollection.errFileName); + } + + /** + * Gets the output tracker. Tracks data available to a given walker. + * + * @return The output tracker. + */ + public OutputTracker getOutputTracker() { + return outputTracker; + } + + + public SAMFileReader getSamReader() { + return this.engine.getSamReader(); + } + + public TraversalEngine getEngine() { + return this.engine; + } +} diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java index e009030e6..845173396 100644 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java @@ -255,6 +255,15 @@ public abstract class CommandLineProgram { parser.loadArgumentsIntoObject( obj ); } + /** + * a manual way to load argument providing objects into the program + * @param clp the command line program + * @param cls the class to load the arguments off of + */ + public void loadAdditionalSource(CommandLineProgram clp, Class cls ) { + parser.addArgumentSource( clp.getArgumentSourceName(cls), cls ); + } + /** * generateHeaderInformation *

@@ -313,4 +322,46 @@ public abstract class CommandLineProgram { logger.setLevel(par); } + + /** + * a function used to indicate an error occured in the command line tool + * + * @param msg + */ + private static void printExitSystemMsg(final String msg) { + System.out.printf("------------------------------------------------------------------------------------------%n"); + System.out.printf("An error has occurred%n"); + System.out.printf("Check your command line arguments for any typos or inconsistencies.%n"); + System.out.printf("If you think it's because of a bug or a feature in GATK that should work, please report this to gsadevelopers@broad.mit.edu%n"); + System.out.printf("%n"); + System.out.printf("%s%n", msg); + } + + /** + * used to indicate an error occured + * @param msg the message to display + */ + public static void exitSystemWithError(final String msg) { + printExitSystemMsg(msg); + System.exit(1); + } + + /** + * used to indicate an error occured + * @param msg the message + * @param e the error + */ + public static void exitSystemWithError(final String msg, Exception e) { + e.printStackTrace(); + printExitSystemMsg(msg); + System.exit(1); + } + + /** + * used to indicate an error occured + * @param e the exception occured + */ + public static void exitSystemWithError(Exception e) { + exitSystemWithError(e.getMessage(), e); + } } diff --git a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java new file mode 100755 index 000000000..5d10b9cc0 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java @@ -0,0 +1,126 @@ +package org.broadinstitute.sting.gatk; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.GATKArgumentCollection; +import org.junit.After; +import static org.junit.Assert.fail; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.util.ArrayList; 
+import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * + * User: aaron + * Date: May 7, 2009 + * Time: 1:12:58 PM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + + +/** + * @author aaron + * @version 1.0 + * @date May 7, 2009 + *

+ * Class GATKArgumentCollectionTest + *

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public class GATKArgumentCollectionTest extends BaseTest { + + // our collection of arguments + private GATKArgumentCollection collect; + + // where to write our xml file + private String xmlFileLoc = "testfile.xml"; + + /** setup our test */ + @Before + public void setup() { + collect = new GATKArgumentCollection(); + } + + /** destroy the temp file */ + @After + public void takedown() { + File f = new File(xmlFileLoc); + if (f.exists()) { + f.delete(); + } + } + + private void setupCollection() { + // parameters and their defaults + Map wArgs = new HashMap(); + wArgs.put("wArgType1", "Arg1"); + wArgs.put("wArgType2", "Arg2"); + wArgs.put("wArgType3", "Arg3"); + collect.walkerArgs = wArgs; + + List input = new ArrayList(); + input.add(new File("test.file")); + collect.samFiles = input; + collect.maximumReads = "-1"; + collect.strictnessLevel = "strict"; + collect.referenceFile = new File("referenceFile".toLowerCase()); + collect.genomeRegion = "genomeRegion".toLowerCase(); + collect.analysisName = "analysisName".toLowerCase(); + collect.DBSNPFile = "DBSNPFile".toLowerCase(); + collect.HAPMAPFile = "HAPMAPFile".toLowerCase(); + collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase(); + collect.enabledThreadedIO = true; + collect.unsafe = false; + collect.maximumReadSorts = "maximumReadSorts".toLowerCase(); + collect.downsampleFraction = "downsampleFraction".toLowerCase(); + collect.downsampleCoverage = "downsampleCoverage".toLowerCase(); + collect.intervalsFile = "intervalsFile".toLowerCase(); + collect.walkAllLoci = true; + collect.disableThreading = false; + collect.outFileName = "outFileName".toLowerCase(); + collect.errFileName = "errFileName".toLowerCase(); + collect.outErrFileName = "outErrFileName".toLowerCase(); + collect.numberOfThreads = 1; + + // make some rod bindings up + ArrayList fakeBindings = new ArrayList(); + fakeBindings.add("Bind1"); + fakeBindings.add("Bind2"); + 
fakeBindings.add("Bind3"); + + collect.RODBindings = fakeBindings; + } + + + /** test the output of an XML file in the arg collection */ + @Test + public void testOutput() { + setupCollection(); + + GATKArgumentCollection.marshal(collect, xmlFileLoc); + GATKArgumentCollection collection = GATKArgumentCollection.unmarshal(xmlFileLoc); + if (!collect.equals(collection)) { + fail("Collections not equal"); + } + } + + + /** test the output of an XML file in the arg collection */ + @Test + public void testInput() { + setupCollection(); + GATKArgumentCollection.marshal(collect, xmlFileLoc); + } +}