diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
new file mode 100755
index 000000000..e3cb441b2
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
@@ -0,0 +1,131 @@
+package org.broadinstitute.sting.gatk;
+
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
+import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
+import org.broadinstitute.sting.gatk.GATKArgumentCollection;
+
+/**
+ *
+ * User: aaron
+ * Date: May 8, 2009
+ * Time: 10:50:58 AM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date May 8, 2009
+ *
+ * Class CommandLineGATK
+ *
+ * We run command line GATK programs using this class. It gets the command line args, parses them, and hands the
+ * gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here,
+ * the gatk engine should deal with any data related information.
+ */
+public class CommandLineGATK extends CommandLineProgram {
+
+ @ArgumentCollection // our argument collection, the collection of command line args we accept
+ public GATKArgumentCollection argCollection = new GATKArgumentCollection();
+
+ public String pluginPathName = null;
+
+ // our genome analysis engine
+ GenomeAnalysisEngine GATKEngine = null;
+
+ // our walker manager
+ private WalkerManager walkerManager = null;
+
+
+ /**
+  * Program entry point: builds a CommandLineGATK and passes it, together with the raw
+  * arguments, to start(). Any Exception that escapes is routed to exitSystemWithError().
+  *
+  * @param argv the raw command line arguments
+  */
+ public static void main(String[] argv) {
+     try {
+         start(new CommandLineGATK(), argv);
+     } catch (Exception failure) {
+         exitSystemWithError(failure);
+     }
+ }
+
+
+ /**
+  * This is the function that the inheriting class can expect to have called
+  * when the command line system has initialized. It creates the walker named by the
+  * analysis name argument, loads the parsed arguments into it, and constructs the
+  * GenomeAnalysisEngine with it.
+  *
+  * @return 0 if the engine was constructed without a StingException, -1 otherwise
+  * @throws RuntimeException if the walker cannot be instantiated or accessed
+  */
+ protected int execute() {
+     // NOTE(review): walkerManager is only assigned in getArgumentSources(); this method
+     // presumably relies on the framework calling that first -- confirm.
+     Walker<?, ?> mWalker = null;
+     try {
+         mWalker = walkerManager.createWalkerByName(argCollection.analysisName);
+     } catch (InstantiationException ex) {
+         throw new RuntimeException("Unable to instantiate walker.", ex);
+     } catch (IllegalAccessException ex) {
+         throw new RuntimeException("Unable to access walker", ex);
+     }
+     loadArgumentsIntoObject(mWalker);
+     try {
+         GATKEngine = new GenomeAnalysisEngine(argCollection, mWalker);
+     } catch (StingException exp) {
+         // report the failure on stderr and signal it through the exit code
+         System.err.println("Caught StingException. It's message is " + exp.getMessage());
+         exp.printStackTrace();
+         return -1;
+     }
+     return 0;
+ }
+
+ /**
+ * Tells the command line framework that GATK can add arguments dynamically based on analysis type.
+ *
+ * @return always true
+ */
+ @Override
+ protected boolean canAddArgumentsDynamically() {
+ return true;
+ }
+
+ /**
+  * Supplies the selected walker class as an additional argument source. Before doing so it
+  * loads the parsed arguments into argCollection and, as a side effect, creates the
+  * walkerManager from pluginPathName.
+  *
+  * @return a one-element array holding the walker class registered under the analysis name
+  * @throws IllegalArgumentException if no analysis name was given or no walker has that name
+  */
+ @Override
+ protected Class[] getArgumentSources() {
+     loadArgumentsIntoObject(this.argCollection);
+
+     final String requestedAnalysis = argCollection.analysisName;
+     if (requestedAnalysis == null) {
+         throw new IllegalArgumentException("Must provide analysis name");
+     }
+
+     walkerManager = new WalkerManager(pluginPathName);
+     if (!walkerManager.doesWalkerExist(requestedAnalysis)) {
+         throw new IllegalArgumentException("Invalid analysis name");
+     }
+
+     Class walkerClass = walkerManager.getWalkerClassByName(requestedAnalysis);
+     return new Class[]{walkerClass};
+ }
+
+ /**
+ * Returns the name WalkerManager reports for the given argument source class.
+ *
+ * NOTE(review): the cast to raw Class is a no-op as written; possibly a parameterized cast
+ * was intended -- confirm against WalkerManager.getWalkerName.
+ *
+ * @param argumentSource the class whose walker name is wanted
+ * @return the name produced by WalkerManager.getWalkerName
+ */
+ @Override
+ protected String getArgumentSourceName(Class argumentSource) {
+ return WalkerManager.getWalkerName((Class) argumentSource);
+ }
+
+ /**
+ * Gets the argument collection this program loads its command line arguments into.
+ *
+ * @return the current argument collection
+ */
+ public GATKArgumentCollection getArgCollection() {
+ return argCollection;
+ }
+
+ /**
+ * Replaces the argument collection used by this program.
+ *
+ * @param argCollection the new argument collection
+ */
+ public void setArgCollection(GATKArgumentCollection argCollection) {
+ this.argCollection = argCollection;
+ }
+
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java
new file mode 100755
index 000000000..c61724003
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java
@@ -0,0 +1,275 @@
+package org.broadinstitute.sting.gatk;
+
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.cmdLine.Argument;
+import org.simpleframework.xml.*;
+import org.simpleframework.xml.core.Persister;
+import org.simpleframework.xml.stream.Format;
+import org.simpleframework.xml.stream.HyphenStyle;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ *
+ * User: aaron
+ * Date: May 7, 2009
+ * Time: 11:46:21 AM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date May 7, 2009
+ *
+ * Class GATKArgumentCollection
+ *
+ * Encapsulates the large list of possible parameters accepted by the Genome Analysis Toolkit.
+ */
+@Root
+public class GATKArgumentCollection {
+
+ /* our version number */
+ private float versionNumber = 1;
+ private String description = "GATK Arguments";
+
+ /** Creates an argument collection in which every argument keeps the default given by its field initializer. */
+ public GATKArgumentCollection() {
+ }
+
+ // parameters and their defaults
+ @ElementList
+ @Argument(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
+ public List<File> samFiles = new ArrayList<File>();
+
+ @Element(required=false)
+ @Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of reads to process before exiting", required = false)
+ public String maximumReads = "-1";
+
+ @Element(required=false)
+ @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation (LENIENT|SILENT|STRICT)", required = false)
+ public String strictnessLevel = "strict";
+
+ @Element(required=false)
+ @Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
+ public File referenceFile = null;
+
+ @Element(required=false)
+ @Argument(fullName = "genome_region", shortName = "L", doc = "Genome region to operation on: from chr:start-end", required = false)
+ public String genomeRegion = null;
+
+ @Element(required=false)
+ @Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
+ public String analysisName = null;
+
+ // key/value arguments for the walker; persisted to XML but not bound to a command line @Argument
+ @ElementMap(entry = "analysis_argument", key = "key", attribute = true, inline = true, required=false)
+ public Map<String, String> walkerArgs = new HashMap<String, String>();
+
+ @Element(required=false)
+ @Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
+ public String DBSNPFile = null;
+
+ @Element(required=false)
+ @Argument(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false)
+ public String HAPMAPFile = null;
+
+ @Element(required=false)
+ @Argument(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false)
+ public String HAPMAPChipFile = null;
+
+ @Element(required=false)
+ @Argument(fullName = "threaded_IO", shortName = "P", doc = "If set, enables threaded I/O operations", required = false)
+ public Boolean enabledThreadedIO = false;
+
+ @Element(required=false)
+ @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
+ public Boolean unsafe = false;
+
+ @Element(required=false)
+ @Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
+ public String maximumReadSorts = null;
+
+ @Element(required=false)
+ @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
+ public String downsampleFraction = null;
+
+ @Element(required=false)
+ @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to", required = false)
+ public String downsampleCoverage = null;
+
+ // NOTE(review): the tail of the doc string had been truncated after "line := "; the line format
+ // below is a reconstruction -- confirm against the interval file parser.
+ @Element(required=false)
+ @Argument(fullName = "intervals_file", shortName = "V", doc = "File containing list of genomic intervals to operate on. line := <contig> <start> <end>", required = false)
+ public String intervalsFile = null;
+
+ @Element(required=false)
+ @Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false)
+ public Boolean walkAllLoci = false;
+
+ @Element(required=false)
+ @Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
+ public Boolean disableThreading = false;
+
+ /** An output file presented to the walker. */
+ @Element(required=false)
+ @Argument(fullName = "out", shortName = "o", doc = "An output file presented to the walker. Will overwrite contents if file exists.", required = false)
+ public String outFileName = null;
+
+ /** An error output file presented to the walker. */
+ @Element(required=false)
+ @Argument(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false)
+ public String errFileName = null;
+
+ /** A joint file for both 'normal' and error output presented to the walker. */
+ @Element(required=false)
+ @Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
+ public String outErrFileName = null;
+
+ /** How many threads should be allocated to this analysis. */
+ @Element(required=false)
+ @Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
+ public int numberOfThreads = 1;
+
+ // reference ordered data bindings, stored as strings
+ @ElementList(required=false)
+ @Argument(fullName = "rodBind", shortName = "B", doc = "", required = false)
+ public ArrayList<String> RODBindings = new ArrayList<String>();
+
+ /**
+  * Marshals an argument collection out to an XML file.
+  *
+  * @param collection the GATKArgumentCollection to write out
+  * @param outputFile the path of the file to write to
+  * @throws StingException if the serializer fails to write the file
+  */
+ public static void marshal(GATKArgumentCollection collection, String outputFile) {
+     Serializer serializer = new Persister(new Format(new HyphenStyle()));
+     File result = new File(outputFile);
+     try {
+         serializer.write(collection, result);
+     } catch (Exception e) {
+         // this is the write path, so report a failure writing *to* the file
+         throw new StingException("Failed to marshal the data to the file " + outputFile, e);
+     }
+ }
+
+ /**
+  * Unmarshals an argument collection from an XML configuration file.
+  *
+  * @param filename the path of the file to read from
+  * @return the argument collection read from the file
+  * @throws StingException if the serializer fails to read the file
+  */
+ public static GATKArgumentCollection unmarshal(String filename) {
+     Serializer serializer = new Persister(new Format(new HyphenStyle()));
+     File source = new File(filename);
+     try {
+         return serializer.read(GATKArgumentCollection.class, source);
+     } catch (Exception e) {
+         // this is the read path, so report a failure reading *from* the file
+         throw new StingException("Failed to unmarshal the data from the file " + filename, e);
+     }
+ }
+
+ /**
+  * Tests equality between two argument collections by comparing every argument field.
+  * A field that is null in both collections counts as equal, so collections that still hold
+  * their null defaults can be compared.
+  *
+  * Note that this overloads, rather than overrides, Object.equals(Object): it is only selected
+  * when the argument's static type is GATKArgumentCollection.
+  *
+  * @param other the other collection; may be null
+  * @return true if other is non-null and every argument matches
+  */
+ public boolean equals(GATKArgumentCollection other) {
+     if (other == null) {
+         return false;
+     }
+     // List.equals compares element by element in order and Map.equals compares keys and values,
+     // so the collection-valued arguments need no hand-written loops.
+     return fieldsMatch(samFiles, other.samFiles)
+             && fieldsMatch(walkerArgs, other.walkerArgs)
+             && fieldsMatch(RODBindings, other.RODBindings)
+             && fieldsMatch(maximumReads, other.maximumReads)
+             && fieldsMatch(strictnessLevel, other.strictnessLevel)
+             && fieldsMatch(referenceFile, other.referenceFile)
+             && fieldsMatch(genomeRegion, other.genomeRegion)
+             && fieldsMatch(analysisName, other.analysisName)
+             && fieldsMatch(DBSNPFile, other.DBSNPFile)
+             && fieldsMatch(HAPMAPFile, other.HAPMAPFile)
+             && fieldsMatch(HAPMAPChipFile, other.HAPMAPChipFile)
+             && fieldsMatch(enabledThreadedIO, other.enabledThreadedIO)
+             && fieldsMatch(unsafe, other.unsafe)
+             && fieldsMatch(maximumReadSorts, other.maximumReadSorts)
+             && fieldsMatch(downsampleFraction, other.downsampleFraction)
+             && fieldsMatch(downsampleCoverage, other.downsampleCoverage)
+             && fieldsMatch(intervalsFile, other.intervalsFile)
+             && fieldsMatch(walkAllLoci, other.walkAllLoci)
+             && fieldsMatch(disableThreading, other.disableThreading)
+             && fieldsMatch(outFileName, other.outFileName)
+             && fieldsMatch(errFileName, other.errFileName)
+             && fieldsMatch(outErrFileName, other.outErrFileName)
+             && numberOfThreads == other.numberOfThreads;
+ }
+
+ /** Null-safe equality: true when both are null, or when a is non-null and a.equals(b). */
+ private static boolean fieldsMatch(Object a, Object b) {
+     return (a == null) ? (b == null) : a.equals(b);
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
new file mode 100755
index 000000000..148381dee
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -0,0 +1,302 @@
+package org.broadinstitute.sting.gatk;
+
+import edu.mit.broad.picard.reference.ReferenceSequenceFile;
+import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMFileReader.ValidationStringency;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.executive.MicroScheduler;
+import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
+import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
+import org.broadinstitute.sting.gatk.traversals.*;
+import org.broadinstitute.sting.gatk.walkers.*;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.gatk.GATKArgumentCollection;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class GenomeAnalysisEngine {
+
+ // our instance of this genome analysis toolkit; it's used by other classes to extract the traversal engine
+ // TODO: public static without final tends to indicate we're thinking about this the wrong way
+ public static GenomeAnalysisEngine instance;
+
+ // our traversal engine
+ private TraversalEngine engine = null;
+
+ // the level of debugging we're using
+ public boolean DEBUGGING = false;
+
+ // our argument collection
+ private final GATKArgumentCollection argCollection;
+
+ /** Collection of output streams used by the walker. */
+ private OutputTracker outputTracker = null;
+
+ /** our log, which we want to capture anything from this class */
+ private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class);
+
+ /**
+  * Our constructor, where all the work is done: it binds the reference ordered data, creates the
+  * output tracker, picks a traversal for the walker and runs it before returning.
+  *
+  * Legacy traversal types are sent to the legacyTraversal function; as we move more of the traversals to the
+  * new MicroScheduler class we'll be able to delete that function.
+  *
+  * @param args      the argument collection, where we get all our setup information from
+  * @param my_walker the walker we're running with
+  * @throws StingException if either parameter is null
+  */
+ protected GenomeAnalysisEngine(GATKArgumentCollection args, Walker my_walker) {
+
+     // validate our parameters
+     if (args == null || my_walker == null) {
+         throw new StingException("Neither the GATKArgumentCollection or the Walker passed to GenomeAnalysisEngine can be null.");
+     }
+
+     // save our argument parameter
+     this.argCollection = args;
+
+     // make sure our instance variable points to this analysis engine
+     instance = this;
+
+     // our reference ordered data collection
+     List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
+
+     //
+     // please don't use these in the future, use the new syntax <- if we're not using these please remove them
+     // (each call appends a binding string to argCollection.RODBindings)
+     //
+     if (argCollection.DBSNPFile != null) {
+         bindConvenienceRods("dbSNP", "dbsnp", argCollection.DBSNPFile);
+     }
+     if (argCollection.HAPMAPFile != null) {
+         bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", argCollection.HAPMAPFile);
+     }
+     if (argCollection.HAPMAPChipFile != null) {
+         bindConvenienceRods("hapmap-chip", "GFF", argCollection.HAPMAPChipFile);
+     }
+
+     // parse out the rod bindings
+     ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods);
+
+     // create the output streams
+     initializeOutputStreams();
+
+     // our microscheduler, which is in charge of running everything
+     MicroScheduler microScheduler = null;
+
+     // if we're a read or a locus walker, we use the new system. Right now we have complicated
+     // branching based on the input data, but this should disappear when all the traversals are switched over
+     if ((my_walker instanceof LocusWalker && argCollection.walkAllLoci && !(argCollection.samFiles == null || argCollection.samFiles.size() == 0)) ||
+             my_walker instanceof ReadWalker) {
+         microScheduler = createMicroscheduler(my_walker, rods);
+     } else { // we have an old style traversal, once we're done return
+         legacyTraversal(my_walker, rods);
+         return;
+     }
+
+     // Prepare the sort ordering w.r.t. the sequence dictionary
+     if (argCollection.referenceFile != null) {
+         final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile);
+         GenomeLoc.setupRefContigOrdering(refFile);
+     }
+
+     // Determine the validation stringency. Default to ValidationStringency.STRICT.
+     ValidationStringency strictness = getValidationStringency();
+
+     logger.info("Strictness is " + strictness);
+
+     // perform validation steps that are common to all the engines
+     genericEngineSetup(strictness);
+
+     // parse out any genomic location they've provided
+     // NOTE(review): raw List kept as found; presumably a list of GenomeLoc -- confirm.
+     List locs = setupIntervalRegion();
+
+     // execute the microscheduler
+     microScheduler.execute(my_walker, locs);
+ }
+
+
+ /**
+  * This is to accommodate the older style traversals that haven't been converted over to the new system. Putting them
+  * into their own function allows us to deviate in the two behaviors so the new style traversals aren't limited to what
+  * the old style does. As traversals are converted, this function should disappear.
+  *
+  * Picks the traversal engine from the walker's type, applies the common engine setup and runs the traversal.
+  *
+  * @param my_walker the walker to run
+  * @param rods      the reference ordered data handed to the traversal engine
+  * @throws RuntimeException if the walker is none of the handled walker types
+  */
+ private void legacyTraversal(Walker my_walker, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
+     if (my_walker instanceof LocusWindowWalker) {
+         this.engine = new TraverseByLocusWindows(argCollection.samFiles, argCollection.referenceFile, rods);
+     } else if (my_walker instanceof LocusWalker) {
+         if (argCollection.referenceFile == null) {
+             Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
+         }
+         if (argCollection.samFiles == null || argCollection.samFiles.size() == 0) {
+             // no reads given: traverse by reference, unless the walker insists on reads
+             if (((LocusWalker) my_walker).requiresReads()) {
+                 Utils.scareUser(String.format("Analysis %s requires reads, but none were given", argCollection.analysisName));
+             }
+             this.engine = new TraverseByReference(null, argCollection.referenceFile, rods);
+         } else {
+             if (((LocusWalker) my_walker).cannotHandleReads()) {
+                 Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", argCollection.analysisName, argCollection.samFiles));
+             }
+             this.engine = new TraverseByLoci(argCollection.samFiles, argCollection.referenceFile, rods);
+         }
+     } else if (my_walker instanceof DuplicateWalker) {
+         // we're a duplicate walker
+         this.engine = new TraverseDuplicates(argCollection.samFiles, argCollection.referenceFile, rods);
+     } else {
+         throw new RuntimeException("Unexpected walker type: " + my_walker);
+     }
+
+     // Prepare the sort ordering w.r.t. the sequence dictionary
+     if (argCollection.referenceFile != null) {
+         final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile);
+         GenomeLoc.setupRefContigOrdering(refFile);
+     }
+
+     // Determine the validation stringency. Default to ValidationStringency.STRICT.
+     ValidationStringency strictness = getValidationStringency();
+
+     logger.info("Strictness is " + strictness);
+     genericEngineSetup(strictness);
+
+     engine.traverse(my_walker);
+ }
+
+ /**
+  * Sets up a microscheduler for a locus or read walker and stores its traversal engine in
+  * this.engine.
+  *
+  * @param my_walker our walker, expected to be a LocusWalker or a ReadWalker
+  * @param rods      the reference ordered data
+  * @return a new microscheduler, or null if the walker is neither a LocusWalker nor a ReadWalker
+  */
+ private MicroScheduler createMicroscheduler(Walker my_walker, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
+     // the microscheduler to return
+     MicroScheduler microScheduler = null;
+
+     // we need to verify different parameters based on the walker type
+     if (my_walker instanceof LocusWalker) {
+         // some warnings
+         if (argCollection.referenceFile == null) {
+             Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
+         }
+         if (((LocusWalker) my_walker).cannotHandleReads()) {
+             Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", argCollection.analysisName, argCollection.samFiles));
+         }
+
+         // create the MicroScheduler
+         microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads);
+         engine = microScheduler.getTraversalEngine();
+     } else if (my_walker instanceof ReadWalker) {
+         // NOTE(review): this message says "Locus-based" although this is the read walker branch.
+         if (argCollection.referenceFile == null) {
+             Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
+         }
+         microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads);
+         engine = microScheduler.getTraversalEngine();
+     }
+
+     return microScheduler;
+ }
+
+
+ /**
+ * Commands that get executed for each engine, regardless of the type: copies the relevant
+ * arguments from argCollection onto the traversal engine and then initializes it.
+ *
+ * @param strictness our current strictness level
+ * @throws NumberFormatException if one of the numeric arguments, which are held as strings, does not parse
+ */
+ private void genericEngineSetup(ValidationStringency strictness) {
+ engine.setStrictness(strictness);
+
+ engine.setMaxReads(Integer.parseInt(argCollection.maximumReads));
+
+ if (argCollection.genomeRegion != null) {
+ engine.setLocation(argCollection.genomeRegion);
+ }
+ // the intervals file is applied after the genome region string, so presumably it wins when both are given
+ if (argCollection.intervalsFile != null) {
+ engine.setLocationFromFile(argCollection.intervalsFile);
+ }
+ // sorting on the fly is only configured when a maximum was supplied
+ if (argCollection.maximumReadSorts != null) {
+ engine.setSortOnFly(Integer.parseInt(argCollection.maximumReadSorts));
+ }
+
+ if (argCollection.downsampleFraction != null) {
+ engine.setDownsampleByFraction(Double.parseDouble(argCollection.downsampleFraction));
+ }
+
+ if (argCollection.downsampleCoverage != null) {
+ engine.setDownsampleByCoverage(Integer.parseInt(argCollection.downsampleCoverage));
+ }
+
+ // safety checking is the inverse of the "unsafe" argument
+ engine.setSafetyChecking(!argCollection.unsafe);
+ engine.setThreadedIO(argCollection.enabledThreadedIO);
+ engine.setWalkOverAllSites(argCollection.walkAllLoci);
+ engine.initialize();
+ }
+
+
+ /**
+  * Builds the list of locations to process. The intervals file is used when one was given;
+  * otherwise the genome region string is parsed.
+  *
+  * @return the locations produced by GenomeLoc for the chosen source
+  */
+ private List setupIntervalRegion() {
+     if (argCollection.intervalsFile == null) {
+         return GenomeLoc.parseGenomeLocs(argCollection.genomeRegion);
+     }
+     return GenomeLoc.IntervalFileToList(argCollection.intervalsFile);
+ }
+
+ /**
+  * Resolves the validation stringency named by the strictnessLevel argument, ignoring case and
+  * surrounding whitespace. Defaults to ValidationStringency.STRICT when the level is null or
+  * does not name a constant.
+  *
+  * @return the validation stringency
+  */
+ private ValidationStringency getValidationStringency() {
+     final String requested = argCollection.strictnessLevel;
+     if (requested == null) {
+         return ValidationStringency.STRICT;
+     }
+     try {
+         // The lookup is case-sensitive and the constants are upper case, so normalise first;
+         // otherwise the default "strict" or a user-supplied "lenient" can never match. A fixed
+         // locale keeps the upper-casing of 'i' independent of the platform default.
+         final String constantName = requested.trim().toUpperCase(java.util.Locale.ENGLISH);
+         return Enum.valueOf(ValidationStringency.class, constantName);
+     } catch (IllegalArgumentException ex) {
+         return ValidationStringency.STRICT;
+     }
+ }
+
+ /**
+  * Convenience function that turns an old-style ROD argument into a new-style binding: joins
+  * name, type and file with commas and appends the result to argCollection.RODBindings, so that
+  * all bindings are processed uniformly.
+  *
+  * @param name the binding name
+  * @param type the ROD type
+  * @param file the ROD file
+  */
+ private void bindConvenienceRods(final String name, final String type, final String file) {
+     final String[] bindingParts = {name, type, file};
+     final String binding = Utils.join(",", bindingParts);
+     argCollection.RODBindings.add(binding);
+ }
+
+
+ /**
+  * Initialize the output streams as specified by the user. When a joint out/err file name was
+  * given it is used for both streams; otherwise the separate out and err file names are used.
+  */
+ private void initializeOutputStreams() {
+     final String jointFile = argCollection.outErrFileName;
+     if (jointFile != null) {
+         outputTracker = new OutputTracker(jointFile, jointFile);
+     } else {
+         outputTracker = new OutputTracker(argCollection.outFileName, argCollection.errFileName);
+     }
+ }
+
+ /**
+ * Gets the output tracker, which is built from the out, err and outerr file name arguments.
+ * Tracks data available to a given walker.
+ *
+ * @return The output tracker.
+ */
+ public OutputTracker getOutputTracker() {
+ return outputTracker;
+ }
+
+
+ /**
+ * Gets the SAM reader held by the traversal engine.
+ *
+ * @return whatever the traversal engine's getSamReader() returns
+ */
+ public SAMFileReader getSamReader() {
+ return this.engine.getSamReader();
+ }
+
+ /**
+ * Gets the traversal engine chosen for the walker.
+ *
+ * @return the traversal engine
+ */
+ public TraversalEngine getEngine() {
+ return this.engine;
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java
index e009030e6..845173396 100644
--- a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java
+++ b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java
@@ -255,6 +255,15 @@ public abstract class CommandLineProgram {
parser.loadArgumentsIntoObject( obj );
}
+ /**
+ * A manual way to load an argument-providing class into the program: registers cls with the
+ * parser as an argument source, under the name that clp reports for it.
+ * @param clp the command line program whose getArgumentSourceName() supplies the source name
+ * @param cls the class to load the arguments off of
+ */
+ public void loadAdditionalSource(CommandLineProgram clp, Class cls ) {
+ parser.addArgumentSource( clp.getArgumentSourceName(cls), cls );
+ }
+
/**
* generateHeaderInformation
*
@@ -313,4 +322,46 @@ public abstract class CommandLineProgram {
logger.setLevel(par);
}
+
+ /**
+  * Prints the standard error banner to standard output, followed by the given message.
+  *
+  * @param msg the error message to print after the banner
+  */
+ private static void printExitSystemMsg(final String msg) {
+     // each entry is a printf format string; none of them takes arguments
+     final String[] banner = {
+             "------------------------------------------------------------------------------------------%n",
+             "An error has occurred%n",
+             "Check your command line arguments for any typos or inconsistencies.%n",
+             "If you think it's because of a bug or a feature in GATK that should work, please report this to gsadevelopers@broad.mit.edu%n",
+             "%n"
+     };
+     for (String line : banner) {
+         System.out.printf(line);
+     }
+     System.out.printf("%s%n", msg);
+ }
+
+ /**
+ * Used to indicate an error occurred: prints the error banner with the message and
+ * terminates the JVM with exit status 1.
+ * @param msg the message to display
+ */
+ public static void exitSystemWithError(final String msg) {
+ printExitSystemMsg(msg);
+ System.exit(1);
+ }
+
+ /**
+ * Used to indicate an error occurred: prints the exception's stack trace, then the error
+ * banner with the message, and terminates the JVM with exit status 1.
+ * @param msg the message
+ * @param e the error
+ */
+ public static void exitSystemWithError(final String msg, Exception e) {
+ e.printStackTrace();
+ printExitSystemMsg(msg);
+ System.exit(1);
+ }
+
+ /**
+ * Used to indicate an error occurred; uses the exception's own message as the message to display.
+ * @param e the exception that occurred
+ */
+ public static void exitSystemWithError(Exception e) {
+ exitSystemWithError(e.getMessage(), e);
+ }
}
diff --git a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java
new file mode 100755
index 000000000..5d10b9cc0
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java
@@ -0,0 +1,126 @@
+package org.broadinstitute.sting.gatk;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.GATKArgumentCollection;
+import org.junit.After;
+import static org.junit.Assert.fail;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ *
+ * User: aaron
+ * Date: May 7, 2009
+ * Time: 1:12:58 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date May 7, 2009
+ *
+ * Class GATKArgumentCollectionTest
+ *
+ * Tests marshalling a GATKArgumentCollection to an XML file and unmarshalling it back.
+ */
+public class GATKArgumentCollectionTest extends BaseTest {
+
+ // our collection of arguments
+ private GATKArgumentCollection collect;
+
+ // where to write our xml file
+ private String xmlFileLoc = "testfile.xml";
+
+ /** Set up our test: start each test from a fresh argument collection. */
+ @Before
+ public void setup() {
+ collect = new GATKArgumentCollection();
+ }
+
+ /** Removes the XML file written by a test, if one was left behind. */
+ @After
+ public void takedown() {
+     final File leftover = new File(xmlFileLoc);
+     if (!leftover.exists()) {
+         return;
+     }
+     leftover.delete();
+ }
+
+ /**
+  * Fills the collection under test with a value for every argument. Most values are
+  * placeholder strings, since the tests only write the collection out and compare it.
+  */
+ private void setupCollection() {
+     // walker arguments
+     Map<String, String> wArgs = new HashMap<String, String>();
+     wArgs.put("wArgType1", "Arg1");
+     wArgs.put("wArgType2", "Arg2");
+     wArgs.put("wArgType3", "Arg3");
+     collect.walkerArgs = wArgs;
+
+     List<File> input = new ArrayList<File>();
+     input.add(new File("test.file"));
+     collect.samFiles = input;
+     collect.maximumReads = "-1";
+     collect.strictnessLevel = "strict";
+     collect.referenceFile = new File("referenceFile".toLowerCase());
+     collect.genomeRegion = "genomeRegion".toLowerCase();
+     collect.analysisName = "analysisName".toLowerCase();
+     collect.DBSNPFile = "DBSNPFile".toLowerCase();
+     collect.HAPMAPFile = "HAPMAPFile".toLowerCase();
+     collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase();
+     collect.enabledThreadedIO = true;
+     collect.unsafe = false;
+     collect.maximumReadSorts = "maximumReadSorts".toLowerCase();
+     collect.downsampleFraction = "downsampleFraction".toLowerCase();
+     collect.downsampleCoverage = "downsampleCoverage".toLowerCase();
+     collect.intervalsFile = "intervalsFile".toLowerCase();
+     collect.walkAllLoci = true;
+     collect.disableThreading = false;
+     collect.outFileName = "outFileName".toLowerCase();
+     collect.errFileName = "errFileName".toLowerCase();
+     collect.outErrFileName = "outErrFileName".toLowerCase();
+     collect.numberOfThreads = 1;
+
+     // make some rod bindings up
+     ArrayList<String> fakeBindings = new ArrayList<String>();
+     fakeBindings.add("Bind1");
+     fakeBindings.add("Bind2");
+     fakeBindings.add("Bind3");
+
+     collect.RODBindings = fakeBindings;
+ }
+
+
+ /**
+  * Round trip: marshal the populated collection to the XML file, unmarshal it again, and
+  * fail unless the original reports the copy as equal.
+  */
+ @Test
+ public void testOutput() {
+     setupCollection();
+
+     GATKArgumentCollection.marshal(collect, xmlFileLoc);
+     final GATKArgumentCollection roundTripped = GATKArgumentCollection.unmarshal(xmlFileLoc);
+
+     final boolean same = collect.equals(roundTripped);
+     if (!same) {
+         fail("Collections not equal");
+     }
+ }
+
+
+ /**
+ * Marshals the populated collection to the XML file; passes as long as marshal() does not throw.
+ * Nothing is read back in this test.
+ */
+ @Test
+ public void testInput() {
+ setupCollection();
+ GATKArgumentCollection.marshal(collect, xmlFileLoc);
+ }
+}