diff --git a/build.xml b/build.xml
index cc8f42195..b3155c105 100644
--- a/build.xml
+++ b/build.xml
@@ -97,7 +97,7 @@
-
+
diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 148381dee..007917405 100755
--- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -13,7 +13,6 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.gatk.GATKArgumentCollection;
import java.util.ArrayList;
import java.util.List;
@@ -48,7 +47,7 @@ public class GenomeAnalysisEngine {
* @param args the argument collection, where we get all our setup information from
* @param my_walker the walker we're running with
*/
- protected GenomeAnalysisEngine(GATKArgumentCollection args, Walker my_walker) {
+ public GenomeAnalysisEngine(GATKArgumentCollection args, Walker my_walker) {
// validate our parameters
if (args == null || my_walker == null) {
diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java
deleted file mode 100644
index 084f49c28..000000000
--- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java
+++ /dev/null
@@ -1,342 +0,0 @@
-package org.broadinstitute.sting.gatk;
-
-import edu.mit.broad.picard.reference.ReferenceSequenceFile;
-import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
-import net.sf.samtools.SAMFileReader;
-import net.sf.samtools.SAMFileReader.ValidationStringency;
-import org.apache.log4j.Logger;
-import org.broadinstitute.sting.gatk.executive.MicroScheduler;
-import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
-import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
-import org.broadinstitute.sting.gatk.traversals.*;
-import org.broadinstitute.sting.gatk.walkers.*;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
-import org.broadinstitute.sting.utils.cmdLine.Argument;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.List;
-
-public class GenomeAnalysisTK extends CommandLineProgram {
- public static GenomeAnalysisTK Instance = null;
-
- // parameters and their defaults
- @Argument(fullName="input_file",shortName="I",doc="SAM or BAM file(s)",required=false)
- protected List INPUT_FILES = null;
-
- @Argument(fullName="maximum_reads",shortName="M",doc="Maximum number of reads to process before exiting",required=false)
- protected String MAX_READS_ARG = "-1";
-
- @Argument(fullName="validation_strictness",shortName="S",doc="How strict should we be with validation (LENIENT|SILENT|STRICT)",required=false)
- protected String STRICTNESS_ARG = "strict";
-
- @Argument(fullName="reference_sequence", shortName="R",doc="Reference sequence file",required=false)
- protected File REF_FILE_ARG = null;
-
- @Argument(fullName="genome_region", shortName="L", doc="Genome region to operation on: from chr:start-end",required=false)
- protected String REGION_STR = null;
-
- @Argument(fullName="analysis_type", shortName="T", doc="Type of analysis to run")
- protected String Analysis_Name = null;
-
- @Argument(fullName="DBSNP",shortName="D",doc="DBSNP file",required=false)
- protected String DBSNP_FILE = null;
-
- @Argument(fullName="hapmap",shortName="H",doc="Hapmap file",required=false)
- protected String HAPMAP_FILE = null;
-
- @Argument(fullName="hapmap_chip",shortName="hc",doc="Hapmap chip file",required=false)
- protected String HAPMAP_CHIP_FILE = null;
-
- @Argument(fullName="threaded_IO",shortName="P",doc="If set, enables threaded I/O operations",required=false)
- protected Boolean ENABLED_THREADED_IO = false;
-
- @Argument(fullName="unsafe",shortName="U",doc="If set, enables unsafe operations, nothing will be checked at runtime.",required=false)
- protected Boolean UNSAFE = false;
-
- @Argument(fullName="sort_on_the_fly",shortName="sort",doc="Maximum number of reads to sort on the fly",required=false)
- protected String MAX_ON_FLY_SORTS = null;
-
- @Argument(fullName="downsample_to_fraction",shortName="dfrac",doc="Fraction [0.0-1.0] of reads to downsample to",required=false)
- protected String DOWNSAMPLE_FRACTION = null;
-
- @Argument(fullName="downsample_to_coverage",shortName="dcov",doc="Coverage [integer] to downsample to",required=false)
- protected String DOWNSAMPLE_COVERAGE = null;
-
- @Argument(fullName="intervals_file",shortName="V",doc="File containing list of genomic intervals to operate on. line := ",required=false)
- protected String INTERVALS_FILE = null;
-
- // our walker manager
- private WalkerManager walkerManager = null;
-
- public String pluginPathName = null;
- private TraversalEngine engine = null;
- public boolean DEBUGGING = false;
-
- @Argument(fullName="all_loci",shortName="A",doc="Should we process all loci, not just those covered by reads",required=false)
- protected Boolean WALK_ALL_LOCI = false;
-
- @Argument(fullName="disablethreading",shortName="dt",doc="Disable experimental threading support.",required=false)
- protected Boolean DISABLE_THREADING = false;
-
- /**
- * An output file presented to the walker.
- */
- @Argument(fullName="out",shortName="o",doc="An output file presented to the walker. Will overwrite contents if file exists.",required=false)
- protected String outFileName = null;
-
- /**
- * An error output file presented to the walker.
- */
- @Argument(fullName="err",shortName="e",doc="An error output file presented to the walker. Will overwrite contents if file exists.",required=false)
- protected String errFileName = null;
-
- /**
- * A joint file for both 'normal' and error output presented to the walker.
- */
- @Argument(fullName="outerr",shortName="oe",doc="A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.",required=false)
- protected String outErrFileName = null;
-
- /**
- * How many threads should be allocated to this analysis.
- */
- @Argument(fullName="numthreads",shortName="nt",doc="How many threads should be allocated to running this analysis.",required=false)
- protected int numThreads = 1;
-
- @Argument(fullName="rodBind",shortName="B",doc="",required=false)
- protected ArrayList ROD_BINDINGS = new ArrayList();
-
- /**
- * Collection of output streams used by the walker.
- */
- private OutputTracker outputTracker = null;
-
- /**
- * our log, which we want to capture anything from this class
- */
- private static Logger logger = Logger.getLogger(GenomeAnalysisTK.class);
-
-
-
- /**
- * GATK can add arguments dynamically based on analysis type.
- * @return true
- */
- @Override
- protected boolean canAddArgumentsDynamically() { return true; }
-
- /**
- * GATK provides the walker as an argument source. As a side-effect, initializes the walker variable.
- * @return List of walkers to load dynamically.
- */
- @Override
- protected Class[] getArgumentSources() {
- if( Analysis_Name == null )
- throw new IllegalArgumentException("Must provide analysis name");
-
- walkerManager = new WalkerManager( pluginPathName );
-
- if( !walkerManager.doesWalkerExist(Analysis_Name) )
- throw new IllegalArgumentException("Invalid analysis name");
-
- return new Class[] { walkerManager.getWalkerClassByName(Analysis_Name) };
- }
-
- @Override
- protected String getArgumentSourceName( Class argumentSource ) {
- return WalkerManager.getWalkerName( (Class)argumentSource );
- }
-
- /**
- * Required main method implementation.
- */
- public static void main(String[] argv) {
- try {
- Instance = new GenomeAnalysisTK();
- start(Instance, argv);
- } catch ( Exception e ) {
- exitSystemWithError(e);
- }
- }
-
- /**
- * Convenience function that binds RODs using the old-style command line parser to the new style list for
- * a uniform processing.
- *
- * @param name
- * @param type
- * @param file
- */
- private void bindConvenienceRods(final String name, final String type, final String file )
- {
- ROD_BINDINGS.add( Utils.join(",", new String[] { name, type, file }) );
- }
-
- private static void printExitSystemMsg(final String msg) {
- System.out.printf("------------------------------------------------------------------------------------------%n");
- System.out.printf("An error has occurred%n");
- System.out.printf("It's possible (maybe even likely) that this is an input error on your part%n");
- System.out.printf("But if it's a bug or something that should work, please report this to gsadevelopers@broad.mit.edu%n");
- System.out.printf("%n");
- System.out.printf("%s%n", msg);
- }
-
- public static void exitSystemWithError(final String msg) {
- printExitSystemMsg(msg);
- System.exit(1);
- }
-
- public static void exitSystemWithError(final String msg, Exception e ) {
- e.printStackTrace();
- printExitSystemMsg(msg);
- System.exit(1);
- }
-
- public static void exitSystemWithError(Exception e ) {
- exitSystemWithError(e.getMessage(), e);
- }
-
- protected int execute() {
- final boolean TEST_ROD = false;
- List > rods = new ArrayList >();
-
- //
- // please don't use these in the future, use the new syntax
- //
- if ( DBSNP_FILE != null ) bindConvenienceRods("dbSNP", "dbsnp", DBSNP_FILE);
- if ( HAPMAP_FILE != null ) bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", HAPMAP_FILE);
- if ( HAPMAP_CHIP_FILE != null ) bindConvenienceRods("hapmap-chip", "GFF", HAPMAP_CHIP_FILE);
-
- ReferenceOrderedData.parseBindings(logger, ROD_BINDINGS, rods);
-
- initializeOutputStreams();
-
- Walker,?> my_walker = null;
- try {
- my_walker = walkerManager.createWalkerByName( Analysis_Name );
- loadArgumentsIntoObject( my_walker );
- }
- catch( InstantiationException ex ) {
- throw new RuntimeException( "Unable to instantiate walker.", ex );
- }
- catch( IllegalAccessException ex ) {
- throw new RuntimeException( "Unable to access walker", ex );
- }
-
- MicroScheduler microScheduler = null;
-
- // Get the walker specified
- if ( my_walker instanceof LocusWalker ) {
- LocusWalker, ?> walker = (LocusWalker, ?>) my_walker;
-
- if ( REF_FILE_ARG == null )
- Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
-
- if ( INPUT_FILES == null || INPUT_FILES.size() == 0 ) {
- if ( walker.requiresReads() )
- Utils.scareUser(String.format("Analysis %s requires reads, but none were given", Analysis_Name));
- this.engine = new TraverseByReference(null, REF_FILE_ARG, rods);
- } else {
- if ( walker.cannotHandleReads() )
- Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", Analysis_Name, INPUT_FILES));
-
- if ( WALK_ALL_LOCI ) {
- microScheduler = MicroScheduler.create( walker, INPUT_FILES, REF_FILE_ARG, rods, numThreads );
- this.engine = microScheduler.getTraversalEngine();
- }
- else
- this.engine = new TraverseByLoci(INPUT_FILES, REF_FILE_ARG, rods);
- }
- } else if ( my_walker instanceof LocusWindowWalker ) {
- this.engine = new TraverseByLocusWindows(INPUT_FILES, REF_FILE_ARG, rods);
- } else if ( my_walker instanceof ReadWalker) {
- // we're a read walker
- this.engine = new TraverseByReads(INPUT_FILES, REF_FILE_ARG, rods);
- } else if ( my_walker instanceof DuplicateWalker) {
- // we're a duplicate walker
- this.engine = new TraverseDuplicates(INPUT_FILES, REF_FILE_ARG, rods);
- } else {
- throw new RuntimeException("Unexpected walker type: " + my_walker);
- }
-
- // Prepare the sort ordering w.r.t. the sequence dictionary
- if (REF_FILE_ARG != null) {
- final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REF_FILE_ARG);
- GenomeLoc.setupRefContigOrdering(refFile);
- }
-
- // Determine the validation stringency. Default to ValidationStringency.STRICT.
- ValidationStringency strictness;
- try {
- strictness = Enum.valueOf(ValidationStringency.class, STRICTNESS_ARG);
- }
- catch( IllegalArgumentException ex ) {
- strictness = ValidationStringency.STRICT;
- }
-
- logger.info("Strictness is " + strictness);
- engine.setStrictness(strictness);
-
- engine.setMaxReads(Integer.parseInt(MAX_READS_ARG));
-
- if (REGION_STR != null) {
- engine.setLocation(REGION_STR);
- }
-
- if (INTERVALS_FILE != null) {
- engine.setLocationFromFile(INTERVALS_FILE);
- }
-
- if (MAX_ON_FLY_SORTS != null) {
- engine.setSortOnFly(Integer.parseInt(MAX_ON_FLY_SORTS));
- }
-
- if (DOWNSAMPLE_FRACTION != null) {
- engine.setDownsampleByFraction(Double.parseDouble(DOWNSAMPLE_FRACTION));
- }
-
- if (DOWNSAMPLE_COVERAGE != null) {
- engine.setDownsampleByCoverage(Integer.parseInt(DOWNSAMPLE_COVERAGE));
- }
-
- engine.setSafetyChecking(!UNSAFE);
- engine.setThreadedIO(ENABLED_THREADED_IO);
- engine.setWalkOverAllSites(WALK_ALL_LOCI);
- engine.initialize();
-
- if( microScheduler != null ) {
- List locs = null;
- if (INTERVALS_FILE != null)
- locs = GenomeLoc.IntervalFileToList(INTERVALS_FILE);
- else
- locs = GenomeLoc.parseGenomeLocs( REGION_STR );
- microScheduler.execute( my_walker, locs );
- }
- else
- engine.traverse(my_walker);
-
- return 0;
- }
-
- /**
- * Initialize the output streams as specified by the user.
- */
- private void initializeOutputStreams() {
- outputTracker = (outErrFileName != null) ? new OutputTracker( outErrFileName, outErrFileName )
- : new OutputTracker( outFileName, errFileName );
- }
-
- /**
- * Gets the output tracker. Tracks data available to a given walker.
- * @return The output tracker.
- */
- public OutputTracker getOutputTracker() {
- return outputTracker;
- }
-
-
- public SAMFileReader getSamReader() { return this.engine.getSamReader(); }
- public TraversalEngine getEngine() { return this.engine; }
-}
diff --git a/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisTKTest.java b/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisTKTest.java
deleted file mode 100755
index aceafcaec..000000000
--- a/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisTKTest.java
+++ /dev/null
@@ -1,170 +0,0 @@
-package org.broadinstitute.sting.gatk;
-
-import net.sf.samtools.SAMSequenceRecord;
-import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
-import static org.junit.Assert.fail;
-import org.junit.Test;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- *
- * User: aaron
- * Date: Apr 28, 2009
- * Time: 5:21:29 PM
- *
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
- *
- */
-
-
-/**
- * @author aaron
- * @version 1.0
- * @date Apr 28, 2009
- *
- * Class GenomeAnalysisTKTest
- *
- * A quick move of the unit test cases that were stuffed into GenomeAnalysisTK.
- */
-public class GenomeAnalysisTKTest extends BaseTest {
-
- // our sequence dictionary to check
- private final File refFileName = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/Homo_sapiens_assembly17.fasta");
-
- // skip n number of chromesomes when we do a chromesome by chromesome jumping
- private final int skipChrome = 15;
- /**
- * This has been blindly moved out of the GenomeAnalysisTK.java where it was hanging on,
- * but the author makes very limited promises of any functionality
- *
- */
- @Test
- public void testNewReferenceFeatures() {
-
- final FastaSequenceFile2 refFile = new FastaSequenceFile2(refFileName);
- GenomeLoc.setupRefContigOrdering(refFile);
-
- List refContigs = refFile.getSequenceDictionary().getSequences();
-
- /*
- for ( SAMSequenceRecord refContig: refContigs ) {
- System.out.printf(" Traversing from chr1 to %s would require jumping %d bytes%n",
- refContig.getSequenceName(), refFile.getDistanceBetweenContigs("chr1", refContig.getSequenceName()));
- }
- */
- String lastContig = null;
- List timings = new ArrayList();
- for ( SAMSequenceRecord startContig : refFile.getSequenceDictionary().getSequences() ) {
- final String startContigName = startContig.getSequenceName();
- int skip = 1;
- for ( SAMSequenceRecord targetContig : refFile.getSequenceDictionary().getSequences() ) {
- if (skipChrome != skip) {
- ++skip;
- continue;
- } else {
- skip = 1;
- }
-
- refFile.seekToContig(startContigName, true);
- logger.warn(String.format("Seeking: current=%s, target=%s", startContigName, targetContig.getSequenceName()));
- long lastTime = System.currentTimeMillis();
- final boolean success = refFile.seekToContig(targetContig.getSequenceName(), true);
- long curTime = System.currentTimeMillis();
- final double elapsed = (curTime - lastTime) / 1000.0;
- timings.add(elapsed);
- logger.warn(String.format(" -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks",
- elapsed, Utils.averageDouble(timings), timings.size()));
-
- if ( ! success ) {
- fail(String.format("Failured to seek to %s from %s", targetContig.getSequenceName(), lastContig ));
-
- }
- //System.exit(1);
- }
- }
-
- // code for randomly sampling the seeks
-// Random rnd = new Random();
-// String lastContig = null;
-// List timings = new ArrayList();
-// final int N_SAMPLES = 1000;
-// //try { refFile.seekToContig("chr3"); } catch ( IOException e ) {}
-// for ( int i = 0; i < N_SAMPLES; i++ ) {
-// final int nextIndex = rnd.nextInt(refContigs.size());
-// String nextContig = refFile.getSequenceDictionary().getSequence(nextIndex).getSequenceName();
-// //nextContig = "chr2";
-// try {
-// System.out.printf("Seeking: current=%s, target=%s%n", refFile.getContigName(), nextContig);
-// long lastTime = System.currentTimeMillis();
-// final boolean success = refFile.seekToContig(nextContig, true);
-// long curTime = System.currentTimeMillis();
-// final double elapsed = (curTime - lastTime) / 1000.0;
-// timings.add(elapsed);
-// System.out.printf(" -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks%n",
-// elapsed, Utils.averageDouble(timings), timings.size());
-//
-// if ( ! success ) {
-// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig );
-// }
-// //System.exit(1);
-// } catch ( IOException e ) {
-// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig );
-// e.printStackTrace();
-// }
-//
-// lastContig = nextContig;
-// }
-// System.exit(1);
-
-/*
- final String targetChr = "chr10";
- try {
- refFile.seekToContig(targetChr);
- } catch ( IOException e ){
- System.out.printf("Failured to seek to %s%n", targetChr);
- e.printStackTrace();
- }
- System.exit(1);
-
-
- //List timings = new ArrayList();
- final long startTime = System.currentTimeMillis();
- long lastTime = System.currentTimeMillis();
-
- int i = 0;
- String prevNextContigName = null;
- logger.info(String.format("Walking reference sequence:%n"));
- for ( SAMSequenceRecord refContig: refContigs ) {
- long curTime = System.currentTimeMillis();
- ReferenceSequence contig = refFile.nextSequence();
- final double elapsed = (curTime - lastTime) / 1000.0;
- timings.add(elapsed);
- logger.info(String.format("%2d : expected %s contig, found %s with next of %s after %.2f seconds, average is %.2f", i,
- refContig.getSequenceName(), contig.getName(), refFile.getNextContigName(), elapsed, Utils.averageDouble(timings)));
- if ( prevNextContigName != null && contig.getName() != null && ! prevNextContigName.equals(contig.getName()) )
- throw new RuntimeIOException(String.format("Unexpected contig ordering %s was expected next, but I found %s?",
- prevNextContigName, contig.getName()));
-
- prevNextContigName = refFile.getNextContigName();
- lastTime = curTime;
- i++;
-
- logger.info(String.format(" Traversing from chr1 to %s would require jumping %d bytes",
- contig.getName(), refFile.getDistanceBetweenContigs("chr1", contig.getName())));
- }
- */
- }
-
-}