diff --git a/build.xml b/build.xml index cc8f42195..b3155c105 100644 --- a/build.xml +++ b/build.xml @@ -97,7 +97,7 @@ - + diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 148381dee..007917405 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -13,7 +13,6 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.gatk.GATKArgumentCollection; import java.util.ArrayList; import java.util.List; @@ -48,7 +47,7 @@ public class GenomeAnalysisEngine { * @param args the argument collection, where we get all our setup information from * @param my_walker the walker we're running with */ - protected GenomeAnalysisEngine(GATKArgumentCollection args, Walker my_walker) { + public GenomeAnalysisEngine(GATKArgumentCollection args, Walker my_walker) { // validate our parameters if (args == null || my_walker == null) { diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java deleted file mode 100644 index 084f49c28..000000000 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java +++ /dev/null @@ -1,342 +0,0 @@ -package org.broadinstitute.sting.gatk; - -import edu.mit.broad.picard.reference.ReferenceSequenceFile; -import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMFileReader.ValidationStringency; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.executive.MicroScheduler; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.traversals.*; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; -import org.broadinstitute.sting.utils.cmdLine.Argument; - -import java.io.File; -import java.util.ArrayList; -import java.util.List; - -public class GenomeAnalysisTK extends CommandLineProgram { - public static GenomeAnalysisTK Instance = null; - - // parameters and their defaults - @Argument(fullName="input_file",shortName="I",doc="SAM or BAM file(s)",required=false) - protected List INPUT_FILES = null; - - @Argument(fullName="maximum_reads",shortName="M",doc="Maximum number of reads to process before exiting",required=false) - protected String MAX_READS_ARG = "-1"; - - @Argument(fullName="validation_strictness",shortName="S",doc="How strict should we be with validation (LENIENT|SILENT|STRICT)",required=false) - protected String STRICTNESS_ARG = "strict"; - - @Argument(fullName="reference_sequence", shortName="R",doc="Reference sequence file",required=false) - protected File REF_FILE_ARG = null; - - @Argument(fullName="genome_region", shortName="L", doc="Genome region to operation on: from chr:start-end",required=false) - protected String REGION_STR = null; - - @Argument(fullName="analysis_type", shortName="T", doc="Type of analysis to run") - protected String Analysis_Name = null; - - @Argument(fullName="DBSNP",shortName="D",doc="DBSNP file",required=false) - protected String DBSNP_FILE = null; - - @Argument(fullName="hapmap",shortName="H",doc="Hapmap file",required=false) - protected String HAPMAP_FILE = null; - - @Argument(fullName="hapmap_chip",shortName="hc",doc="Hapmap chip file",required=false) - protected String HAPMAP_CHIP_FILE = null; - - @Argument(fullName="threaded_IO",shortName="P",doc="If set, enables threaded I/O operations",required=false) - protected Boolean ENABLED_THREADED_IO = false; - - @Argument(fullName="unsafe",shortName="U",doc="If set, enables unsafe operations, nothing will be checked at runtime.",required=false) - protected Boolean UNSAFE = false; - - @Argument(fullName="sort_on_the_fly",shortName="sort",doc="Maximum number of reads to sort on the fly",required=false) - protected String MAX_ON_FLY_SORTS = null; - - @Argument(fullName="downsample_to_fraction",shortName="dfrac",doc="Fraction [0.0-1.0] of reads to downsample to",required=false) - protected String DOWNSAMPLE_FRACTION = null; - - @Argument(fullName="downsample_to_coverage",shortName="dcov",doc="Coverage [integer] to downsample to",required=false) - protected String DOWNSAMPLE_COVERAGE = null; - - @Argument(fullName="intervals_file",shortName="V",doc="File containing list of genomic intervals to operate on. line := ",required=false) - protected String INTERVALS_FILE = null; - - // our walker manager - private WalkerManager walkerManager = null; - - public String pluginPathName = null; - private TraversalEngine engine = null; - public boolean DEBUGGING = false; - - @Argument(fullName="all_loci",shortName="A",doc="Should we process all loci, not just those covered by reads",required=false) - protected Boolean WALK_ALL_LOCI = false; - - @Argument(fullName="disablethreading",shortName="dt",doc="Disable experimental threading support.",required=false) - protected Boolean DISABLE_THREADING = false; - - /** - * An output file presented to the walker. - */ - @Argument(fullName="out",shortName="o",doc="An output file presented to the walker. Will overwrite contents if file exists.",required=false) - protected String outFileName = null; - - /** - * An error output file presented to the walker. - */ - @Argument(fullName="err",shortName="e",doc="An error output file presented to the walker. Will overwrite contents if file exists.",required=false) - protected String errFileName = null; - - /** - * A joint file for both 'normal' and error output presented to the walker. - */ - @Argument(fullName="outerr",shortName="oe",doc="A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.",required=false) - protected String outErrFileName = null; - - /** - * How many threads should be allocated to this analysis. - */ - @Argument(fullName="numthreads",shortName="nt",doc="How many threads should be allocated to running this analysis.",required=false) - protected int numThreads = 1; - - @Argument(fullName="rodBind",shortName="B",doc="",required=false) - protected ArrayList ROD_BINDINGS = new ArrayList(); - - /** - * Collection of output streams used by the walker. - */ - private OutputTracker outputTracker = null; - - /** - * our log, which we want to capture anything from this class - */ - private static Logger logger = Logger.getLogger(GenomeAnalysisTK.class); - - - - /** - * GATK can add arguments dynamically based on analysis type. - * @return true - */ - @Override - protected boolean canAddArgumentsDynamically() { return true; } - - /** - * GATK provides the walker as an argument source. As a side-effect, initializes the walker variable. - * @return List of walkers to load dynamically. - */ - @Override - protected Class[] getArgumentSources() { - if( Analysis_Name == null ) - throw new IllegalArgumentException("Must provide analysis name"); - - walkerManager = new WalkerManager( pluginPathName ); - - if( !walkerManager.doesWalkerExist(Analysis_Name) ) - throw new IllegalArgumentException("Invalid analysis name"); - - return new Class[] { walkerManager.getWalkerClassByName(Analysis_Name) }; - } - - @Override - protected String getArgumentSourceName( Class argumentSource ) { - return WalkerManager.getWalkerName( (Class)argumentSource ); - } - - /** - * Required main method implementation. - */ - public static void main(String[] argv) { - try { - Instance = new GenomeAnalysisTK(); - start(Instance, argv); - } catch ( Exception e ) { - exitSystemWithError(e); - } - } - - /** - * Convenience function that binds RODs using the old-style command line parser to the new style list for - * a uniform processing. - * - * @param name - * @param type - * @param file - */ - private void bindConvenienceRods(final String name, final String type, final String file ) - { - ROD_BINDINGS.add( Utils.join(",", new String[] { name, type, file }) ); - } - - private static void printExitSystemMsg(final String msg) { - System.out.printf("------------------------------------------------------------------------------------------%n"); - System.out.printf("An error has occurred%n"); - System.out.printf("It's possible (maybe even likely) that this is an input error on your part%n"); - System.out.printf("But if it's a bug or something that should work, please report this to gsadevelopers@broad.mit.edu%n"); - System.out.printf("%n"); - System.out.printf("%s%n", msg); - } - - public static void exitSystemWithError(final String msg) { - printExitSystemMsg(msg); - System.exit(1); - } - - public static void exitSystemWithError(final String msg, Exception e ) { - e.printStackTrace(); - printExitSystemMsg(msg); - System.exit(1); - } - - public static void exitSystemWithError(Exception e ) { - exitSystemWithError(e.getMessage(), e); - } - - protected int execute() { - final boolean TEST_ROD = false; - List > rods = new ArrayList >(); - - // - // please don't use these in the future, use the new syntax - // - if ( DBSNP_FILE != null ) bindConvenienceRods("dbSNP", "dbsnp", DBSNP_FILE); - if ( HAPMAP_FILE != null ) bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", HAPMAP_FILE); - if ( HAPMAP_CHIP_FILE != null ) bindConvenienceRods("hapmap-chip", "GFF", HAPMAP_CHIP_FILE); - - ReferenceOrderedData.parseBindings(logger, ROD_BINDINGS, rods); - - initializeOutputStreams(); - - Walker my_walker = null; - try { - my_walker = walkerManager.createWalkerByName( Analysis_Name ); - loadArgumentsIntoObject( my_walker ); - } - catch( InstantiationException ex ) { - throw new RuntimeException( "Unable to instantiate walker.", ex ); - } - catch( IllegalAccessException ex ) { - throw new RuntimeException( "Unable to access walker", ex ); - } - - MicroScheduler microScheduler = null; - - // Get the walker specified - if ( my_walker instanceof LocusWalker ) { - LocusWalker walker = (LocusWalker) my_walker; - - if ( REF_FILE_ARG == null ) - Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given")); - - if ( INPUT_FILES == null || INPUT_FILES.size() == 0 ) { - if ( walker.requiresReads() ) - Utils.scareUser(String.format("Analysis %s requires reads, but none were given", Analysis_Name)); - this.engine = new TraverseByReference(null, REF_FILE_ARG, rods); - } else { - if ( walker.cannotHandleReads() ) - Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", Analysis_Name, INPUT_FILES)); - - if ( WALK_ALL_LOCI ) { - microScheduler = MicroScheduler.create( walker, INPUT_FILES, REF_FILE_ARG, rods, numThreads ); - this.engine = microScheduler.getTraversalEngine(); - } - else - this.engine = new TraverseByLoci(INPUT_FILES, REF_FILE_ARG, rods); - } - } else if ( my_walker instanceof LocusWindowWalker ) { - this.engine = new TraverseByLocusWindows(INPUT_FILES, REF_FILE_ARG, rods); - } else if ( my_walker instanceof ReadWalker) { - // we're a read walker - this.engine = new TraverseByReads(INPUT_FILES, REF_FILE_ARG, rods); - } else if ( my_walker instanceof DuplicateWalker) { - // we're a duplicate walker - this.engine = new TraverseDuplicates(INPUT_FILES, REF_FILE_ARG, rods); - } else { - throw new RuntimeException("Unexpected walker type: " + my_walker); - } - - // Prepare the sort ordering w.r.t. the sequence dictionary - if (REF_FILE_ARG != null) { - final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REF_FILE_ARG); - GenomeLoc.setupRefContigOrdering(refFile); - } - - // Determine the validation stringency. Default to ValidationStringency.STRICT. - ValidationStringency strictness; - try { - strictness = Enum.valueOf(ValidationStringency.class, STRICTNESS_ARG); - } - catch( IllegalArgumentException ex ) { - strictness = ValidationStringency.STRICT; - } - - logger.info("Strictness is " + strictness); - engine.setStrictness(strictness); - - engine.setMaxReads(Integer.parseInt(MAX_READS_ARG)); - - if (REGION_STR != null) { - engine.setLocation(REGION_STR); - } - - if (INTERVALS_FILE != null) { - engine.setLocationFromFile(INTERVALS_FILE); - } - - if (MAX_ON_FLY_SORTS != null) { - engine.setSortOnFly(Integer.parseInt(MAX_ON_FLY_SORTS)); - } - - if (DOWNSAMPLE_FRACTION != null) { - engine.setDownsampleByFraction(Double.parseDouble(DOWNSAMPLE_FRACTION)); - } - - if (DOWNSAMPLE_COVERAGE != null) { - engine.setDownsampleByCoverage(Integer.parseInt(DOWNSAMPLE_COVERAGE)); - } - - engine.setSafetyChecking(!UNSAFE); - engine.setThreadedIO(ENABLED_THREADED_IO); - engine.setWalkOverAllSites(WALK_ALL_LOCI); - engine.initialize(); - - if( microScheduler != null ) { - List locs = null; - if (INTERVALS_FILE != null) - locs = GenomeLoc.IntervalFileToList(INTERVALS_FILE); - else - locs = GenomeLoc.parseGenomeLocs( REGION_STR ); - microScheduler.execute( my_walker, locs ); - } - else - engine.traverse(my_walker); - - return 0; - } - - /** - * Initialize the output streams as specified by the user. - */ - private void initializeOutputStreams() { - outputTracker = (outErrFileName != null) ? new OutputTracker( outErrFileName, outErrFileName ) - : new OutputTracker( outFileName, errFileName ); - } - - /** - * Gets the output tracker. Tracks data available to a given walker. - * @return The output tracker. - */ - public OutputTracker getOutputTracker() { - return outputTracker; - } - - - public SAMFileReader getSamReader() { return this.engine.getSamReader(); } - public TraversalEngine getEngine() { return this.engine; } -} diff --git a/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisTKTest.java b/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisTKTest.java deleted file mode 100755 index aceafcaec..000000000 --- a/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisTKTest.java +++ /dev/null @@ -1,170 +0,0 @@ -package org.broadinstitute.sting.gatk; - -import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; -import static org.junit.Assert.fail; -import org.junit.Test; - -import java.io.File; -import java.util.ArrayList; -import java.util.List; - -/** - * - * User: aaron - * Date: Apr 28, 2009 - * Time: 5:21:29 PM - * - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ - - -/** - * @author aaron - * @version 1.0 - * @date Apr 28, 2009 - *

- * Class GenomeAnalysisTKTest - *

- * A quick move of the unit test cases that were stuffed into GenomeAnalysisTK. - */ -public class GenomeAnalysisTKTest extends BaseTest { - - // our sequence dictionary to check - private final File refFileName = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/Homo_sapiens_assembly17.fasta"); - - // skip n number of chromesomes when we do a chromesome by chromesome jumping - private final int skipChrome = 15; - /** - * This has been blindly moved out of the GenomeAnalysisTK.java where it was hanging on, - * but the author makes very limited promises of any functionality - * - */ - @Test - public void testNewReferenceFeatures() { - - final FastaSequenceFile2 refFile = new FastaSequenceFile2(refFileName); - GenomeLoc.setupRefContigOrdering(refFile); - - List refContigs = refFile.getSequenceDictionary().getSequences(); - - /* - for ( SAMSequenceRecord refContig: refContigs ) { - System.out.printf(" Traversing from chr1 to %s would require jumping %d bytes%n", - refContig.getSequenceName(), refFile.getDistanceBetweenContigs("chr1", refContig.getSequenceName())); - } - */ - String lastContig = null; - List timings = new ArrayList(); - for ( SAMSequenceRecord startContig : refFile.getSequenceDictionary().getSequences() ) { - final String startContigName = startContig.getSequenceName(); - int skip = 1; - for ( SAMSequenceRecord targetContig : refFile.getSequenceDictionary().getSequences() ) { - if (skipChrome != skip) { - ++skip; - continue; - } else { - skip = 1; - } - - refFile.seekToContig(startContigName, true); - logger.warn(String.format("Seeking: current=%s, target=%s", startContigName, targetContig.getSequenceName())); - long lastTime = System.currentTimeMillis(); - final boolean success = refFile.seekToContig(targetContig.getSequenceName(), true); - long curTime = System.currentTimeMillis(); - final double elapsed = (curTime - lastTime) / 1000.0; - timings.add(elapsed); - logger.warn(String.format(" -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks", - elapsed, Utils.averageDouble(timings), timings.size())); - - if ( ! success ) { - fail(String.format("Failured to seek to %s from %s", targetContig.getSequenceName(), lastContig )); - - } - //System.exit(1); - } - } - - // code for randomly sampling the seeks -// Random rnd = new Random(); -// String lastContig = null; -// List timings = new ArrayList(); -// final int N_SAMPLES = 1000; -// //try { refFile.seekToContig("chr3"); } catch ( IOException e ) {} -// for ( int i = 0; i < N_SAMPLES; i++ ) { -// final int nextIndex = rnd.nextInt(refContigs.size()); -// String nextContig = refFile.getSequenceDictionary().getSequence(nextIndex).getSequenceName(); -// //nextContig = "chr2"; -// try { -// System.out.printf("Seeking: current=%s, target=%s%n", refFile.getContigName(), nextContig); -// long lastTime = System.currentTimeMillis(); -// final boolean success = refFile.seekToContig(nextContig, true); -// long curTime = System.currentTimeMillis(); -// final double elapsed = (curTime - lastTime) / 1000.0; -// timings.add(elapsed); -// System.out.printf(" -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks%n", -// elapsed, Utils.averageDouble(timings), timings.size()); -// -// if ( ! success ) { -// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig ); -// } -// //System.exit(1); -// } catch ( IOException e ) { -// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig ); -// e.printStackTrace(); -// } -// -// lastContig = nextContig; -// } -// System.exit(1); - -/* - final String targetChr = "chr10"; - try { - refFile.seekToContig(targetChr); - } catch ( IOException e ){ - System.out.printf("Failured to seek to %s%n", targetChr); - e.printStackTrace(); - } - System.exit(1); - - - //List timings = new ArrayList(); - final long startTime = System.currentTimeMillis(); - long lastTime = System.currentTimeMillis(); - - int i = 0; - String prevNextContigName = null; - logger.info(String.format("Walking reference sequence:%n")); - for ( SAMSequenceRecord refContig: refContigs ) { - long curTime = System.currentTimeMillis(); - ReferenceSequence contig = refFile.nextSequence(); - final double elapsed = (curTime - lastTime) / 1000.0; - timings.add(elapsed); - logger.info(String.format("%2d : expected %s contig, found %s with next of %s after %.2f seconds, average is %.2f", i, - refContig.getSequenceName(), contig.getName(), refFile.getNextContigName(), elapsed, Utils.averageDouble(timings))); - if ( prevNextContigName != null && contig.getName() != null && ! prevNextContigName.equals(contig.getName()) ) - throw new RuntimeIOException(String.format("Unexpected contig ordering %s was expected next, but I found %s?", - prevNextContigName, contig.getName())); - - prevNextContigName = refFile.getNextContigName(); - lastTime = curTime; - i++; - - logger.info(String.format(" Traversing from chr1 to %s would require jumping %d bytes", - contig.getName(), refFile.getDistanceBetweenContigs("chr1", contig.getName()))); - } - */ - } - -}