diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index a4063881d..16cd32875 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -1,9 +1,11 @@ package org.broadinstitute.sting.gatk; import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; -import org.broadinstitute.sting.utils.cmdLine.ArgumentFactory; +import org.broadinstitute.sting.utils.cmdLine.ArgumentSource; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.xReadLines; +import org.broadinstitute.sting.utils.sam.SAMFileWriterBuilder; +import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import org.broadinstitute.sting.gatk.walkers.Walker; import java.io.File; @@ -11,7 +13,7 @@ import java.io.FileNotFoundException; import java.util.List; import java.util.ArrayList; -import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMFileWriter; /* * Copyright (c) 2009 The Broad Institute @@ -106,6 +108,24 @@ public abstract class CommandLineExecutable extends CommandLineProgram { return new Class[] { GATKEngine.getWalkerByName(getAnalysisName()).getClass() }; } + /** + * Allows arguments to be hijacked by subclasses of the program before being placed + * into plugin classes. + * @return True if the particular field should be hijacked; false otherwise. 
+ */ + protected boolean intercept( ArgumentSource source, Object targetInstance, Object value ) { + if( !(Walker.class.isAssignableFrom(source.clazz)) ) + return false; + + if( value instanceof SAMFileReaderBuilder || value instanceof SAMFileWriterBuilder ) { + GATKEngine.setAdditionalIO( source.field, value ); + return true; + } + + return false; + } + + @Override protected String getArgumentSourceName( Class argumentSource ) { return WalkerManager.getWalkerName((Class) argumentSource); @@ -145,22 +165,4 @@ public abstract class CommandLineExecutable extends CommandLineProgram { } return unpackedReads; } - - /** - * Get a custom factory for instantiating specialty GATK arguments. - * @return An instance of the command-line argument of the specified type. - */ - @Override - protected ArgumentFactory getCustomArgumentFactory() { - return new ArgumentFactory() { - public Object createArgument( Class type, String... repr ) { - if (type == SAMFileReader.class && repr.length == 1) { - SAMFileReader samFileReader = new SAMFileReader(new File(repr[0]),true); - samFileReader.setValidationStringency(getArgumentCollection().strictnessLevel); - return samFileReader; - } - return null; - } - }; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java index af5ed2311..82efa60e3 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java @@ -102,10 +102,6 @@ public class GATKArgumentCollection { @Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. 
Intended only for testing", required = false) public Integer maximumEngineIterations = -1; - @Element(required = false) - @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false) - public Integer BAMcompression = null; - @Element(required = false) @Argument(fullName = "filterZeroMappingQualityReads", shortName = "fmq0", doc = "If true, mapping quality zero reads will be filtered at the lowest GATK level. Vastly improves performance at areas with abnormal depth due to mapping Q0 reads", required = false) public Boolean filterZeroMappingQualityReads = false; @@ -246,10 +242,6 @@ public class GATKArgumentCollection { if (!other.unsafe.equals(this.unsafe)) { return false; } - if (( other.BAMcompression == null && this.BAMcompression != null ) || - ( other.BAMcompression != null && !other.BAMcompression.equals(this.BAMcompression) )) { - return false; - } if (( other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null ) || ( other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads) )) { return false; diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index c3cdc13a5..c58e67f0d 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -34,6 +34,9 @@ import net.sf.samtools.SAMReadGroupRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.executive.MicroScheduler; 
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; @@ -41,13 +44,13 @@ import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.cmdLine.ArgumentException; import java.io.File; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.io.FileNotFoundException; +import java.util.*; +import java.lang.reflect.Field; public class GenomeAnalysisEngine { @@ -63,7 +66,7 @@ public class GenomeAnalysisEngine { private GATKArgumentCollection argCollection; /** Collection of output streams used by the walker. */ - private OutputTracker outputTracker = null; + private OutputTracker outputTracker = new OutputTracker(); /** our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class); @@ -124,14 +127,12 @@ public class GenomeAnalysisEngine { // Validate the walker inputs against the walker. validateInputsAgainstWalker(my_walker, argCollection, rods); + // our microscheduler, which is in charge of running everything + MicroScheduler microScheduler = createMicroscheduler(my_walker, rods); + // create the output streams initializeOutputStreams(my_walker); - // our microscheduler, which is in charge of running everything - MicroScheduler microScheduler = null; - - microScheduler = createMicroscheduler(my_walker, rods); - // Prepare the sort ordering w.r.t. 
the sequence dictionary if (argCollection.referenceFile != null) { final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile); @@ -148,8 +149,20 @@ public class GenomeAnalysisEngine { if (argCollection.intervals != null) { locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals)); } - // excute the microscheduler, storing the results - return microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations); + + ShardStrategy shardStrategy = this.getShardStrategy(my_walker, microScheduler.getReference(), locs, argCollection.maximumEngineIterations); + + // execute the microscheduler, storing the results + return microScheduler.execute(my_walker, shardStrategy); + } + + /** + * Add additional, externally managed IO streams for walker consumption. + * @param walkerField Field in the walker into which to inject the value. + * @param value Instance to inject. + */ + public void setAdditionalIO( Field walkerField, Object value ) { + outputTracker.addAdditionalOutput( walkerField, value ); } /** @@ -182,21 +195,29 @@ public class GenomeAnalysisEngine { // the mircoscheduler to return MicroScheduler microScheduler = null; + SAMDataSource readsDataSource = createReadsDataSource(extractSourceInfo(my_walker,argCollection)); + IndexedFastaSequenceFile referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); + List rodDataSources = getReferenceOrderedDataSources(rods); + + GenomeLocSortedSet locs = null; + if (argCollection.intervals != null) { + locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals)); + } + // we need to verify different parameter based on the walker type if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) { // create the MicroScheduler - microScheduler = MicroScheduler.create(my_walker, extractSourceInfo(my_walker,argCollection), argCollection.referenceFile, rods, 
argCollection.numberOfThreads); - engine = microScheduler.getTraversalEngine(); + microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads); } else if (my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) { if (argCollection.referenceFile == null) Utils.scareUser(String.format("Read-based traversals require a reference file but none was given")); - microScheduler = MicroScheduler.create(my_walker, extractSourceInfo(my_walker,argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads); - engine = microScheduler.getTraversalEngine(); + microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads); } else { Utils.scareUser(String.format("Unable to create the appropriate TraversalEngine for analysis type " + argCollection.analysisName)); } dataSource = microScheduler.getSAMDataSource(); + engine = microScheduler.getTraversalEngine(); return microScheduler; } @@ -366,17 +387,6 @@ public class GenomeAnalysisEngine { } } - /** - * Default to 5 (based on research by Alec Wysoker) - * - * @return the BAM compression - */ - public int getBAMCompression() { - return (argCollection.BAMcompression == null || - argCollection.BAMcompression < 1 || - argCollection.BAMcompression > 8) ? 5 : argCollection.BAMcompression; - } - /** * Convenience function that binds RODs using the old-style command line parser to the new style list for * a uniform processing. @@ -389,6 +399,116 @@ public class GenomeAnalysisEngine { argCollection.RODBindings.add(Utils.join(",", new String[]{name, type, file})); } + /** + * Get the sharding strategy given a driving data source. + * + * @param walker Walker for which to infer sharding strategy. + * @param drivingDataSource Data on which to shard. + * @param intervals Intervals to use when limiting sharding. 
+ * @param maxIterations the maximum number of iterations to run through + * + * @return Sharding strategy for this driving data source. + */ + protected ShardStrategy getShardStrategy(Walker walker, + ReferenceSequenceFile drivingDataSource, + GenomeLocSortedSet intervals, + Integer maxIterations) { + final long SHARD_SIZE = 100000L; + + ShardStrategy shardStrategy = null; + ShardStrategyFactory.SHATTER_STRATEGY shardType; + if (walker instanceof LocusWalker) { + if (intervals != null) { + shardType = (walker.isReduceByInterval()) ? + ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL : + ShardStrategyFactory.SHATTER_STRATEGY.LINEAR; + + shardStrategy = ShardStrategyFactory.shatter(shardType, + drivingDataSource.getSequenceDictionary(), + SHARD_SIZE, + intervals, maxIterations); + } else + shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, + drivingDataSource.getSequenceDictionary(), + SHARD_SIZE, maxIterations); + + } else if (walker instanceof ReadWalker || + walker instanceof DuplicateWalker) { + + shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS; + + if (intervals != null) { + shardStrategy = ShardStrategyFactory.shatter(shardType, + drivingDataSource.getSequenceDictionary(), + SHARD_SIZE, + intervals, maxIterations); + } else { + shardStrategy = ShardStrategyFactory.shatter(shardType, + drivingDataSource.getSequenceDictionary(), + SHARD_SIZE, maxIterations); + } + } else if (walker instanceof LocusWindowWalker) { + if( intervals == null ) + throw new StingException("Unable to shard: walker is of type LocusWindow, but no intervals were provided"); + shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, + drivingDataSource.getSequenceDictionary(), + SHARD_SIZE, + intervals, maxIterations); + } else + throw new StingException("Unable to support walker of type" + walker.getClass().getName()); + + return shardStrategy; + } + + /** + * Gets a data source for the given set of reads. 
+ * + * @param reads the read source information + * + * @return A data source for the given set of reads. + */ + private SAMDataSource createReadsDataSource(Reads reads) { + // By reference traversals are happy with no reads. Make sure that case is handled. + if (reads.getReadsFiles().size() == 0) + return null; + + SAMDataSource dataSource = new SAMDataSource(reads); + + return dataSource; + } + + /** + * Opens a reference sequence file paired with an index. + * + * @param refFile Handle to a reference sequence file. Non-null. + * + * @return A thread-safe file wrapper. + */ + private IndexedFastaSequenceFile openReferenceSequenceFile(File refFile) { + IndexedFastaSequenceFile ref = null; + try { + ref = new IndexedFastaSequenceFile(refFile); + } + catch (FileNotFoundException ex) { + throw new StingException("I/O error while opening fasta file: " + ex.getMessage(), ex); + } + GenomeLocParser.setupRefContigOrdering(ref); + return ref; + } + + /** + * Open the reference-ordered data sources. + * + * @param rods the reference order data to execute using + * + * @return A list of reference-ordered data sources. + */ + private List getReferenceOrderedDataSources(List> rods) { + List dataSources = new ArrayList(); + for (ReferenceOrderedData rod : rods) + dataSources.add(new ReferenceOrderedDataSource(rod)); + return dataSources; + } /** * Initialize the output streams as specified by the user. @@ -396,9 +516,11 @@ public class GenomeAnalysisEngine { * @param walker the walker to initialize output streams for */ private void initializeOutputStreams(Walker walker) { - outputTracker = (argCollection.outErrFileName != null) ? 
new OutputTracker(argCollection.outErrFileName, argCollection.outErrFileName) - : new OutputTracker(argCollection.outFileName, argCollection.errFileName); - walker.initializeOutputStreams(outputTracker); + if( argCollection.outErrFileName != null ) + outputTracker.initializeCoreIO( argCollection.outErrFileName, argCollection.outErrFileName ); + else + outputTracker.initializeCoreIO( argCollection.outFileName, argCollection.errFileName ); + outputTracker.prepareWalker(walker); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/OutputTracker.java b/java/src/org/broadinstitute/sting/gatk/OutputTracker.java index b48f91bfe..bf65f26a0 100755 --- a/java/src/org/broadinstitute/sting/gatk/OutputTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/OutputTracker.java @@ -1,13 +1,22 @@ package org.broadinstitute.sting.gatk; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.JVMUtils; +import org.broadinstitute.sting.utils.sam.SAMFileWriterBuilder; +import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import org.broadinstitute.sting.utils.io.RedirectingOutputStream; +import org.broadinstitute.sting.gatk.walkers.Walker; import java.io.FileOutputStream; import java.io.PrintStream; import java.io.FileNotFoundException; import java.io.OutputStream; import java.io.PrintWriter; +import java.lang.reflect.Field; +import java.util.Map; +import java.util.HashMap; + +import net.sf.samtools.SAMFileWriter; /** * User: hanna * Date: Apr 30, 2009 @@ -39,13 +48,15 @@ public class OutputTracker { protected ThreadLocal localOut = new ThreadLocal(); protected ThreadLocal localErr = new ThreadLocal(); + protected Map additionalIO = new HashMap(); + /** * Create an object to manage output given filenames for the output and error files. * If no files are specified, returns null. * @param outFileName Name of the output file. * @param errFileName Name of the error file. 
*/ - public OutputTracker( String outFileName, String errFileName ) { + public void initializeCoreIO( String outFileName, String errFileName ) { // If the two output streams match and are non-null, initialize them identically. // Otherwise, initialize them separately. if( outFileName != null && outFileName.equals(errFileName) ) { @@ -55,6 +66,35 @@ public class OutputTracker { else { globalOut = (outFileName != null) ? prepareOutputFile( outFileName ) : System.out; globalErr = (errFileName != null) ? prepareOutputFile( errFileName ) : System.err; + } + } + + public void prepareWalker( Walker walker ) { + Field out = JVMUtils.findField( walker.getClass(), "out" ); + Field err = JVMUtils.findField( walker.getClass(), "err" ); + + JVMUtils.setField( out, walker, new PrintStream(getOutStream()) ); + JVMUtils.setField( err, walker, new PrintStream(getErrStream()) ); + + for( Map.Entry io: additionalIO.entrySet() ) { + Field targetField = io.getKey(); + Object targetValue = io.getValue(); + + // Ghastly hacks: reaches in and finishes building out the SAMFileReader / SAMFileWriter. + // TODO: Generalize this, and move it to its own initialization step. + if( targetValue instanceof SAMFileReaderBuilder) { + SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue; + builder.setValidationStringency(GenomeAnalysisEngine.instance.getArguments().strictnessLevel); + targetValue = builder.build(); + } + + if( targetValue instanceof SAMFileWriterBuilder ) { + SAMFileWriterBuilder builder = (SAMFileWriterBuilder)targetValue; + builder.setSAMFileHeader(GenomeAnalysisEngine.instance.getDataSource().getHeader()); + targetValue = builder.build(); + } + + JVMUtils.setField( targetField, walker, targetValue ); } } @@ -115,6 +155,15 @@ public class OutputTracker { localErr.set( err ); } + /** + * Provide a mechanism for injecting supplemental streams for external management. + * @param field Field into which to inject this stream. + * @param stream Stream to manage. 
+ */ + public void addAdditionalOutput( Field field, Object stream ) { + additionalIO.put(field,stream); + } + /** * Remove pointers to alternate, local output streams. */ diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index 486a8e8b6..b5acf6e78 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -54,7 +54,7 @@ public class ShardDataProvider { /** * Sources of reference-ordered data. */ - private final List referenceOrderedData; + private final Collection referenceOrderedData; /** * Retrieves the shard associated with this data provider. @@ -101,7 +101,7 @@ public class ShardDataProvider { * views can access it. * @return List of reference-ordered data sources. */ - List getReferenceOrderedData() { + Collection getReferenceOrderedData() { return referenceOrderedData; } @@ -111,7 +111,7 @@ public class ShardDataProvider { * @param reads A window into the reads for a given region. * @param reference A getter for a section of the reference. */ - public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, List rods) { + public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods) { this.shard = shard; // Provide basic reads information. this.reads = (reads != null) ? 
reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList())); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index e056dec9c..e78c7475c 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.OutputTracker; import org.broadinstitute.sting.gatk.Reads; @@ -11,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; import javax.management.MBeanServer; @@ -20,6 +23,7 @@ import java.io.File; import java.util.List; import java.util.Queue; import java.util.LinkedList; +import java.util.Collection; import java.util.concurrent.Executors; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; @@ -74,11 +78,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar * Create a new hierarchical microscheduler to process the given reads and reference. * * @param reads Reads file(s) to process. 
- * @param refFile Reference for driving the traversal. + * @param reference Reference for driving the traversal. * @param nThreadsToUse maximum number of threads to use to do the work */ - protected HierarchicalMicroScheduler( Walker walker, Reads reads, File refFile, List> rods, int nThreadsToUse ) { - super(walker, reads, refFile, rods); + protected HierarchicalMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse ) { + super(walker, reads, reference, rods); this.threadPool = Executors.newFixedThreadPool(nThreadsToUse); try { @@ -91,12 +95,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar } } - public Object execute( Walker walker, GenomeLocSortedSet intervals, Integer maxIterations ) { + public Object execute( Walker walker, ShardStrategy shardStrategy ) { // Fast fail for walkers not supporting TreeReducible interface. if (!( walker instanceof TreeReducible )) throw new IllegalArgumentException("Hierarchical microscheduler only works with TreeReducible walkers"); - ShardStrategy shardStrategy = getShardStrategy(walker, reference, intervals, maxIterations); ReduceTree reduceTree = new ReduceTree(this); walker.initialize(); @@ -258,7 +261,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar OutputMerger outputMerger = new OutputMerger(); ShardTraverser traverser = new ShardTraverser(this, - getTraversalEngine(), + traversalEngine, walker, shard, getShardDataProvider(shard), diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index dd2b4a558..f2c2c8acc 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -3,14 +3,16 @@ package org.broadinstitute.sting.gatk.executive; import 
org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import java.io.File; -import java.util.List; +import java.util.Collection; /** A micro-scheduling manager for single-threaded execution of a traversal. */ public class LinearMicroScheduler extends MicroScheduler { @@ -18,23 +20,22 @@ public class LinearMicroScheduler extends MicroScheduler { /** * Create a new linear microscheduler to process the given reads and reference. * - * @param reads Reads file(s) to process. - * @param refFile Reference for driving the traversal. + * @param walker Walker for the traversal. + * @param reads Reads file(s) to process. + * @param reference Reference for driving the traversal. + * @param rods Reference-ordered data. */ - protected LinearMicroScheduler( Walker walker, Reads reads, File refFile, List> rods ) { - super(walker, reads, refFile, rods); + protected LinearMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods ) { + super(walker, reads, reference, rods); } /** * Run this traversal over the specified subsection of the dataset. * * @param walker Computation to perform over dataset. - * @param locations Subset of the dataset over which to walk. 
- * @param maxIterations the maximum number of iterations we're to perform + * @param shardStrategy A strategy for sharding the data. */ - public Object execute(Walker walker, GenomeLocSortedSet locations, Integer maxIterations) { - ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations, maxIterations); - + public Object execute(Walker walker, ShardStrategy shardStrategy) { walker.initialize(); Accumulator accumulator = Accumulator.create(walker); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index ae948a261..975daff81 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -26,29 +26,19 @@ package org.broadinstitute.sting.gatk.executive; import net.sf.picard.reference.ReferenceSequenceFile; -import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import 
org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import java.io.File; -import java.io.FileNotFoundException; import java.util.*; @@ -62,15 +52,13 @@ import java.util.*; /** Shards and schedules data in manageable chunks. */ public abstract class MicroScheduler { - private static long SHARD_SIZE = 100000L; - protected static Logger logger = Logger.getLogger(MicroScheduler.class); protected final TraversalEngine traversalEngine; protected final IndexedFastaSequenceFile reference; private final SAMDataSource reads; - private final List rods; + private final Collection rods; /** * MicroScheduler factory function. Create a microscheduler appropriate for reducing the @@ -78,19 +66,19 @@ public abstract class MicroScheduler { * * @param walker Which walker to use. * @param reads the informations associated with the reads - * @param ref the reference file + * @param reference the reference file * @param rods the rods to include in the traversal * @param nThreadsToUse Number of threads to utilize. * * @return The best-fit microscheduler. 
*/ - public static MicroScheduler create(Walker walker, Reads reads, File ref, List> rods, int nThreadsToUse) { + public static MicroScheduler create(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse) { if (walker instanceof TreeReducible && nThreadsToUse > 1) { logger.info("Creating hierarchical microscheduler"); - return new HierarchicalMicroScheduler(walker, reads, ref, rods, nThreadsToUse); + return new HierarchicalMicroScheduler(walker, reads, reference, rods, nThreadsToUse); } else { logger.info("Creating linear microscheduler"); - return new LinearMicroScheduler(walker, reads, ref, rods); + return new LinearMicroScheduler(walker, reads, reference, rods); } } @@ -99,10 +87,10 @@ public abstract class MicroScheduler { * * @param walker the walker to execute with * @param reads The reads. - * @param refFile File pointer to the reference. + * @param reference The reference. * @param rods the rods to include in the traversal */ - protected MicroScheduler(Walker walker, Reads reads, File refFile, List> rods) { + protected MicroScheduler(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods) { if (walker instanceof ReadWalker) { traversalEngine = new TraverseReads(); } else if (walker instanceof LocusWalker) { @@ -114,11 +102,17 @@ public abstract class MicroScheduler { } else { throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type."); } - this.reads = setupReadsDataSource(reads); - this.reference = openReferenceSequenceFile(refFile); - this.rods = getReferenceOrderedDataSources(rods); + this.reads = reads; + this.reference = reference; + this.rods = rods; validate(this.reads,this.reference); + + // Side effect: initialize the traversal engine with reads data. + // TODO: Give users a dedicated way of getting the header so that the MicroScheduler + // doesn't have to bend over backward providing legacy getters and setters. 
+ traversalEngine.setSAMHeader(reads.getHeader()); + traversalEngine.initialize(); } /** @@ -129,77 +123,18 @@ public abstract class MicroScheduler { */ public TraversalEngine getTraversalEngine() { return traversalEngine; - } + } /** * Walks a walker over the given list of intervals. * * @param walker Computation to perform over dataset. - * @param intervals A list of intervals over which to walk. Null for whole dataset. - * @param maxIterations the maximum number of iterations we're to perform + * @param shardStrategy A strategy for sharding the data. * * @return the return type of the walker */ - public abstract Object execute(Walker walker, GenomeLocSortedSet intervals, Integer maxIterations); + public abstract Object execute(Walker walker, ShardStrategy shardStrategy); - /** - * Get the sharding strategy given a driving data source. - * - * @param walker Walker for which to infer sharding strategy. - * @param drivingDataSource Data on which to shard. - * @param intervals Intervals to use when limiting sharding. - * @param maxIterations the maximum number of iterations to run through - * - * @return Sharding strategy for this driving data source. - */ - protected ShardStrategy getShardStrategy(Walker walker, - ReferenceSequenceFile drivingDataSource, - GenomeLocSortedSet intervals, - Integer maxIterations) { - ShardStrategy shardStrategy = null; - ShardStrategyFactory.SHATTER_STRATEGY shardType; - if (walker instanceof LocusWalker) { - if (intervals != null) { - shardType = (walker.isReduceByInterval()) ? 
- ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL : - ShardStrategyFactory.SHATTER_STRATEGY.LINEAR; - - shardStrategy = ShardStrategyFactory.shatter(shardType, - drivingDataSource.getSequenceDictionary(), - SHARD_SIZE, - intervals, maxIterations); - } else - shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, - drivingDataSource.getSequenceDictionary(), - SHARD_SIZE, maxIterations); - - } else if (walker instanceof ReadWalker || - walker instanceof DuplicateWalker) { - - shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS; - - if (intervals != null) { - shardStrategy = ShardStrategyFactory.shatter(shardType, - drivingDataSource.getSequenceDictionary(), - SHARD_SIZE, - intervals, maxIterations); - } else { - shardStrategy = ShardStrategyFactory.shatter(shardType, - drivingDataSource.getSequenceDictionary(), - SHARD_SIZE, maxIterations); - } - } else if (walker instanceof LocusWindowWalker) { - if( intervals == null ) - throw new StingException("Unable to shard: walker is of type LocusWindow, but no intervals were provided"); - shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, - drivingDataSource.getSequenceDictionary(), - SHARD_SIZE, - intervals, maxIterations); - } else - throw new StingException("Unable to support walker of type" + walker.getClass().getName()); - - return shardStrategy; - } /** * Gets an window into all the data that can be viewed as a single shard. @@ -228,28 +163,6 @@ public abstract class MicroScheduler { traversalEngine.printOnTraversalDone(sum); } - /** - * Gets a data source for the given set of reads. - * - * @param reads the read source information - * - * @return A data source for the given set of reads. - */ - private SAMDataSource setupReadsDataSource(Reads reads) { - // By reference traversals are happy with no reads. Make sure that case is handled. 
- if (reads.getReadsFiles().size() == 0) - return null; - - SAMDataSource dataSource = new SAMDataSource(reads); - - // Side effect: initialize the traversal engine with reads data. - // TODO: Give users a dedicated way of getting the header so that the MicroScheduler - // doesn't have to bend over backward providing legacy getters and setters. - traversalEngine.setSAMHeader(dataSource.getHeader()); - - return dataSource; - } - /** * Returns data source maintained by this scheduler * @return @@ -257,37 +170,10 @@ public abstract class MicroScheduler { public SAMDataSource getSAMDataSource() { return reads; } /** - * Open the reference-ordered data sources. - * - * @param rods the reference order data to execute using - * - * @return A list of reference-ordered data sources. + * Returns the reference maintained by this scheduler. + * @return The reference maintained by this scheduler. */ - private List getReferenceOrderedDataSources(List> rods) { - List dataSources = new ArrayList(); - for (ReferenceOrderedData rod : rods) - dataSources.add(new ReferenceOrderedDataSource(rod)); - return dataSources; - } - - /** - * Opens a reference sequence file paired with an index. - * - * @param refFile Handle to a reference sequence file. Non-null. - * - * @return A thread-safe file wrapper. - */ - private IndexedFastaSequenceFile openReferenceSequenceFile(File refFile) { - IndexedFastaSequenceFile ref = null; - try { - ref = new IndexedFastaSequenceFile(refFile); - } - catch (FileNotFoundException ex) { - throw new StingException("I/O error while opening fasta file: " + ex.getMessage(), ex); - } - GenomeLocParser.setupRefContigOrdering(ref); - return ref; - } + public IndexedFastaSequenceFile getReference() { return reference; } /** * Now that all files are open, validate the sequence dictionaries of the reads vs. the reference. 
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index f3dab5f8d..9de6de5ec 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -48,7 +48,7 @@ public class PrintReadsWalker extends ReadWalker { /** an optional argument to dump the reads out to a BAM file */ @Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false) - String outputBamFile = null; + SAMFileWriter outputBamFile = null; @Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false) Integer maxLength = null; @Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false) @@ -102,12 +102,7 @@ public class PrintReadsWalker extends ReadWalker { * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise */ public SAMFileWriter reduceInit() { - if (outputBamFile != null) { - SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader(); - return Utils.createSAMFileWriterWithCompression(header, true, outputBamFile, getToolkit().getBAMCompression()); - } else { - return null; - } + return outputBamFile; } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java index 2100a5e6e..7ee79540e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java @@ -47,6 +47,9 @@ public class SplitSamFileWalker extends ReadWalker { protected Walker() { } - public void initializeOutputStreams( OutputTracker outputTracker ) { - out = new 
PrintStream( outputTracker.getOutStream() ); - err = new PrintStream( outputTracker.getErrStream() ); - } - /** * Retrieve the toolkit, for peering into internal structures that can't * otherwise be read. Use sparingly, and discuss uses with software engineering diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java index 79c31ab8d..53d0cba5c 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java @@ -38,7 +38,7 @@ public class CleanedReadInjector extends ReadWalker { * Target file for BAM output. */ @Argument(fullName="output_bam",shortName="ob",doc="Output BAM file",required=true) - String outputBAMFileName = null; + SAMFileWriter outputBAM = null; /** * The set of (sorted) cleaned reads @@ -50,11 +50,6 @@ public class CleanedReadInjector extends ReadWalker { */ private HashSet cleanedReadHash = new HashSet(); - /** - * The writer that handles writing of SAM files. - */ - SAMFileWriter outputBAM = null; - @Override public void initialize() { @@ -68,15 +63,6 @@ public class CleanedReadInjector extends ReadWalker { cleanedReadHash.add(getUniquifiedReadName(read)); } allReads.close(); - - // HACK: The unit tests create their own output files. Make sure this walker doesn't step - // on any toes. 
- if( outputBAM == null ) { - outputBAM = Utils.createSAMFileWriterWithCompression(getToolkit().getEngine().getSAMHeader(), - true, - outputBAMFileName, - getToolkit().getBAMCompression()); - } } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java index e211287c1..2628aebf6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java @@ -22,7 +22,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker @Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false) boolean allow454 = false; @Argument(fullName="OutputCleaned", shortName="O", required=false, doc="Output file (sam or bam) for improved (realigned) reads") - String OUT = null; + SAMFileWriter writer = null; @Argument(fullName="OutputIndels", shortName="indels", required=false, doc="Output file (text) for the indels found") String OUT_INDELS = null; @Argument(fullName="OutputCleanedReadsOnly", shortName="cleanedOnly", doc="print out cleaned reads only (otherwise, all reads within the intervals)", required=false) @@ -45,7 +45,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker // fraction of mismatches that need to no longer mismatch for a column to be considered cleaned private static final double MISMATCH_COLUMN_CLEANED_FRACTION = 0.75; - private SAMFileWriter writer = null; private FileWriter indelOutput = null; private FileWriter statsOutput = null; private FileWriter snpsOutput = null; @@ -63,12 +62,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker throw new RuntimeException("Entropy threshold must be a fraction between 0 and 1"); SAMFileHeader header = getToolkit().getEngine().getSAMHeader(); - if ( OUT != null ) { - writer = Utils.createSAMFileWriterWithCompression(header, true, OUT, 
getToolkit().getBAMCompression()); + if ( writer != null ) { readsToWrite = new TreeSet(); } - logger.info("Writing into output BAM file at compression level " + getToolkit().getBAMCompression()); + logger.info("Writing into output BAM file"); logger.info("Temporary space used: "+System.getProperty("java.io.tmpdir")); generator = new Random(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/LogisticRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/LogisticRecalibrationWalker.java index 43d29a6bb..6a38f22eb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/LogisticRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/LogisticRecalibrationWalker.java @@ -19,8 +19,8 @@ public class LogisticRecalibrationWalker extends ReadWalker outputs = new ArrayList(nWaysOut); for ( int i = 0; i < nWaysOut; i++ ) { SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader(); - outputs.add(Utils.createSAMFileWriterWithCompression(header, true, outputBase + "." + i + ".bam", getToolkit().getBAMCompression())); + outputs.add(Utils.createSAMFileWriterWithCompression(header, true, outputBase + "." 
+ i + ".bam", BAMcompression)); } return outputs; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReplaceQuals.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReplaceQuals.java index 17d96ed56..bc2f88916 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReplaceQuals.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReplaceQuals.java @@ -28,7 +28,7 @@ public class ReplaceQuals extends ReadWalker { public String inputQualsBAM; @Argument(shortName="outputBAM", required=false, doc="output BAM file for reads with replaced quals") - public String outputFilename = null; + public SAMFileWriter outputBAM = null; public int MAX_READS_TO_LOAD = -1; @@ -108,13 +108,7 @@ public class ReplaceQuals extends ReadWalker { } public SAMFileWriter reduceInit() { - if ( outputFilename != null ) { // ! outputBamFile.equals("") ) { - SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader(); - return Utils.createSAMFileWriterWithCompression(header, true, outputFilename, getToolkit().getBAMCompression()); - } - else { - return null; - } + return outputBAM; } /** diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java index d30f5753b..6c00e5e39 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java @@ -23,7 +23,7 @@ import net.sf.samtools.SAMFileHeader; */ public class CombineDuplicatesWalker extends DuplicateWalker { @Argument(fullName="outputBAM", shortName="outputBAM", required=false, doc="BAM File to write combined duplicates to") - public String outputFilename = null; + public SAMFileWriter outputBAM = null; @Argument(fullName="includeUniqueReads", shortName="includeUniqueReads", 
required=false, doc="If true, also writes out non-duplicate reads in file") public boolean INCLUDE_UNIQUE_READS = true; @@ -48,13 +48,7 @@ public class CombineDuplicatesWalker extends DuplicateWalker { /** * A mapping of all the sites where an argument definition maps to a site on the command line. */ -class ArgumentMatch { +class ArgumentMatch implements Iterable { /** * The argument definition that's been matched. */ @@ -184,20 +176,123 @@ class ArgumentMatch { /** * Maps indicies of command line arguments to values paired with that argument. */ - public final Map> indices = new HashMap>(); + public final SortedMap> indices = new TreeMap>(); /** * Create a new argument match, defining its properties later. Used to create invalid arguments. */ public ArgumentMatch() { - definition = null; - label = null; + this.label = null; + this.definition = null; } + /** + * A simple way of indicating that an argument with the given label and definition exists at this index. + * @param label Label of the argument match. Must not be null. + * @param definition The associated definition, if one exists. May be null. + * @param index Position of the argument. Must not be null. + */ public ArgumentMatch( String label, ArgumentDefinition definition, int index ) { + this( label, definition, index, null ); + } + + private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) { this.label = label; this.definition = definition; - indices.put(index,null); + + ArrayList values = new ArrayList(); + if( value != null ) + values.add(value); + indices.put(index,values ); + } + + /** + * Creates an iterator that walks over each individual match at each position of a given argument. + * @return An iterator over the individual matches in this argument. Will not be null. + */ + public Iterator iterator() { + return new Iterator() { + /** + * Iterate over each the available index. 
+ */ + private Iterator indexIterator = null; + + /** + * Iterate over each available token. + */ + private Iterator tokenIterator = null; + + /** + * The next index to return. Null if none remain. + */ + Integer nextIndex = null; + + /** + * The next token to return. Null if none remain. + */ + String nextToken = null; + + { + indexIterator = indices.keySet().iterator(); + prepareNext(); + } + + /** + * Is there a nextToken available to return? + * @return True if there's another token waiting in the wings. False otherwise. + */ + public boolean hasNext() { + return nextToken != null; + } + + /** + * Get the next token, if one exists. If not, throw an IllegalStateException. + * @return The next ArgumentMatch in the series. Should never be null. + */ + public ArgumentMatch next() { + if( nextIndex == null || nextToken == null ) + throw new IllegalStateException( "No more ArgumentMatches are available" ); + + ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken ); + prepareNext(); + return match; + } + + /** + * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available, + * initialize nextIndex / nextToken to null. + */ + private void prepareNext() { + if( tokenIterator != null && tokenIterator.hasNext() ) { + nextToken = tokenIterator.next(); + } + else { + nextIndex = null; + nextToken = null; + + // Do a nested loop. While more data is present in the inner loop, grab that data. + // Otherwise, troll the outer iterator looking for more data. + while( indexIterator.hasNext() ) { + nextIndex = indexIterator.next(); + if( indices.get(nextIndex) != null ) { + tokenIterator = indices.get(nextIndex).iterator(); + if( tokenIterator.hasNext() ) { + nextToken = tokenIterator.next(); + break; + } + } + } + } + + } + + /** + * Remove is unsupported in this context. 
+ */ + public void remove() { + throw new UnsupportedOperationException("Cannot remove an argument match from the collection while iterating."); + } + }; } /** diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentSource.java b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentSource.java index 038d71f62..f3d8f9837 100644 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentSource.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentSource.java @@ -100,22 +100,8 @@ public class ArgumentSource { * @return A non-null, non-empty list of argument definitions. */ public List createArgumentDefinitions() { - String fullName = descriptor.fullName().trim().length() > 0 ? descriptor.fullName().trim() : field.getName().toLowerCase(); - String shortName = descriptor.shortName().trim().length() > 0 ? descriptor.shortName().trim() : null; - String doc = descriptor.doc(); - boolean required = descriptor.required() && !isFlag(); - String exclusiveOf = descriptor.exclusiveOf().trim().length() > 0 ? descriptor.exclusiveOf().trim() : null; - String validation = descriptor.validation().trim().length() > 0 ? descriptor.validation().trim() : null; - - ArgumentDefinition argumentDefinition = new ArgumentDefinition( this, - fullName, - shortName, - doc, - required, - exclusiveOf, - validation ); - - return Collections.singletonList(argumentDefinition); + ArgumentTypeDescriptor typeDescriptor = ArgumentTypeDescriptor.create( field.getType() ); + return typeDescriptor.createArgumentDefinitions( this, descriptor ); } /** @@ -123,30 +109,16 @@ public class ArgumentSource { * @param targetInstance Instance into which to inject the parsed value. * @param values String representation of all values passed. */ - public void inject( ArgumentFactory customArgumentFactory, Object targetInstance, String... values ) { + public Object parse( ArgumentSource source, Object targetInstance, ArgumentMatch... 
values ) { Object value = null; + if( !isFlag() ) { + ArgumentTypeDescriptor typeDescriptor = ArgumentTypeDescriptor.create( field.getType() ); + value = typeDescriptor.parse( source, values ); + } + else + value = true; - if( customArgumentFactory != null ) { - value = customArgumentFactory.createArgument(field.getType(), values); - } - - if( value == null ) { - if( !isFlag() ) { - ArgumentTypeDescriptor typeDescriptor = ArgumentTypeDescriptor.create( field.getType() ); - value = typeDescriptor.parse( field, field.getType(), values ); - } - else - value = true; - } - - try { - field.setAccessible(true); - field.set(targetInstance, value); - } - catch( IllegalAccessException ex ) { - //logger.fatal("processArgs: cannot convert field " + field.toString()); - throw new StingException("processArgs: Failed conversion " + ex.getMessage(), ex); - } + return value; } /** diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java index fc714324b..a755e1b1d 100644 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/ArgumentTypeDescriptor.java @@ -26,10 +26,16 @@ package org.broadinstitute.sting.utils.cmdLine; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.sam.SAMFileWriterBuilder; +import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import org.apache.log4j.Logger; import java.lang.reflect.*; import java.util.*; +import java.io.File; + +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMFileReader; /** * An factory capable of providing parsers that can parse any type @@ -48,7 +54,9 @@ public abstract class ArgumentTypeDescriptor { /** * Class reference to the different types of descriptors that the create method can create. 
*/ - private static List descriptors = Arrays.asList( new SimpleArgumentTypeDescriptor(), + private static List descriptors = Arrays.asList( new SAMFileReaderArgumentTypeDescriptor(), + new SAMFileWriterArgumentTypeDescriptor(), + new SimpleArgumentTypeDescriptor(), new CompoundArgumentTypeDescriptor() ); public static ArgumentTypeDescriptor create( Class type ) { @@ -59,11 +67,91 @@ public abstract class ArgumentTypeDescriptor { throw new StingException("Can't process command-line arguments of type: " + type.getName()); } + /** + * Does this descriptor support classes of the given type? + * @param type The type to check. + * @return true if this descriptor supports the given type, false otherwise. + */ public abstract boolean supports( Class type ); + + /** + * Given the given argument source and attributes, synthesize argument definitions for command-line arguments. + * @param source Source class and field for the given argument. + * @param description Description of the fields that go into a given argument. + * @return A list of command-line argument definitions supporting this field. + */ + public List createArgumentDefinitions( ArgumentSource source, Argument description ) { + ArgumentDefinition definition = new ArgumentDefinition( source, + getFullName( source, description ), + getShortName( source, description ), + getDoc( source, description ), + isRequired( source, description ), + getExclusiveOf( source, description ), + getValidationRegex( source, description ) ); + return Collections.singletonList(definition); + } - public abstract Object parse( Field field, Class type, String... values ); + public Object parse( ArgumentSource source, ArgumentMatch... values ) { + return parse( source, source.field.getType(), values ); + } + + protected abstract Object parse( ArgumentSource source, Class type, ArgumentMatch... values ); + + /** + * Retrieves the full name of the argument, specifiable with the '--' prefix. 
The full name can be + * either specified explicitly with the fullName annotation parameter or implied by the field name. + * @return full name of the argument. Never null. + */ + protected String getFullName( ArgumentSource source, Argument description ) { + return description.fullName().trim().length() > 0 ? description.fullName().trim() : source.field.getName().toLowerCase(); + } + + /** + * Retrieves the short name of the argument, specifiable with the '-' prefix. The short name can + * be specified or not; if left unspecified, no short name will be present. + * @return short name of the argument. Null if no short name exists. + */ + protected String getShortName( ArgumentSource source, Argument description ) { + return description.shortName().trim().length() > 0 ? description.shortName().trim() : null; + } + + /** + * Documentation for this argument. Mandatory field. + * @return Documentation for this argument. + */ + protected String getDoc( ArgumentSource source, Argument description ) { + return description.doc(); + } + + /** + * Returns whether this field is required. Note that flag fields are always forced to 'not required'. + * @return True if the field is mandatory and not a boolean flag. False otherwise. + */ + protected boolean isRequired( ArgumentSource source, Argument description ) { + return description.required() && !source.isFlag(); + } + + /** + * Specifies other arguments which cannot be used in conjunction with tihs argument. Comma-separated list. + * @return A comma-separated list of exclusive arguments, or null if none are present. + */ + protected String getExclusiveOf( ArgumentSource source, Argument description ) { + return description.exclusiveOf().trim().length() > 0 ? description.exclusiveOf().trim() : null; + } + + /** + * A regular expression which can be used for validation. + * @return a JVM regex-compatible regular expression, or null to permit any possible value. 
+ */ + protected String getValidationRegex( ArgumentSource source, Argument description ) { + return description.validation().trim().length() > 0 ? description.validation().trim() : null; + } } +/** + * Parse simple argument types: java primitives, wrapper classes, and anything that has + * a simple String constructor. + */ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public boolean supports( Class type ) { @@ -83,11 +171,10 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object parse( Field field, Class type, String... values ) { - if( values.length > 1 ) + protected Object parse( ArgumentSource source, Class type, ArgumentMatch... matches ) { + if( matches.length > 1 || matches[0].values().size() > 1 ) throw new StingException("Simple argument parser is unable to parse multiple arguments."); - - String value = values[0]; + String value = matches[0].values().get(0); // lets go through the types we support try { @@ -131,6 +218,9 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { }; } +/** + * Process compound argument types: arrays, and typed and untyped collections. + */ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public boolean supports( Class type ) { @@ -138,10 +228,13 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object parse( Field field, Class type, String... values ) + public Object parse( ArgumentSource source, Class type, ArgumentMatch... 
matches ) { Class componentType = null; - ArgumentTypeDescriptor componentArgumentParser; + + if( matches.length > 1 ) + throw new StingException("Simple argument parser is unable to combine multiple argument types into a compound argument."); + ArgumentMatch match = matches[0]; if( Collection.class.isAssignableFrom(type) ) { @@ -155,48 +248,45 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { } // If this is a parameterized collection, find the contained type. If blow up if only one type exists. - if( field.getGenericType() instanceof ParameterizedType) { - ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); + if( source.field.getGenericType() instanceof ParameterizedType) { + ParameterizedType parameterizedType = (ParameterizedType)source.field.getGenericType(); if( parameterizedType.getActualTypeArguments().length > 1 ) - throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString()); + throw new IllegalArgumentException("Unable to determine collection type of field: " + source.field.toString()); componentType = (Class)parameterizedType.getActualTypeArguments()[0]; } else componentType = String.class; - } - else if( type.isArray() ) { - componentType = type.getComponentType(); - } - else - throw new StingException("Unsupported compound argument type: " + type); - componentArgumentParser = ArgumentTypeDescriptor.create( componentType ); + ArgumentTypeDescriptor componentArgumentParser = ArgumentTypeDescriptor.create( componentType ); - if( Collection.class.isAssignableFrom(type) ) { Collection collection = null; try { collection = (Collection)type.newInstance(); } catch (InstantiationException e) { - logger.fatal("ArgumentParser: InstantiationException: cannot convert field " + field.getName()); + logger.fatal("ArgumentParser: InstantiationException: cannot convert field " + source.field.getName()); throw new StingException("constructFromString:InstantiationException: 
Failed conversion " + e.getMessage()); } catch (IllegalAccessException e) { - logger.fatal("ArgumentParser: IllegalAccessException: cannot convert field " + field.getName()); + logger.fatal("ArgumentParser: IllegalAccessException: cannot convert field " + source.field.getName()); throw new StingException("constructFromString:IllegalAccessException: Failed conversion " + e.getMessage()); } - for( String value: values ) - collection.add( componentArgumentParser.parse(field,componentType,value) ); + for( ArgumentMatch value: match ) + collection.add( componentArgumentParser.parse(source,componentType,value) ); return collection; + } else if( type.isArray() ) { - Object arr = Array.newInstance(componentType,values.length); + componentType = type.getComponentType(); + ArgumentTypeDescriptor componentArgumentParser = ArgumentTypeDescriptor.create( componentType ); + Object arr = Array.newInstance(componentType,match.values().size()); + + int i = 0; + for( ArgumentMatch value: match ) + Array.set( arr,i++,componentArgumentParser.parse(source,componentType,value)); - for( int i = 0; i < values.length; i++ ) - Array.set( arr,i,componentArgumentParser.parse(field,componentType,values[i])); - return arr; } else @@ -204,3 +294,102 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { } } +/** + * Handle SAMFileReaders. + */ +class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor { + @Override + public boolean supports( Class type ) { + return SAMFileReader.class.isAssignableFrom(type); + } + + @Override + public Object parse( ArgumentSource source, Class type, ArgumentMatch... 
matches ) { + if( matches.length > 1 ) + throw new UnsupportedOperationException("Only an input file name and validation stringency can be supplied when creating a BAM file reader."); + + SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); + + ArgumentMatch readerMatch = matches[0]; + + if( readerMatch == null ) + throw new StingException("SAM file compression was supplied, but not associated writer was supplied with it."); + if( readerMatch.values().size() > 1 ) + throw new StingException("Only one filename can be supplied per created BAM file"); + + builder.setSAMFile(new File(readerMatch.values().get(0).trim())); + + return builder; + } +} + +/** + * Handle SAMFileWriters. + */ +class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { + private static final String COMPRESSION_FULLNAME = "bam_compression"; + private static final String COMPRESSION_SHORTNAME = "compress"; + + @Override + public boolean supports( Class type ) { + return SAMFileWriter.class.isAssignableFrom(type); + } + + @Override + public List createArgumentDefinitions( ArgumentSource source, Argument description ) { + String fullName = description.fullName().trim().length() > 0 ? description.fullName().trim() : "outputBAM"; + String shortName = description.shortName().trim().length() > 0 ? description.shortName().trim() : "ob"; + + ArgumentDefinition writerDefinition = new ArgumentDefinition( source, + fullName, + shortName, + getDoc( source, description ), + isRequired( source, description ), + getExclusiveOf( source, description ), + getValidationRegex( source, description ) ); + ArgumentDefinition compressionDefinition = new ArgumentDefinition( source, + COMPRESSION_FULLNAME, + COMPRESSION_SHORTNAME, + "Compression level to use for writing BAM files", + false, + "", + "" ); + + return Arrays.asList( writerDefinition, compressionDefinition ); + } + + @Override + public Object parse( ArgumentSource source, Class type, ArgumentMatch... 
matches ) { + if( matches.length > 2 ) + throw new UnsupportedOperationException("Only an input file name and validation stringency can be supplied when creating a BAM file reader."); + + SAMFileWriterBuilder builder = new SAMFileWriterBuilder(); + + ArgumentMatch writerMatch = null; + ArgumentMatch compressionMatch = null; + + for( ArgumentMatch match: matches ) { + if( match.definition.fullName.equals(COMPRESSION_FULLNAME) ) + compressionMatch = match; + else + writerMatch = match; + } + + if( writerMatch == null ) + throw new StingException("SAM file compression was supplied, but not associated writer was supplied with it."); + if( writerMatch.values().size() > 1 ) + throw new StingException("Only one filename can be supplied per created BAM file"); + + builder.setSAMFile(new File(writerMatch.values().get(0).trim())); + + if( compressionMatch != null ) { + if( compressionMatch.values().size() > 1 ) + throw new StingException("Only one value can be supplied for BAM compression"); + int compressionLevel = Integer.valueOf(compressionMatch.values().get(0)); + builder.setCompressionLevel(compressionLevel); + } + + return builder; + } + +} diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java index 6973dbd1f..9af1e50fd 100644 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java @@ -109,6 +109,16 @@ public abstract class CommandLineProgram { */ protected Class[] getArgumentSources() { return new Class[] {}; } + /** + * Allows arguments to be hijacked by subclasses of the program before being placed + * into plugin classes. + * @param source Source class for the argument. + * @param targetInstance Instance into which the value should be ultimately injected. + * @param value Value to inject. + * @return True if the particular field has been hijacked; false otherwise. 
+ */ + protected boolean intercept( ArgumentSource source, Object targetInstance, Object value ) { return false; } + /** * Name this argument source. Provides the (full) class name as a default. * @param source The argument source. @@ -124,15 +134,6 @@ public abstract class CommandLineProgram { */ protected abstract int execute(); - /** - * Retrieves a factory for custom creation of command-line arguments, specified by the - * subclass. - * @return - */ - protected ArgumentFactory getCustomArgumentFactory() { - return null; - } - static { // setup a basic log configuration BasicConfigurator.configure(); @@ -152,7 +153,7 @@ public abstract class CommandLineProgram { PatternLayout layout = new PatternLayout(); // setup the parser - ParsingEngine parser = clp.parser = new ParsingEngine( clp.getCustomArgumentFactory() ); + ParsingEngine parser = clp.parser = new ParsingEngine(clp); parser.addArgumentSource( clp.getClass() ); // process the args diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingEngine.java b/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingEngine.java index e7a8f43d8..a252b08a8 100755 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingEngine.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/ParsingEngine.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.cmdLine; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.JVMUtils; import org.apache.log4j.Logger; import java.lang.reflect.*; @@ -25,6 +26,11 @@ import java.util.*; * A parser for Sting command-line arguments. */ public class ParsingEngine { + /** + * The command-line program at the heart of this parsing engine. + */ + CommandLineProgram clp = null; + /** * A collection of all the source fields which define command-line arguments. 
*/ @@ -43,13 +49,6 @@ public class ParsingEngine { */ ArgumentMatches argumentMatches = null; - /** - * Stores a custom argument factory for building out arguments of which only - * subclasses of CommandLineProgram should be aware. - */ - ArgumentFactory customArgumentFactory = null; - - /** * Techniques for parsing and for argument lookup. */ @@ -60,8 +59,8 @@ public class ParsingEngine { */ protected static Logger logger = Logger.getLogger(ParsingEngine.class); - public ParsingEngine( ArgumentFactory customArgumentFactory ) { - this.customArgumentFactory = customArgumentFactory; + public ParsingEngine( CommandLineProgram clp ) { + this.clp = clp; parsingMethods.add( ParsingMethod.FullNameParsingMethod ); parsingMethods.add( ParsingMethod.ShortNameParsingMethod ); } @@ -245,33 +244,27 @@ public class ParsingEngine { // Get a list of argument sources, not including the children of this argument. For now, skip loading // arguments into the object recursively. List argumentSources = extractArgumentSources( object.getClass(), false ); - for( ArgumentSource argumentSource: argumentSources ) { - Collection argumentsMatchingSource = argumentMatches.findMatches( argumentSource ); - if( argumentsMatchingSource.size() != 0 ) - loadMatchesIntoObject( argumentsMatchingSource, object ); - } + for( ArgumentSource argumentSource: argumentSources ) + loadMatchesIntoObject( argumentSource, object, argumentMatches.findMatches(argumentSource) ); } /** * Loads a single argument into the object. * @param argumentMatches Argument matches to load into the object. - * @param object Target for the argument. 
+ * @param target Instance that receives the value parsed for source, unless the program intercepts it. + */ - private void loadMatchesIntoObject( Collection argumentMatches, Object object ) { - if( argumentMatches.size() > 1 ) - throw new StingException("Too many matches"); - - ArgumentMatch match = argumentMatches.iterator().next(); - ArgumentDefinition definition = match.definition; - - // A null definition might be in the list if some invalid arguments were passed in but we - // want to load in a subset of data for better error reporting. Ignore null definitions. - if( definition == null ) + private void loadMatchesIntoObject( ArgumentSource source, Object target, Collection argumentMatches ) { + // Nothing to load + if( argumentMatches.size() == 0 ) return; - if( definition.source.clazz.isAssignableFrom(object.getClass()) ) { - String[] tokens = match.values().toArray(new String[0]); - definition.source.inject( customArgumentFactory, object, tokens ); + if( argumentMatches.size() > 1 ) + throw new StingException("Too many values matched argument: " + source.field.getName()); + + if( source.clazz.isAssignableFrom(target.getClass()) ) { + Object value = source.parse( source, target, argumentMatches.toArray(new ArgumentMatch[0]) ); + if( clp == null || !clp.intercept(source, target, value) ) + JVMUtils.setField( source.field, target, value ); } } diff --git a/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java b/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java new file mode 100644 index 000000000..26d9a4cf1 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software,
and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.sam; + +import net.sf.samtools.SAMFileReader; + +import java.io.File; + +import org.broadinstitute.sting.utils.StingException; + +/** + * Allows the user to steadily accumulate information about what + * components go into a SAM file reader, ultimately using this + * information to create a SAM file reader on demand. + * + * @author mhanna + * @version 0.1 + */ +public class SAMFileReaderBuilder { + /** + * From which file should input be read? + */ + private File samFile = null; + + /** + * What validation stringency should be applied when reading this file? + */ + private SAMFileReader.ValidationStringency validationStringency = null; + + /** + * Sets the handle of the sam file from which data should be read. + * @param samFile The SAM file from which data should flow. + */ + public void setSAMFile( File samFile ) { + this.samFile = samFile; + } + + /** + * Sets the validation stringency to apply when reading this sam file. + * @param validationStringency Stringency to apply. Must be non-null by the time build() is called.
+ */ + public void setValidationStringency( SAMFileReader.ValidationStringency validationStringency ) { + this.validationStringency = validationStringency; + } + + /** + * Create the SAM reader, given the constituent parts accrued. + * @return Newly minted SAM file reader; a StingException is thrown if the file or stringency is unset. + */ + public SAMFileReader build() { + if( samFile == null ) + throw new StingException( "Filename for input sam file must be supplied."); + if( validationStringency == null ) + throw new StingException( "Validation stringency for input sam file must be supplied."); + + SAMFileReader reader = new SAMFileReader( samFile ); + reader.setValidationStringency( validationStringency ); + + return reader; + } +} diff --git a/java/src/org/broadinstitute/sting/utils/sam/SAMFileWriterBuilder.java b/java/src/org/broadinstitute/sting/utils/sam/SAMFileWriterBuilder.java new file mode 100644 index 000000000..97339d66e --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/sam/SAMFileWriterBuilder.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.sam; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMFileWriterFactory; + +import java.io.File; + +import org.broadinstitute.sting.utils.StingException; + +/** + * Allows the user to steadily accumulate information about what + * components go into a SAM file writer, ultimately using this + * information to create a SAM file writer on demand. + * + * @author mhanna + * @version 0.1 + */ +public class SAMFileWriterBuilder { + /** + * Default compression level for newly constructed SAM files. + * Defaults to 5 (based on research by Alec Wysoker). + */ + public static final int DEFAULT_COMPRESSION_LEVEL = 5; + + /** + * To which file should output be written? + */ + private File samFile = null; + + /** + * Which header should be used when writing the SAM file? + */ + private SAMFileHeader header = null; + + /** + * What compression level should be used when building this file? + */ + private int compressionLevel = DEFAULT_COMPRESSION_LEVEL; + + /** + * Sets the handle of the sam file to which data should be written. + * @param samFile The SAM file into which data should flow. + */ + public void setSAMFile( File samFile ) { + this.samFile = samFile; + } + + /** + * Sets the header to be written at the head of this SAM file. + * @param header Header to write. + */ + public void setSAMFileHeader( SAMFileHeader header ) { + this.header = header; + } + + /** + * Sets the compression level to use when writing this BAM file. + * @param compressionLevel Compression level to use; stored as given, with no range check in this class.
+ */ + public void setCompressionLevel( int compressionLevel ) { + this.compressionLevel = compressionLevel; + } + + /** + * Create the SAM writer (via makeBAMWriter) from the constituent parts accrued. + * @return Newly minted SAM file writer; a StingException is thrown if the file or header is unset. + */ + public SAMFileWriter build() { + if( samFile == null ) + throw new StingException( "Filename for output sam file must be supplied."); + if( header == null ) + throw new StingException( "Header for output sam file must be supplied."); + return new SAMFileWriterFactory().makeBAMWriter( header, true, samFile, compressionLevel ); + } +} diff --git a/java/test/org/broadinstitute/sting/gatk/OutputTrackerTest.java b/java/test/org/broadinstitute/sting/gatk/OutputTrackerTest.java index 9b458ee23..462a24362 100755 --- a/java/test/org/broadinstitute/sting/gatk/OutputTrackerTest.java +++ b/java/test/org/broadinstitute/sting/gatk/OutputTrackerTest.java @@ -42,7 +42,8 @@ public class OutputTrackerTest extends BaseTest { @Test public void testNullInputs() { - OutputTracker ot = new OutputTracker(null,null); + OutputTracker ot = new OutputTracker(); + ot.initializeCoreIO(null,null); Assert.assertTrue("OutputTracker: Output stream is of wrong type.", ot.getOutStream() instanceof RedirectingOutputStream ); Assert.assertTrue("OutputTracker: Error stream is of wrong type.", ot.getErrStream() instanceof RedirectingOutputStream ); @@ -56,7 +57,8 @@ public class OutputTrackerTest extends BaseTest { @Test public void testOutputStreamAlone() throws FileNotFoundException { - OutputTracker ot = new OutputTracker(OUTPUT_FILENAME,null); + OutputTracker ot = new OutputTracker(); + ot.initializeCoreIO(OUTPUT_FILENAME,null); final String OUTPUT_TEXT = "out stream test"; PrintWriter outWriter = new PrintWriter(ot.getOutStream()); @@ -76,7 +78,8 @@ public class OutputTrackerTest extends BaseTest { @Test public void testErrorStreamAlone() throws FileNotFoundException { - OutputTracker ot = new OutputTracker(null,ERROR_FILENAME); + OutputTracker ot = new OutputTracker();
+ ot.initializeCoreIO(null,ERROR_FILENAME); final String ERROR_TEXT = "err stream test"; PrintWriter errWriter = new PrintWriter(ot.getErrStream()); @@ -95,7 +98,8 @@ public class OutputTrackerTest extends BaseTest { @Test public void testIndependentStreams() throws FileNotFoundException { - OutputTracker ot = new OutputTracker(OUTPUT_FILENAME,ERROR_FILENAME); + OutputTracker ot = new OutputTracker(); + ot.initializeCoreIO(OUTPUT_FILENAME,ERROR_FILENAME); final String OUTPUT_TEXT = "out stream test"; PrintWriter outWriter = new PrintWriter(ot.getOutStream()); @@ -121,7 +125,8 @@ public class OutputTrackerTest extends BaseTest { @Test public void testIdenticalInputsGetIdenticalResults() { - OutputTracker ot = new OutputTracker(OUTPUT_FILENAME,OUTPUT_FILENAME); + OutputTracker ot = new OutputTracker(); + ot.initializeCoreIO(OUTPUT_FILENAME,OUTPUT_FILENAME); Assert.assertTrue("OutputTracker: Output stream is of wrong type.", ot.getOutStream() instanceof RedirectingOutputStream ); Assert.assertTrue("OutputTracker: Error stream is of wrong type.", ot.getErrStream() instanceof RedirectingOutputStream ); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjectorTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjectorTest.java index 4a91146e9..405205b36 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjectorTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjectorTest.java @@ -173,8 +173,6 @@ public class CleanedReadInjectorTest extends BaseTest { walker.cleanedReadsSource = cleanedReads; walker.outputBAM = output; - walker.initializeOutputStreams( new OutputTracker(null,null) ); - return walker; }