Support for specifying SAMFileReaders and SAMFileWriters as @Arguments directly. *Very*

rough initial implementation, but should provide enough support so that people can stop
creating SAMFileWriters in reduceInit.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1332 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-07-29 16:11:45 +00:00
parent 56f769f2ce
commit 7a13647c35
29 changed files with 893 additions and 452 deletions

View File

@ -1,9 +1,11 @@
package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import org.broadinstitute.sting.utils.cmdLine.ArgumentFactory;
import org.broadinstitute.sting.utils.cmdLine.ArgumentSource;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.xReadLines;
import org.broadinstitute.sting.utils.sam.SAMFileWriterBuilder;
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
import org.broadinstitute.sting.gatk.walkers.Walker;
import java.io.File;
@ -11,7 +13,7 @@ import java.io.FileNotFoundException;
import java.util.List;
import java.util.ArrayList;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
/*
* Copyright (c) 2009 The Broad Institute
@ -106,6 +108,24 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
return new Class[] { GATKEngine.getWalkerByName(getAnalysisName()).getClass() };
}
/**
 * Allows arguments to be hijacked by subclasses of the program before being placed
 * into plugin classes.  Walker-bound SAM reader/writer builders are routed to the
 * engine as externally managed IO rather than being assigned directly.
 * @return True if the particular field should be hijacked; false otherwise.
 */
protected boolean intercept( ArgumentSource source, Object targetInstance, Object value ) {
    // Only arguments destined for a walker are candidates for interception.
    boolean targetsWalker = Walker.class.isAssignableFrom(source.clazz);
    boolean isManagedIO = (value instanceof SAMFileReaderBuilder) || (value instanceof SAMFileWriterBuilder);
    if( targetsWalker && isManagedIO ) {
        // Hand the builder to the engine; it finishes construction and injects it later.
        GATKEngine.setAdditionalIO( source.field, value );
        return true;
    }
    return false;
}
@Override
protected String getArgumentSourceName( Class argumentSource ) {
return WalkerManager.getWalkerName((Class<Walker>) argumentSource);
@ -145,22 +165,4 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
}
return unpackedReads;
}
/**
 * Get a custom factory for instantiating specialty GATK arguments.
 * @return An instance of the command-line argument of the specified type.
 */
@Override
protected ArgumentFactory getCustomArgumentFactory() {
    return new ArgumentFactory() {
        /** Builds a SAMFileReader from a single filename argument; any other request yields null. */
        public Object createArgument( Class type, String... repr ) {
            if (type != SAMFileReader.class || repr.length != 1)
                return null;
            // Eagerly-decoding reader over the named SAM/BAM file, honoring the
            // user-requested validation strictness.
            SAMFileReader reader = new SAMFileReader(new File(repr[0]), true);
            reader.setValidationStringency(getArgumentCollection().strictnessLevel);
            return reader;
        }
    };
}
}

View File

@ -102,10 +102,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false)
public Integer maximumEngineIterations = -1;
@Element(required = false)
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
public Integer BAMcompression = null;
@Element(required = false)
@Argument(fullName = "filterZeroMappingQualityReads", shortName = "fmq0", doc = "If true, mapping quality zero reads will be filtered at the lowest GATK level. Vastly improves performance at areas with abnormal depth due to mapping Q0 reads", required = false)
public Boolean filterZeroMappingQualityReads = false;
@ -246,10 +242,6 @@ public class GATKArgumentCollection {
if (!other.unsafe.equals(this.unsafe)) {
return false;
}
if (( other.BAMcompression == null && this.BAMcompression != null ) ||
( other.BAMcompression != null && !other.BAMcompression.equals(this.BAMcompression) )) {
return false;
}
if (( other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null ) ||
( other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads) )) {
return false;

View File

@ -34,6 +34,9 @@ import net.sf.samtools.SAMReadGroupRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
@ -41,13 +44,13 @@ import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.cmdLine.ArgumentException;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.io.FileNotFoundException;
import java.util.*;
import java.lang.reflect.Field;
public class GenomeAnalysisEngine {
@ -63,7 +66,7 @@ public class GenomeAnalysisEngine {
private GATKArgumentCollection argCollection;
/** Collection of output streams used by the walker. */
private OutputTracker outputTracker = null;
private OutputTracker outputTracker = new OutputTracker();
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class);
@ -124,14 +127,12 @@ public class GenomeAnalysisEngine {
// Validate the walker inputs against the walker.
validateInputsAgainstWalker(my_walker, argCollection, rods);
// our microscheduler, which is in charge of running everything
MicroScheduler microScheduler = createMicroscheduler(my_walker, rods);
// create the output streams
initializeOutputStreams(my_walker);
// our microscheduler, which is in charge of running everything
MicroScheduler microScheduler = null;
microScheduler = createMicroscheduler(my_walker, rods);
// Prepare the sort ordering w.r.t. the sequence dictionary
if (argCollection.referenceFile != null) {
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile);
@ -148,8 +149,20 @@ public class GenomeAnalysisEngine {
if (argCollection.intervals != null) {
locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
}
// execute the microscheduler, storing the results
return microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations);
ShardStrategy shardStrategy = this.getShardStrategy(my_walker, microScheduler.getReference(), locs, argCollection.maximumEngineIterations);
// execute the microscheduler, storing the results
return microScheduler.execute(my_walker, shardStrategy);
}
/**
 * Add additional, externally managed IO streams for walker consumption.
 * The value is held by the output tracker and assigned into the walker field
 * during walker preparation (see OutputTracker.prepareWalker).
 * @param walkerField Field in the walker into which to inject the value.
 * @param value Instance to inject.
 */
public void setAdditionalIO( Field walkerField, Object value ) {
    outputTracker.addAdditionalOutput( walkerField, value );
}
/**
@ -182,21 +195,29 @@ public class GenomeAnalysisEngine {
// the microscheduler to return
MicroScheduler microScheduler = null;
SAMDataSource readsDataSource = createReadsDataSource(extractSourceInfo(my_walker,argCollection));
IndexedFastaSequenceFile referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
List<ReferenceOrderedDataSource> rodDataSources = getReferenceOrderedDataSources(rods);
GenomeLocSortedSet locs = null;
if (argCollection.intervals != null) {
locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
}
// we need to verify different parameters based on the walker type
if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) {
// create the MicroScheduler
microScheduler = MicroScheduler.create(my_walker, extractSourceInfo(my_walker,argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
engine = microScheduler.getTraversalEngine();
microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads);
} else if (my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) {
if (argCollection.referenceFile == null)
Utils.scareUser(String.format("Read-based traversals require a reference file but none was given"));
microScheduler = MicroScheduler.create(my_walker, extractSourceInfo(my_walker,argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
engine = microScheduler.getTraversalEngine();
microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads);
} else {
Utils.scareUser(String.format("Unable to create the appropriate TraversalEngine for analysis type " + argCollection.analysisName));
}
dataSource = microScheduler.getSAMDataSource();
engine = microScheduler.getTraversalEngine();
return microScheduler;
}
@ -366,17 +387,6 @@ public class GenomeAnalysisEngine {
}
}
/**
 * Get the compression level to use when writing BAM files.
 * Unspecified or out-of-range values default to 5 (based on research by Alec Wysoker).
 *
 * @return the BAM compression level, in the range [1,8], defaulting to 5.
 */
public int getBAMCompression() {
    final Integer requested = argCollection.BAMcompression;
    if (requested == null || requested < 1 || requested > 8)
        return 5;
    return requested;
}
/**
* Convenience function that binds RODs using the old-style command line parser to the new style list for
* a uniform processing.
@ -389,6 +399,116 @@ public class GenomeAnalysisEngine {
argCollection.RODBindings.add(Utils.join(",", new String[]{name, type, file}));
}
/**
 * Get the sharding strategy given a driving data source.
 *
 * @param walker Walker for which to infer sharding strategy.
 * @param drivingDataSource Data on which to shard.
 * @param intervals Intervals to use when limiting sharding; may be null for whole-genome runs.
 * @param maxIterations the maximum number of iterations to run through
 *
 * @return Sharding strategy for this driving data source.
 * @throws StingException if the walker type is unsupported, or if a LocusWindowWalker
 *         is run without intervals.
 */
protected ShardStrategy getShardStrategy(Walker walker,
                                         ReferenceSequenceFile drivingDataSource,
                                         GenomeLocSortedSet intervals,
                                         Integer maxIterations) {
    // Number of reference bases covered by each shard.
    final long SHARD_SIZE = 100000L;

    // Determine the shatter strategy from the walker type once, then issue a single
    // shatter call instead of duplicating it in every branch.
    final ShardStrategyFactory.SHATTER_STRATEGY shardType;
    if (walker instanceof LocusWalker) {
        // Interval-reducing locus walkers shard by interval; everything else shards linearly.
        shardType = (intervals != null && walker.isReduceByInterval())
                ? ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL
                : ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
    } else if (walker instanceof ReadWalker ||
               walker instanceof DuplicateWalker) {
        shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS;
    } else if (walker instanceof LocusWindowWalker) {
        // Locus-window traversals are meaningless without explicit intervals.
        if (intervals == null)
            throw new StingException("Unable to shard: walker is of type LocusWindow, but no intervals were provided");
        shardType = ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL;
    } else {
        // Bug fix: original message was missing the space before the class name.
        throw new StingException("Unable to support walker of type " + walker.getClass().getName());
    }

    if (intervals != null)
        return ShardStrategyFactory.shatter(shardType,
                drivingDataSource.getSequenceDictionary(),
                SHARD_SIZE,
                intervals, maxIterations);
    else
        return ShardStrategyFactory.shatter(shardType,
                drivingDataSource.getSequenceDictionary(),
                SHARD_SIZE, maxIterations);
}
/**
 * Gets a data source for the given set of reads.
 *
 * @param reads the read source information
 *
 * @return A data source for the given set of reads, or null when no read files were supplied.
 */
private SAMDataSource createReadsDataSource(Reads reads) {
    // By reference traversals are happy with no reads; signal that case with a null source.
    if (reads.getReadsFiles().isEmpty())
        return null;
    return new SAMDataSource(reads);
}
/**
 * Opens a reference sequence file paired with an index.
 *
 * @param refFile Handle to a reference sequence file. Non-null.
 *
 * @return A thread-safe file wrapper.
 */
private IndexedFastaSequenceFile openReferenceSequenceFile(File refFile) {
    final IndexedFastaSequenceFile sequenceFile;
    try {
        sequenceFile = new IndexedFastaSequenceFile(refFile);
    }
    catch (FileNotFoundException ex) {
        // Surface the checked exception as the GATK's runtime type, preserving the cause.
        throw new StingException("I/O error while opening fasta file: " + ex.getMessage(), ex);
    }
    // Side effect: establish the contig ordering used for genome-location comparisons.
    GenomeLocParser.setupRefContigOrdering(sequenceFile);
    return sequenceFile;
}
/**
 * Open the reference-ordered data sources.
 *
 * @param rods the reference-ordered data to execute using
 *
 * @return A list of reference-ordered data sources, one per supplied ROD.
 */
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
    final List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
    for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods) {
        dataSources.add(new ReferenceOrderedDataSource(rod));
    }
    return dataSources;
}
/**
* Initialize the output streams as specified by the user.
@ -396,9 +516,11 @@ public class GenomeAnalysisEngine {
* @param walker the walker to initialize output streams for
*/
private void initializeOutputStreams(Walker walker) {
outputTracker = (argCollection.outErrFileName != null) ? new OutputTracker(argCollection.outErrFileName, argCollection.outErrFileName)
: new OutputTracker(argCollection.outFileName, argCollection.errFileName);
walker.initializeOutputStreams(outputTracker);
if( argCollection.outErrFileName != null )
outputTracker.initializeCoreIO( argCollection.outErrFileName, argCollection.outErrFileName );
else
outputTracker.initializeCoreIO( argCollection.outFileName, argCollection.errFileName );
outputTracker.prepareWalker(walker);
}
/**

View File

@ -1,13 +1,22 @@
package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.JVMUtils;
import org.broadinstitute.sting.utils.sam.SAMFileWriterBuilder;
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
import org.broadinstitute.sting.utils.io.RedirectingOutputStream;
import org.broadinstitute.sting.gatk.walkers.Walker;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.io.FileNotFoundException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.lang.reflect.Field;
import java.util.Map;
import java.util.HashMap;
import net.sf.samtools.SAMFileWriter;
/**
* User: hanna
* Date: Apr 30, 2009
@ -39,13 +48,15 @@ public class OutputTracker {
protected ThreadLocal<OutputStream> localOut = new ThreadLocal<OutputStream>();
protected ThreadLocal<OutputStream> localErr = new ThreadLocal<OutputStream>();
protected Map<Field,Object> additionalIO = new HashMap<Field,Object>();
/**
 * Initialize the core output and error streams, given filenames for the output and error files.
 * If a filename is not specified, the corresponding standard stream (System.out / System.err) is used.
 * @param outFileName Name of the output file.
 * @param errFileName Name of the error file.
 */
public OutputTracker( String outFileName, String errFileName ) {
public void initializeCoreIO( String outFileName, String errFileName ) {
// If the two output streams match and are non-null, initialize them identically.
// Otherwise, initialize them separately.
if( outFileName != null && outFileName.equals(errFileName) ) {
@ -55,6 +66,35 @@ public class OutputTracker {
else {
globalOut = (outFileName != null) ? prepareOutputFile( outFileName ) : System.out;
globalErr = (errFileName != null) ? prepareOutputFile( errFileName ) : System.err;
}
}
/**
 * Prepare the given walker for execution: wire the tracked out/err streams into the
 * walker's 'out'/'err' fields, then inject any additional, externally managed IO
 * objects into their target fields, finalizing builders along the way.
 * @param walker Walker instance to prepare.
 */
public void prepareWalker( Walker walker ) {
    // Reflectively bind the walker's core output fields to the tracked streams.
    Field outField = JVMUtils.findField( walker.getClass(), "out" );
    Field errField = JVMUtils.findField( walker.getClass(), "err" );
    JVMUtils.setField( outField, walker, new PrintStream(getOutStream()) );
    JVMUtils.setField( errField, walker, new PrintStream(getErrStream()) );

    for( Map.Entry<Field,Object> entry: additionalIO.entrySet() ) {
        Field targetField = entry.getKey();
        Object injected = entry.getValue();

        // Ghastly hacks: reaches in and finishes building out the SAMFileReader / SAMFileWriter.
        // TODO: Generalize this, and move it to its own initialization step.
        if( injected instanceof SAMFileReaderBuilder ) {
            SAMFileReaderBuilder readerBuilder = (SAMFileReaderBuilder)injected;
            readerBuilder.setValidationStringency(GenomeAnalysisEngine.instance.getArguments().strictnessLevel);
            injected = readerBuilder.build();
        }
        if( injected instanceof SAMFileWriterBuilder ) {
            SAMFileWriterBuilder writerBuilder = (SAMFileWriterBuilder)injected;
            writerBuilder.setSAMFileHeader(GenomeAnalysisEngine.instance.getDataSource().getHeader());
            injected = writerBuilder.build();
        }

        JVMUtils.setField( targetField, walker, injected );
    }
}
@ -115,6 +155,15 @@ public class OutputTracker {
localErr.set( err );
}
/**
 * Provide a mechanism for injecting supplemental streams for external management.
 * The value is retained until walker preparation, when it is assigned (after any
 * builder finalization) into the given walker field.
 * @param field Field into which to inject this stream.
 * @param stream Stream to manage.
 */
public void addAdditionalOutput( Field field, Object stream ) {
    additionalIO.put(field,stream);
}
/**
* Remove pointers to alternate, local output streams.
*/

View File

@ -54,7 +54,7 @@ public class ShardDataProvider {
/**
* Sources of reference-ordered data.
*/
private final List<ReferenceOrderedDataSource> referenceOrderedData;
private final Collection<ReferenceOrderedDataSource> referenceOrderedData;
/**
* Retrieves the shard associated with this data provider.
@ -101,7 +101,7 @@ public class ShardDataProvider {
* views can access it.
* @return List of reference-ordered data sources.
*/
List<ReferenceOrderedDataSource> getReferenceOrderedData() {
Collection<ReferenceOrderedDataSource> getReferenceOrderedData() {
return referenceOrderedData;
}
@ -111,7 +111,7 @@ public class ShardDataProvider {
* @param reads A window into the reads for a given region.
* @param reference A getter for a section of the reference.
*/
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, List<ReferenceOrderedDataSource> rods) {
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
this.shard = shard;
// Provide basic reads information.
this.reads = (reads != null) ? reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList<File>()));

View File

@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.OutputTracker;
import org.broadinstitute.sting.gatk.Reads;
@ -11,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
import javax.management.MBeanServer;
@ -20,6 +23,7 @@ import java.io.File;
import java.util.List;
import java.util.Queue;
import java.util.LinkedList;
import java.util.Collection;
import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
@ -74,11 +78,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
* Create a new hierarchical microscheduler to process the given reads and reference.
*
* @param reads Reads file(s) to process.
* @param refFile Reference for driving the traversal.
* @param reference Reference for driving the traversal.
* @param nThreadsToUse maximum number of threads to use to do the work
*/
protected HierarchicalMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
super(walker, reads, refFile, rods);
protected HierarchicalMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
super(walker, reads, reference, rods);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
try {
@ -91,12 +95,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
}
}
public Object execute( Walker walker, GenomeLocSortedSet intervals, Integer maxIterations ) {
public Object execute( Walker walker, ShardStrategy shardStrategy ) {
// Fast fail for walkers not supporting TreeReducible interface.
if (!( walker instanceof TreeReducible ))
throw new IllegalArgumentException("Hierarchical microscheduler only works with TreeReducible walkers");
ShardStrategy shardStrategy = getShardStrategy(walker, reference, intervals, maxIterations);
ReduceTree reduceTree = new ReduceTree(this);
walker.initialize();
@ -258,7 +261,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
OutputMerger outputMerger = new OutputMerger();
ShardTraverser traverser = new ShardTraverser(this,
getTraversalEngine(),
traversalEngine,
walker,
shard,
getShardDataProvider(shard),

View File

@ -3,14 +3,16 @@ package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import java.io.File;
import java.util.List;
import java.util.Collection;
/** A micro-scheduling manager for single-threaded execution of a traversal. */
public class LinearMicroScheduler extends MicroScheduler {
@ -18,23 +20,22 @@ public class LinearMicroScheduler extends MicroScheduler {
/**
* Create a new linear microscheduler to process the given reads and reference.
*
* @param reads Reads file(s) to process.
* @param refFile Reference for driving the traversal.
* @param walker Walker for the traversal.
* @param reads Reads file(s) to process.
* @param reference Reference for driving the traversal.
* @param rods Reference-ordered data.
*/
protected LinearMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
super(walker, reads, refFile, rods);
protected LinearMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods ) {
super(walker, reads, reference, rods);
}
/**
* Run this traversal over the specified subsection of the dataset.
*
* @param walker Computation to perform over dataset.
* @param locations Subset of the dataset over which to walk.
* @param maxIterations the maximum number of iterations we're to perform
* @param shardStrategy A strategy for sharding the data.
*/
public Object execute(Walker walker, GenomeLocSortedSet locations, Integer maxIterations) {
ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations, maxIterations);
public Object execute(Walker walker, ShardStrategy shardStrategy) {
walker.initialize();
Accumulator accumulator = Accumulator.create(walker);

View File

@ -26,29 +26,19 @@
package org.broadinstitute.sting.gatk.executive;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
@ -62,15 +52,13 @@ import java.util.*;
/** Shards and schedules data in manageable chunks. */
public abstract class MicroScheduler {
private static long SHARD_SIZE = 100000L;
protected static Logger logger = Logger.getLogger(MicroScheduler.class);
protected final TraversalEngine traversalEngine;
protected final IndexedFastaSequenceFile reference;
private final SAMDataSource reads;
private final List<ReferenceOrderedDataSource> rods;
private final Collection<ReferenceOrderedDataSource> rods;
/**
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
@ -78,19 +66,19 @@ public abstract class MicroScheduler {
*
* @param walker Which walker to use.
* @param reads the information associated with the reads
* @param ref the reference file
* @param reference the reference file
* @param rods the rods to include in the traversal
* @param nThreadsToUse Number of threads to utilize.
*
* @return The best-fit microscheduler.
*/
public static MicroScheduler create(Walker walker, Reads reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse) {
public static MicroScheduler create(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
logger.info("Creating hierarchical microscheduler");
return new HierarchicalMicroScheduler(walker, reads, ref, rods, nThreadsToUse);
return new HierarchicalMicroScheduler(walker, reads, reference, rods, nThreadsToUse);
} else {
logger.info("Creating linear microscheduler");
return new LinearMicroScheduler(walker, reads, ref, rods);
return new LinearMicroScheduler(walker, reads, reference, rods);
}
}
@ -99,10 +87,10 @@ public abstract class MicroScheduler {
*
* @param walker the walker to execute with
* @param reads The reads.
* @param refFile File pointer to the reference.
* @param reference The reference.
* @param rods the rods to include in the traversal
*/
protected MicroScheduler(Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
protected MicroScheduler(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
if (walker instanceof ReadWalker) {
traversalEngine = new TraverseReads();
} else if (walker instanceof LocusWalker) {
@ -114,11 +102,17 @@ public abstract class MicroScheduler {
} else {
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
}
this.reads = setupReadsDataSource(reads);
this.reference = openReferenceSequenceFile(refFile);
this.rods = getReferenceOrderedDataSources(rods);
this.reads = reads;
this.reference = reference;
this.rods = rods;
validate(this.reads,this.reference);
// Side effect: initialize the traversal engine with reads data.
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
// doesn't have to bend over backward providing legacy getters and setters.
traversalEngine.setSAMHeader(reads.getHeader());
traversalEngine.initialize();
}
/**
@ -129,77 +123,18 @@ public abstract class MicroScheduler {
*/
/**
 * Get the traversal engine backing this scheduler.
 * @return The traversal engine selected for the walker's traversal type.
 */
public TraversalEngine getTraversalEngine() {
    return traversalEngine;
}
}
/**
* Walks a walker over the given list of intervals.
*
* @param walker Computation to perform over dataset.
* @param intervals A list of intervals over which to walk. Null for whole dataset.
* @param maxIterations the maximum number of iterations we're to perform
* @param shardStrategy A strategy for sharding the data.
*
* @return the return type of the walker
*/
public abstract Object execute(Walker walker, GenomeLocSortedSet intervals, Integer maxIterations);
public abstract Object execute(Walker walker, ShardStrategy shardStrategy);
/**
 * Get the sharding strategy given a driving data source.
 *
 * @param walker Walker for which to infer sharding strategy.
 * @param drivingDataSource Data on which to shard.
 * @param intervals Intervals to use when limiting sharding; may be null for whole-genome runs.
 * @param maxIterations the maximum number of iterations to run through
 *
 * @return Sharding strategy for this driving data source.
 */
protected ShardStrategy getShardStrategy(Walker walker,
                                         ReferenceSequenceFile drivingDataSource,
                                         GenomeLocSortedSet intervals,
                                         Integer maxIterations) {
    ShardStrategy shardStrategy = null;
    ShardStrategyFactory.SHATTER_STRATEGY shardType;
    if (walker instanceof LocusWalker) {
        // With intervals: interval-reducing walkers shard by interval, others linearly.
        if (intervals != null) {
            shardType = (walker.isReduceByInterval()) ?
                    ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
                    ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
            shardStrategy = ShardStrategyFactory.shatter(shardType,
                    drivingDataSource.getSequenceDictionary(),
                    SHARD_SIZE,
                    intervals, maxIterations);
        } else
            // Whole-genome locus traversal: shard linearly over the sequence dictionary.
            shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
                    drivingDataSource.getSequenceDictionary(),
                    SHARD_SIZE, maxIterations);
    } else if (walker instanceof ReadWalker ||
            walker instanceof DuplicateWalker) {
        // Read-driven traversals always shard by reads, with or without intervals.
        shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS;
        if (intervals != null) {
            shardStrategy = ShardStrategyFactory.shatter(shardType,
                    drivingDataSource.getSequenceDictionary(),
                    SHARD_SIZE,
                    intervals, maxIterations);
        } else {
            shardStrategy = ShardStrategyFactory.shatter(shardType,
                    drivingDataSource.getSequenceDictionary(),
                    SHARD_SIZE, maxIterations);
        }
    } else if (walker instanceof LocusWindowWalker) {
        // Locus-window traversals require explicit intervals to shard against.
        if( intervals == null )
            throw new StingException("Unable to shard: walker is of type LocusWindow, but no intervals were provided");
        shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL,
                drivingDataSource.getSequenceDictionary(),
                SHARD_SIZE,
                intervals, maxIterations);
    } else
        throw new StingException("Unable to support walker of type" + walker.getClass().getName());
    return shardStrategy;
}
/**
* Gets an window into all the data that can be viewed as a single shard.
@ -228,28 +163,6 @@ public abstract class MicroScheduler {
traversalEngine.printOnTraversalDone(sum);
}
/**
 * Builds a SAM data source wrapping the given reads specification.
 *
 * @param reads the read source information
 *
 * @return A data source for the given set of reads, or null when no read files were supplied.
 */
private SAMDataSource setupReadsDataSource(Reads reads) {
    // By-reference traversals legitimately run without any reads; signal that case with null.
    final boolean hasReads = reads.getReadsFiles().size() > 0;
    if (!hasReads)
        return null;

    final SAMDataSource readsDataSource = new SAMDataSource(reads);

    // Side effect: initialize the traversal engine with reads data.
    // TODO: Give users a dedicated way of getting the header so that the MicroScheduler
    //       doesn't have to bend over backward providing legacy getters and setters.
    traversalEngine.setSAMHeader(readsDataSource.getHeader());

    return readsDataSource;
}
/**
* Returns data source maintained by this scheduler
* @return
@ -257,37 +170,10 @@ public abstract class MicroScheduler {
public SAMDataSource getSAMDataSource() { return reads; }
/**
* Open the reference-ordered data sources.
*
* @param rods the reference order data to execute using
*
* @return A list of reference-ordered data sources.
* Returns the reference maintained by this scheduler.
* @return The reference maintained by this scheduler.
*/
/**
 * Wraps each reference-ordered data descriptor in a data source.
 *
 * @param rods the reference ordered data to execute using
 *
 * @return A list of reference-ordered data sources, one per descriptor. Never null.
 */
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
    List<ReferenceOrderedDataSource> sources = new ArrayList<ReferenceOrderedDataSource>(rods.size());
    for (ReferenceOrderedData<? extends ReferenceOrderedDatum> descriptor : rods) {
        sources.add(new ReferenceOrderedDataSource(descriptor));
    }
    return sources;
}
/**
 * Opens a reference sequence file paired with an index.
 *
 * @param refFile Handle to a reference sequence file.  Non-null.
 *
 * @return A thread-safe file wrapper.
 */
private IndexedFastaSequenceFile openReferenceSequenceFile(File refFile) {
    final IndexedFastaSequenceFile sequenceFile;
    try {
        sequenceFile = new IndexedFastaSequenceFile(refFile);
    }
    catch (FileNotFoundException ex) {
        throw new StingException("I/O error while opening fasta file: " + ex.getMessage(), ex);
    }
    // Register this reference's contig ordering with the genome loc parser.
    GenomeLocParser.setupRefContigOrdering(sequenceFile);
    return sequenceFile;
}
public IndexedFastaSequenceFile getReference() { return reference; }
/**
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference.

View File

@ -48,7 +48,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** an optional argument to dump the reads out to a BAM file */
@Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
String outputBamFile = null;
SAMFileWriter outputBamFile = null;
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false)
Integer maxLength = null;
@Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false)
@ -102,12 +102,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
*/
public SAMFileWriter reduceInit() {
if (outputBamFile != null) {
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
return Utils.createSAMFileWriterWithCompression(header, true, outputBamFile, getToolkit().getBAMCompression());
} else {
return null;
}
return outputBamFile;
}
/**

View File

@ -47,6 +47,9 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
@Argument(fullName="outputRoot", doc="output BAM file", required=false)
public String outputRoot = null;
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
public Integer BAMcompression = 5;
private static Logger logger = Logger.getLogger(SplitSamFileWalker.class);
private static String VERSION = "0.0.1";
@ -92,7 +95,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
final String sample = elt.getKey();
final String filename = outputRoot + sample + ".bam";
logger.info(String.format("Creating BAM output file %s for sample %s", filename, sample));
SAMFileWriter output = Utils.createSAMFileWriterWithCompression(elt.getValue(), true, filename, getToolkit().getBAMCompression());
SAMFileWriter output = Utils.createSAMFileWriterWithCompression(elt.getValue(), true, filename, BAMcompression);
outputs.put(sample, output);
}

View File

@ -1,12 +1,9 @@
package org.broadinstitute.sting.gatk.walkers;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Map;
import java.util.List;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.OutputTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.apache.log4j.Logger;
@ -36,11 +33,6 @@ public abstract class Walker<MapType, ReduceType> {
protected Walker() {
}
/**
 * Binds this walker's out/err PrintStream fields to the streams supplied by the given tracker.
 * @param outputTracker Provider of the underlying output and error streams.
 */
public void initializeOutputStreams( OutputTracker outputTracker ) {
    // Wrap the tracker-provided streams so walker code can print via the shared out/err fields.
    out = new PrintStream( outputTracker.getOutStream() );
    err = new PrintStream( outputTracker.getErrStream() );
}
/**
* Retrieve the toolkit, for peering into internal structures that can't
* otherwise be read. Use sparingly, and discuss uses with software engineering

View File

@ -38,7 +38,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
* Target file for BAM output.
*/
@Argument(fullName="output_bam",shortName="ob",doc="Output BAM file",required=true)
String outputBAMFileName = null;
SAMFileWriter outputBAM = null;
/**
* The set of (sorted) cleaned reads
@ -50,11 +50,6 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
*/
private HashSet<String> cleanedReadHash = new HashSet<String>();
/**
* The writer that handles writing of SAM files.
*/
SAMFileWriter outputBAM = null;
@Override
public void initialize() {
@ -68,15 +63,6 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
cleanedReadHash.add(getUniquifiedReadName(read));
}
allReads.close();
// HACK: The unit tests create their own output files. Make sure this walker doesn't step
// on any toes.
if( outputBAM == null ) {
outputBAM = Utils.createSAMFileWriterWithCompression(getToolkit().getEngine().getSAMHeader(),
true,
outputBAMFileName,
getToolkit().getBAMCompression());
}
}
/**

View File

@ -22,7 +22,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
@Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false)
boolean allow454 = false;
@Argument(fullName="OutputCleaned", shortName="O", required=false, doc="Output file (sam or bam) for improved (realigned) reads")
String OUT = null;
SAMFileWriter writer = null;
@Argument(fullName="OutputIndels", shortName="indels", required=false, doc="Output file (text) for the indels found")
String OUT_INDELS = null;
@Argument(fullName="OutputCleanedReadsOnly", shortName="cleanedOnly", doc="print out cleaned reads only (otherwise, all reads within the intervals)", required=false)
@ -45,7 +45,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// fraction of mismatches that need to no longer mismatch for a column to be considered cleaned
private static final double MISMATCH_COLUMN_CLEANED_FRACTION = 0.75;
private SAMFileWriter writer = null;
private FileWriter indelOutput = null;
private FileWriter statsOutput = null;
private FileWriter snpsOutput = null;
@ -63,12 +62,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
throw new RuntimeException("Entropy threshold must be a fraction between 0 and 1");
SAMFileHeader header = getToolkit().getEngine().getSAMHeader();
if ( OUT != null ) {
writer = Utils.createSAMFileWriterWithCompression(header, true, OUT, getToolkit().getBAMCompression());
if ( writer != null ) {
readsToWrite = new TreeSet<ComparableSAMRecord>();
}
logger.info("Writing into output BAM file at compression level " + getToolkit().getBAMCompression());
logger.info("Writing into output BAM file");
logger.info("Temporary space used: "+System.getProperty("java.io.tmpdir"));
generator = new Random();

View File

@ -19,8 +19,8 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
@Argument(shortName="logisticParams", doc="logistic params file", required=true)
public String logisticParamsFile;
@Argument(shortName="outputBAM", doc="output BAM file", required=false)
public String outputBamFile = null;
@Argument(fullName="outputBamFile",shortName="outputBAM", doc="output BAM file", required=false)
public SAMFileWriter outputBamFile = null;
@Argument(shortName="useCache", doc="If true, uses high-performance caching of logistic regress results. Experimental", required=false)
public boolean useLogisticCache = true;
@ -308,13 +308,7 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
}
public SAMFileWriter reduceInit() {
if ( outputBamFile != null ) { // ! outputBamFile.equals("") ) {
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
return Utils.createSAMFileWriterWithCompression(header, true, outputBamFile, getToolkit().getBAMCompression());
}
else {
return null;
}
return outputBamFile;
}
/**

View File

@ -46,8 +46,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
@Argument(shortName="params", doc="CountCovariates params file", required=true)
public String paramsFile;
@Argument(shortName="outputBAM", doc="output BAM file", required=false)
public String outputBamFile = null;
@Argument(fullName="outputBamFile", shortName="outputBAM", doc="output BAM file", required=false)
public SAMFileWriter outputBamFile = null;
@Argument(shortName="rawQempirical", doc="If provided, we will use raw Qempirical scores calculated from the # mismatches and # bases, rather than the more conservative estimate of # mismatches + 1 / # bases + 1", required=false)
public boolean rawQempirical = false;
@ -321,13 +321,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
}
public SAMFileWriter reduceInit() {
if ( outputBamFile != null ) {
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
return Utils.createSAMFileWriterWithCompression(header, true, outputBamFile, getToolkit().getBAMCompression());
}
else {
return null;
}
return outputBamFile;
}
/**

View File

@ -33,6 +33,9 @@ public class IOCrusherWalker extends ReadWalker<SAMRecord, ArrayList<SAMFileWrit
@Argument(shortName="outputBase",doc="output base",required=true)
public String outputBase;
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
public Integer BAMcompression = 5;
public long nReadsRead = 0;
public long nReadsWritten = 0;
@ -51,7 +54,7 @@ public class IOCrusherWalker extends ReadWalker<SAMRecord, ArrayList<SAMFileWrit
ArrayList<SAMFileWriter> outputs = new ArrayList<SAMFileWriter>(nWaysOut);
for ( int i = 0; i < nWaysOut; i++ ) {
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
outputs.add(Utils.createSAMFileWriterWithCompression(header, true, outputBase + "." + i + ".bam", getToolkit().getBAMCompression()));
outputs.add(Utils.createSAMFileWriterWithCompression(header, true, outputBase + "." + i + ".bam", BAMcompression));
}
return outputs;
}

View File

@ -28,7 +28,7 @@ public class ReplaceQuals extends ReadWalker<SAMRecord, SAMFileWriter> {
public String inputQualsBAM;
@Argument(shortName="outputBAM", required=false, doc="output BAM file for reads with replaced quals")
public String outputFilename = null;
public SAMFileWriter outputBAM = null;
public int MAX_READS_TO_LOAD = -1;
@ -108,13 +108,7 @@ public class ReplaceQuals extends ReadWalker<SAMRecord, SAMFileWriter> {
}
public SAMFileWriter reduceInit() {
if ( outputFilename != null ) { // ! outputBamFile.equals("") ) {
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
return Utils.createSAMFileWriterWithCompression(header, true, outputFilename, getToolkit().getBAMCompression());
}
else {
return null;
}
return outputBAM;
}
/**

View File

@ -23,7 +23,7 @@ import net.sf.samtools.SAMFileHeader;
*/
public class CombineDuplicatesWalker extends DuplicateWalker<SAMRecord, SAMFileWriter> {
@Argument(fullName="outputBAM", shortName="outputBAM", required=false, doc="BAM File to write combined duplicates to")
public String outputFilename = null;
public SAMFileWriter outputBAM = null;
@Argument(fullName="includeUniqueReads", shortName="includeUniqueReads", required=false, doc="If true, also writes out non-duplicate reads in file")
public boolean INCLUDE_UNIQUE_READS = true;
@ -48,13 +48,7 @@ public class CombineDuplicatesWalker extends DuplicateWalker<SAMRecord, SAMFileW
}
public SAMFileWriter reduceInit() {
if ( outputFilename != null ) {
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
return Utils.createSAMFileWriterWithCompression(header, true, outputFilename, getToolkit().getBAMCompression());
}
else {
return null;
}
return outputBAM;
}
/**

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils;
import java.lang.reflect.Modifier;
import java.lang.reflect.Field;
import java.io.File;
import java.io.IOException;
@ -48,4 +49,40 @@ public class JVMUtils {
!Modifier.isInterface(clazz.getModifiers());
}
/**
 * Find the field with the given name in the class.  Will inspect all fields, independent
 * of access level, walking up the superclass chain until a match is found.
 * @param type Class in which to search for the given field.
 * @param fieldName Name of the field for which to search.
 * @return The field, or null if no such field exists.
 */
public static Field findField( Class type, String fieldName ) {
    // Walk the inheritance hierarchy from the given class up to (and excluding) null.
    for( Class<?> current = type; current != null; current = current.getSuperclass() ) {
        for( Field candidate: current.getDeclaredFields() ) {
            if( candidate.getName().equals(fieldName) )
                return candidate;
        }
    }
    return null;
}
/**
 * Sets the provided field in the given instance to the given value.  Circumvents access restrictions:
 * a field can be private and still set successfully by this function.
 * @param field Field to set in the given object.
 * @param instance Instance in which to set the field.
 * @param value The value to which to set the given field in the given instance.  May be null
 *              if the field's type permits it.
 */
public static void setField( Field field, Object instance, Object value ) {
    try {
        field.setAccessible(true);
        field.set(instance, value);
    }
    catch( IllegalAccessException ex ) {
        // Bug fixes: preserve the original exception as the cause, and use String.valueOf so
        // a null value no longer triggers a NullPointerException while formatting the message.
        throw new StingException(String.format("Could not set %s in instance %s to %s",field.getName(),instance.getClass().getName(),String.valueOf(value)), ex);
    }
}
}

View File

@ -1,42 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.cmdLine;
import java.util.List;
/**
 * Provides a service whereby the application can provide a mechanism
 * for creating specialty arguments.
 * @version 0.1
 */
public abstract class ArgumentFactory {
    /**
     * Create an instance of a specified type of argument.
     * @param type The type of the argument to create.
     * @param repr A String representation of the argument.
     * @return The newly created argument value, or null if this factory cannot create
     *         arguments of the given type (callers fall back to standard parsing on null).
     */
    public abstract Object createArgument(Class type, String... repr);
}

View File

@ -1,14 +1,6 @@
package org.broadinstitute.sting.utils.cmdLine;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: mhanna
@ -170,7 +162,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
/**
* A mapping of all the sites where an argument definition maps to a site on the command line.
*/
class ArgumentMatch {
class ArgumentMatch implements Iterable<ArgumentMatch> {
/**
* The argument definition that's been matched.
*/
@ -184,20 +176,123 @@ class ArgumentMatch {
/**
* Maps indicies of command line arguments to values paired with that argument.
*/
public final Map<Integer,List<String>> indices = new HashMap<Integer,List<String>>();
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
/**
* Create a new argument match, defining its properties later. Used to create invalid arguments.
*/
public ArgumentMatch() {
definition = null;
label = null;
this.label = null;
this.definition = null;
}
/**
 * A simple way of indicating that an argument with the given label and definition exists at this index.
 * @param label Label of the argument match.  Must not be null.
 * @param definition The associated definition, if one exists.  May be null.
 * @param index Position of the argument on the command line.
 */
public ArgumentMatch( String label, ArgumentDefinition definition, int index ) {
    this( label, definition, index, null );
}
private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) {
this.label = label;
this.definition = definition;
indices.put(index,null);
ArrayList<String> values = new ArrayList<String>();
if( value != null )
values.add(value);
indices.put(index,values );
}
/**
 * Creates an iterator that walks over each individual match at each position of a given argument.
 * @return An iterator over the individual matches in this argument.  Will not be null.
 */
public Iterator<ArgumentMatch> iterator() {
    return new Iterator<ArgumentMatch>() {
        /**
         * Iterates over each available index.
         */
        private Iterator<Integer> indexIterator = null;

        /**
         * Iterates over each available token.
         */
        private Iterator<String> tokenIterator = null;

        /**
         * The next index to return.  Null if none remain.
         */
        Integer nextIndex = null;

        /**
         * The next token to return.  Null if none remain.
         */
        String nextToken = null;

        // Instance initializer: prime the iteration with the first available token, if any.
        {
            indexIterator = indices.keySet().iterator();
            prepareNext();
        }

        /**
         * Is there a nextToken available to return?
         * @return True if there's another token waiting in the wings.  False otherwise.
         */
        public boolean hasNext() {
            return nextToken != null;
        }

        /**
         * Get the next token, if one exists.  If not, throw NoSuchElementException
         * per the java.util.Iterator contract (bug fix: previously threw IllegalStateException).
         * @return The next ArgumentMatch in the series.  Should never be null.
         */
        public ArgumentMatch next() {
            if( nextIndex == null || nextToken == null )
                throw new NoSuchElementException( "No more ArgumentMatches are available" );

            ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken );
            prepareNext();
            return match;
        }

        /**
         * Initialize the next ArgumentMatch to return.  If no ArgumentMatches are available,
         * initialize nextIndex / nextToken to null.
         */
        private void prepareNext() {
            if( tokenIterator != null && tokenIterator.hasNext() ) {
                nextToken = tokenIterator.next();
            }
            else {
                nextIndex = null;
                nextToken = null;

                // Do a nested loop.  While more data is present in the inner loop, grab that data.
                // Otherwise, troll the outer iterator looking for more data.
                while( indexIterator.hasNext() ) {
                    nextIndex = indexIterator.next();
                    if( indices.get(nextIndex) != null ) {
                        tokenIterator = indices.get(nextIndex).iterator();
                        if( tokenIterator.hasNext() ) {
                            nextToken = tokenIterator.next();
                            break;
                        }
                    }
                }
            }
        }

        /**
         * Remove is unsupported in this context.
         */
        public void remove() {
            throw new UnsupportedOperationException("Cannot remove an argument match from the collection while iterating.");
        }
    };
}
/**

View File

@ -100,22 +100,8 @@ public class ArgumentSource {
* @return A non-null, non-empty list of argument definitions.
*/
public List<ArgumentDefinition> createArgumentDefinitions() {
String fullName = descriptor.fullName().trim().length() > 0 ? descriptor.fullName().trim() : field.getName().toLowerCase();
String shortName = descriptor.shortName().trim().length() > 0 ? descriptor.shortName().trim() : null;
String doc = descriptor.doc();
boolean required = descriptor.required() && !isFlag();
String exclusiveOf = descriptor.exclusiveOf().trim().length() > 0 ? descriptor.exclusiveOf().trim() : null;
String validation = descriptor.validation().trim().length() > 0 ? descriptor.validation().trim() : null;
ArgumentDefinition argumentDefinition = new ArgumentDefinition( this,
fullName,
shortName,
doc,
required,
exclusiveOf,
validation );
return Collections.singletonList(argumentDefinition);
ArgumentTypeDescriptor typeDescriptor = ArgumentTypeDescriptor.create( field.getType() );
return typeDescriptor.createArgumentDefinitions( this, descriptor );
}
/**
@ -123,30 +109,16 @@ public class ArgumentSource {
* @param targetInstance Instance into which to inject the parsed value.
* @param values String representation of all values passed.
*/
public void inject( ArgumentFactory customArgumentFactory, Object targetInstance, String... values ) {
public Object parse( ArgumentSource source, Object targetInstance, ArgumentMatch... values ) {
Object value = null;
if( !isFlag() ) {
ArgumentTypeDescriptor typeDescriptor = ArgumentTypeDescriptor.create( field.getType() );
value = typeDescriptor.parse( source, values );
}
else
value = true;
if( customArgumentFactory != null ) {
value = customArgumentFactory.createArgument(field.getType(), values);
}
if( value == null ) {
if( !isFlag() ) {
ArgumentTypeDescriptor typeDescriptor = ArgumentTypeDescriptor.create( field.getType() );
value = typeDescriptor.parse( field, field.getType(), values );
}
else
value = true;
}
try {
field.setAccessible(true);
field.set(targetInstance, value);
}
catch( IllegalAccessException ex ) {
//logger.fatal("processArgs: cannot convert field " + field.toString());
throw new StingException("processArgs: Failed conversion " + ex.getMessage(), ex);
}
return value;
}
/**

View File

@ -26,10 +26,16 @@
package org.broadinstitute.sting.utils.cmdLine;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.sam.SAMFileWriterBuilder;
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
import org.apache.log4j.Logger;
import java.lang.reflect.*;
import java.util.*;
import java.io.File;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileReader;
/**
* An factory capable of providing parsers that can parse any type
@ -48,7 +54,9 @@ public abstract class ArgumentTypeDescriptor {
/**
* Class reference to the different types of descriptors that the create method can create.
*/
private static List<ArgumentTypeDescriptor> descriptors = Arrays.asList( new SimpleArgumentTypeDescriptor(),
private static List<ArgumentTypeDescriptor> descriptors = Arrays.asList( new SAMFileReaderArgumentTypeDescriptor(),
new SAMFileWriterArgumentTypeDescriptor(),
new SimpleArgumentTypeDescriptor(),
new CompoundArgumentTypeDescriptor() );
public static ArgumentTypeDescriptor create( Class type ) {
@ -59,11 +67,91 @@ public abstract class ArgumentTypeDescriptor {
throw new StingException("Can't process command-line arguments of type: " + type.getName());
}
/**
* Does this descriptor support classes of the given type?
* @param type The type to check.
* @return true if this descriptor supports the given type, false otherwise.
*/
public abstract boolean supports( Class type );
/**
 * Given the given argument source and attributes, synthesize argument definitions for command-line arguments.
 * @param source Source class and field for the given argument.
 * @param description Description of the fields that go into a given argument.
 * @return A list of command-line argument definitions supporting this field.
 */
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source, Argument description ) {
    // Derive each attribute via the overridable accessors so subclasses can customize naming.
    String fullName = getFullName( source, description );
    String shortName = getShortName( source, description );
    String doc = getDoc( source, description );
    boolean required = isRequired( source, description );
    String exclusiveOf = getExclusiveOf( source, description );
    String validation = getValidationRegex( source, description );

    return Collections.singletonList( new ArgumentDefinition(source, fullName, shortName, doc, required, exclusiveOf, validation) );
}
public abstract Object parse( Field field, Class type, String... values );
// Convenience overload: parse the matched values using the source field's declared type.
public Object parse( ArgumentSource source, ArgumentMatch... values ) {
    return parse( source, source.field.getType(), values );
}
protected abstract Object parse( ArgumentSource source, Class type, ArgumentMatch... values );
/**
 * Retrieves the full name of the argument, specifiable with the '--' prefix.  The full name can be
 * either specified explicitly with the fullName annotation parameter or implied by the field name.
 * @return full name of the argument.  Never null.
 */
protected String getFullName( ArgumentSource source, Argument description ) {
    String explicitName = description.fullName().trim();
    if( explicitName.length() > 0 )
        return explicitName;
    // No explicit name given; fall back to the lowercased field name.
    return source.field.getName().toLowerCase();
}
/**
 * Retrieves the short name of the argument, specifiable with the '-' prefix.  The short name can
 * be specified or not; if left unspecified, no short name will be present.
 * @return short name of the argument.  Null if no short name exists.
 */
protected String getShortName( ArgumentSource source, Argument description ) {
    String explicitName = description.shortName().trim();
    return explicitName.length() > 0 ? explicitName : null;
}
/**
 * Documentation for this argument, taken verbatim from the annotation.  Mandatory field.
 * @return Documentation for this argument.
 */
protected String getDoc( ArgumentSource source, Argument description ) {
    return description.doc();
}
/**
 * Returns whether this field is required.  Note that flag fields are always forced to 'not required'.
 * @return True if the field is mandatory and not a boolean flag.  False otherwise.
 */
protected boolean isRequired( ArgumentSource source, Argument description ) {
    boolean annotatedRequired = description.required();
    // Flags default to false when absent, so they can never be mandatory.
    return annotatedRequired && !source.isFlag();
}
/**
 * Specifies other arguments which cannot be used in conjunction with this argument.  Comma-separated list.
 * @return A comma-separated list of exclusive arguments, or null if none are present.
 */
protected String getExclusiveOf( ArgumentSource source, Argument description ) {
    String exclusiveOf = description.exclusiveOf().trim();
    return exclusiveOf.length() > 0 ? exclusiveOf : null;
}
/**
 * A regular expression which can be used for validation.
 * @return a JVM regex-compatible regular expression, or null to permit any possible value.
 */
protected String getValidationRegex( ArgumentSource source, Argument description ) {
    String regex = description.validation().trim();
    return regex.length() > 0 ? regex : null;
}
}
/**
* Parse simple argument types: java primitives, wrapper classes, and anything that has
* a simple String constructor.
*/
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public boolean supports( Class type ) {
@ -83,11 +171,10 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
public Object parse( Field field, Class type, String... values ) {
if( values.length > 1 )
protected Object parse( ArgumentSource source, Class type, ArgumentMatch... matches ) {
if( matches.length > 1 || matches[0].values().size() > 1 )
throw new StingException("Simple argument parser is unable to parse multiple arguments.");
String value = values[0];
String value = matches[0].values().get(0);
// lets go through the types we support
try {
@ -131,6 +218,9 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
};
}
/**
* Process compound argument types: arrays, and typed and untyped collections.
*/
class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public boolean supports( Class type ) {
@ -138,10 +228,13 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
public Object parse( Field field, Class type, String... values )
public Object parse( ArgumentSource source, Class type, ArgumentMatch... matches )
{
Class componentType = null;
ArgumentTypeDescriptor componentArgumentParser;
if( matches.length > 1 )
throw new StingException("Simple argument parser is unable to combine multiple argument types into a compound argument.");
ArgumentMatch match = matches[0];
if( Collection.class.isAssignableFrom(type) ) {
@ -155,48 +248,45 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
// If this is a parameterized collection, find the contained type; blow up if more than one type argument exists.
if( field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
if( source.field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)source.field.getGenericType();
if( parameterizedType.getActualTypeArguments().length > 1 )
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
throw new IllegalArgumentException("Unable to determine collection type of field: " + source.field.toString());
componentType = (Class)parameterizedType.getActualTypeArguments()[0];
}
else
componentType = String.class;
}
else if( type.isArray() ) {
componentType = type.getComponentType();
}
else
throw new StingException("Unsupported compound argument type: " + type);
componentArgumentParser = ArgumentTypeDescriptor.create( componentType );
ArgumentTypeDescriptor componentArgumentParser = ArgumentTypeDescriptor.create( componentType );
if( Collection.class.isAssignableFrom(type) ) {
Collection collection = null;
try {
collection = (Collection)type.newInstance();
}
catch (InstantiationException e) {
logger.fatal("ArgumentParser: InstantiationException: cannot convert field " + field.getName());
logger.fatal("ArgumentParser: InstantiationException: cannot convert field " + source.field.getName());
throw new StingException("constructFromString:InstantiationException: Failed conversion " + e.getMessage());
}
catch (IllegalAccessException e) {
logger.fatal("ArgumentParser: IllegalAccessException: cannot convert field " + field.getName());
logger.fatal("ArgumentParser: IllegalAccessException: cannot convert field " + source.field.getName());
throw new StingException("constructFromString:IllegalAccessException: Failed conversion " + e.getMessage());
}
for( String value: values )
collection.add( componentArgumentParser.parse(field,componentType,value) );
for( ArgumentMatch value: match )
collection.add( componentArgumentParser.parse(source,componentType,value) );
return collection;
}
else if( type.isArray() ) {
Object arr = Array.newInstance(componentType,values.length);
componentType = type.getComponentType();
ArgumentTypeDescriptor componentArgumentParser = ArgumentTypeDescriptor.create( componentType );
Object arr = Array.newInstance(componentType,match.values().size());
int i = 0;
for( ArgumentMatch value: match )
Array.set( arr,i++,componentArgumentParser.parse(source,componentType,value));
for( int i = 0; i < values.length; i++ )
Array.set( arr,i,componentArgumentParser.parse(field,componentType,values[i]));
return arr;
}
else
@ -204,3 +294,102 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
}
/**
 * Handle SAMFileReaders.  Parses a filename argument into a SAMFileReaderBuilder, which the
 * engine later materializes into an actual reader.
 */
class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
    @Override
    public boolean supports( Class type ) {
        return SAMFileReader.class.isAssignableFrom(type);
    }

    @Override
    public Object parse( ArgumentSource source, Class type, ArgumentMatch... matches ) {
        if( matches.length > 1 )
            throw new UnsupportedOperationException("Only an input file name and validation stringency can be supplied when creating a BAM file reader.");

        SAMFileReaderBuilder builder = new SAMFileReaderBuilder();

        ArgumentMatch readerMatch = matches[0];

        // Bug fix: the original message here was copy-pasted from the SAMFileWriter descriptor
        // and talked about compression/writers, which makes no sense for a reader.
        if( readerMatch == null )
            throw new StingException("A SAM file reader argument was requested, but no associated match was supplied with it.");
        if( readerMatch.values().size() > 1 )
            throw new StingException("Only one filename can be supplied per created BAM file");

        builder.setSAMFile(new File(readerMatch.values().get(0).trim()));

        return builder;
    }
}
/**
 * Handles SAMFileWriter arguments.  Gathers the output file name and an
 * optional companion compression-level argument into a SAMFileWriterBuilder,
 * deferring actual writer construction until a header is available.
 */
class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
    /** Full name of the auxiliary argument controlling BAM compression. */
    private static final String COMPRESSION_FULLNAME = "bam_compression";
    /** Short name of the auxiliary argument controlling BAM compression. */
    private static final String COMPRESSION_SHORTNAME = "compress";

    /**
     * Reports whether this descriptor can populate a field of the given type.
     * @param type Declared type of the candidate field.
     * @return True if the field is a SAMFileWriter (or subclass thereof).
     */
    @Override
    public boolean supports( Class type ) {
        return SAMFileWriter.class.isAssignableFrom(type);
    }

    /**
     * Creates the argument definitions for this writer: the writer file name
     * itself (defaulting to outputBAM/-ob when the annotation supplies no
     * names) plus a secondary, always-optional compression-level argument.
     * @param source Source field for the argument.
     * @param description Argument annotation attached to the field.
     * @return Definitions for both the writer and its compression level.
     */
    @Override
    public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source, Argument description ) {
        String fullName = description.fullName().trim().length() > 0 ? description.fullName().trim() : "outputBAM";
        String shortName = description.shortName().trim().length() > 0 ? description.shortName().trim() : "ob";

        ArgumentDefinition writerDefinition = new ArgumentDefinition( source,
                fullName,
                shortName,
                getDoc( source, description ),
                isRequired( source, description ),
                getExclusiveOf( source, description ),
                getValidationRegex( source, description ) );
        ArgumentDefinition compressionDefinition = new ArgumentDefinition( source,
                COMPRESSION_FULLNAME,
                COMPRESSION_SHORTNAME,
                "Compression level to use for writing BAM files",
                false,
                "",
                "" );
        return Arrays.asList( writerDefinition, compressionDefinition );
    }

    /**
     * Converts the command-line matches (file name, and optionally a
     * compression level) into a SAMFileWriterBuilder.
     * @param source Source field receiving the argument.
     * @param type Declared type of the field.
     * @param matches Command-line matches; at most two are expected.
     * @return A SAMFileWriterBuilder holding the accumulated settings.
     * @throws StingException if the file name is missing or any argument has
     *         more than one value.
     */
    @Override
    public Object parse( ArgumentSource source, Class type, ArgumentMatch... matches )  {
        if( matches.length > 2 )
            throw new UnsupportedOperationException("Only an output file name and compression level can be supplied when creating a BAM file writer.");

        SAMFileWriterBuilder builder = new SAMFileWriterBuilder();

        // Partition the matches: the compression argument is recognized by
        // its full name; anything else must be the writer's file name.
        ArgumentMatch writerMatch = null;
        ArgumentMatch compressionMatch = null;
        for( ArgumentMatch match: matches ) {
            if( match.definition.fullName.equals(COMPRESSION_FULLNAME) )
                compressionMatch = match;
            else
                writerMatch = match;
        }

        if( writerMatch == null )
            throw new StingException("SAM file compression was supplied, but no associated writer was supplied with it.");
        if( writerMatch.values().size() > 1 )
            throw new StingException("Only one filename can be supplied per created BAM file");
        builder.setSAMFile(new File(writerMatch.values().get(0).trim()));

        if( compressionMatch != null ) {
            if( compressionMatch.values().size() > 1 )
                throw new StingException("Only one value can be supplied for BAM compression");
            int compressionLevel = Integer.valueOf(compressionMatch.values().get(0));
            builder.setCompressionLevel(compressionLevel);
        }

        return builder;
    }
}

View File

@ -109,6 +109,16 @@ public abstract class CommandLineProgram {
*/
protected Class[] getArgumentSources() { return new Class[] {}; }
/**
* Allows arguments to be hijacked by subclasses of the program before being placed
* into plugin classes.
* @param source Source class for the argument.
* @param targetInstance Instance into which the value should be ultimately injected.
* @param value Value to inject.
* @return True if the particular field has been hijacked; false otherwise.
*/
protected boolean intercept( ArgumentSource source, Object targetInstance, Object value ) { return false; }
/**
* Name this argument source. Provides the (full) class name as a default.
* @param source The argument source.
@ -124,15 +134,6 @@ public abstract class CommandLineProgram {
*/
protected abstract int execute();
/**
* Retrieves a factory for custom creation of command-line arguments, specified by the
* subclass.
* @return
*/
protected ArgumentFactory getCustomArgumentFactory() {
return null;
}
static {
// setup a basic log configuration
BasicConfigurator.configure();
@ -152,7 +153,7 @@ public abstract class CommandLineProgram {
PatternLayout layout = new PatternLayout();
// setup the parser
ParsingEngine parser = clp.parser = new ParsingEngine( clp.getCustomArgumentFactory() );
ParsingEngine parser = clp.parser = new ParsingEngine(clp);
parser.addArgumentSource( clp.getClass() );
// process the args

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.cmdLine;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.JVMUtils;
import org.apache.log4j.Logger;
import java.lang.reflect.*;
@ -25,6 +26,11 @@ import java.util.*;
* A parser for Sting command-line arguments.
*/
public class ParsingEngine {
/**
* The command-line program at the heart of this parsing engine.
*/
CommandLineProgram clp = null;
/**
* A collection of all the source fields which define command-line arguments.
*/
@ -43,13 +49,6 @@ public class ParsingEngine {
*/
ArgumentMatches argumentMatches = null;
/**
* Stores a custom argument factory for building out arguments of which only
* subclasses of CommandLineProgram should be aware.
*/
ArgumentFactory customArgumentFactory = null;
/**
* Techniques for parsing and for argument lookup.
*/
@ -60,8 +59,8 @@ public class ParsingEngine {
*/
protected static Logger logger = Logger.getLogger(ParsingEngine.class);
public ParsingEngine( ArgumentFactory customArgumentFactory ) {
this.customArgumentFactory = customArgumentFactory;
public ParsingEngine( CommandLineProgram clp ) {
this.clp = clp;
parsingMethods.add( ParsingMethod.FullNameParsingMethod );
parsingMethods.add( ParsingMethod.ShortNameParsingMethod );
}
@ -245,33 +244,27 @@ public class ParsingEngine {
// Get a list of argument sources, not including the children of this argument. For now, skip loading
// arguments into the object recursively.
List<ArgumentSource> argumentSources = extractArgumentSources( object.getClass(), false );
for( ArgumentSource argumentSource: argumentSources ) {
Collection<ArgumentMatch> argumentsMatchingSource = argumentMatches.findMatches( argumentSource );
if( argumentsMatchingSource.size() != 0 )
loadMatchesIntoObject( argumentsMatchingSource, object );
}
for( ArgumentSource argumentSource: argumentSources )
loadMatchesIntoObject( argumentSource, object, argumentMatches.findMatches(argumentSource) );
}
/**
* Loads a single argument into the object.
* @param argumentMatches Argument matches to load into the object.
* @param object Target for the argument.
* @param target
*/
private void loadMatchesIntoObject( Collection<ArgumentMatch> argumentMatches, Object object ) {
if( argumentMatches.size() > 1 )
throw new StingException("Too many matches");
ArgumentMatch match = argumentMatches.iterator().next();
ArgumentDefinition definition = match.definition;
// A null definition might be in the list if some invalid arguments were passed in but we
// want to load in a subset of data for better error reporting. Ignore null definitions.
if( definition == null )
private void loadMatchesIntoObject( ArgumentSource source, Object target, Collection<ArgumentMatch> argumentMatches ) {
// Nothing to load
if( argumentMatches.size() == 0 )
return;
if( definition.source.clazz.isAssignableFrom(object.getClass()) ) {
String[] tokens = match.values().toArray(new String[0]);
definition.source.inject( customArgumentFactory, object, tokens );
if( argumentMatches.size() > 1 )
throw new StingException("Too many values matched argument: " + source.field.getName());
if( source.clazz.isAssignableFrom(target.getClass()) ) {
Object value = source.parse( source, target, argumentMatches.toArray(new ArgumentMatch[0]) );
if( clp == null || !clp.intercept(source, target, value) )
JVMUtils.setField( source.field, target, value );
}
}

View File

@ -0,0 +1,84 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileReader;
import java.io.File;
import org.broadinstitute.sting.utils.StingException;
/**
 * Allows the user to steadily accumulate information about what
 * components go into a SAM file reader, ultimately using this
 * information to create a SAM file reader on demand.
 *
 * @author mhanna
 * @version 0.1
 */
public class SAMFileReaderBuilder {
    /**
     * From which file should reads be drawn?
     */
    private File samFile = null;

    /**
     * How strictly should the SAM file be validated while reading?
     */
    private SAMFileReader.ValidationStringency validationStringency = null;

    /**
     * Sets the handle of the sam file from which data should be read.
     * @param samFile The SAM file from which data should flow.
     */
    public void setSAMFile( File samFile ) {
        this.samFile = samFile;
    }

    /**
     * Sets the validation stringency to apply when reading this sam file.
     * @param validationStringency Stringency to apply.  Must not be null.
     */
    public void setValidationStringency( SAMFileReader.ValidationStringency validationStringency ) {
        this.validationStringency = validationStringency;
    }

    /**
     * Create the SAM reader, given the constituent parts accrued.
     * @return Newly minted SAM file reader.
     * @throws StingException if the file name or validation stringency has
     *         not yet been supplied.
     */
    public SAMFileReader build() {
        // Both pieces of state are mandatory; fail with a message naming the
        // piece that is actually missing.
        if( samFile == null )
            throw new StingException( "Filename for input sam file must be supplied.");
        if( validationStringency == null )
            throw new StingException( "Validation stringency for input sam file must be supplied.");

        SAMFileReader reader = new SAMFileReader( samFile );
        reader.setValidationStringency( validationStringency );
        return reader;
    }
}

View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import java.io.File;
import org.broadinstitute.sting.utils.StingException;
/**
 * Accumulates the pieces needed to construct a SAM file writer — the
 * destination file, the header, and an optional compression level — and
 * builds the writer lazily once everything required is present.
 *
 * @author mhanna
 * @version 0.1
 */
public class SAMFileWriterBuilder {
    /**
     * Default compression level for newly constructed SAM files.
     * Default to 5 (based on research by Alec Wysoker)
     */
    public static final int DEFAULT_COMPRESSION_LEVEL = 5;

    // Destination file for the writer; mandatory before build().
    private File samFile = null;

    // Header to emit at the top of the SAM file; mandatory before build().
    private SAMFileHeader header = null;

    // Compression level for the BAM output; optional, defaults as above.
    private int compressionLevel = DEFAULT_COMPRESSION_LEVEL;

    /**
     * Sets the handle of the sam file to which data should be written.
     * @param samFile The SAM file into which data should flow.
     */
    public void setSAMFile( File samFile ) {
        this.samFile = samFile;
    }

    /**
     * Sets the header to be written at the head of this SAM file.
     * @param header Header to write.
     */
    public void setSAMFileHeader( SAMFileHeader header ) {
        this.header = header;
    }

    /**
     * Sets the compression level to use when writing this BAM file.
     * @param compressionLevel Compression level to use when writing this SAM file.
     */
    public void setCompressionLevel( int compressionLevel ) {
        this.compressionLevel = compressionLevel;
    }

    /**
     * Create the SAM writer, given the constituent parts accrued.
     * @return Newly minted SAM file writer.
     */
    public SAMFileWriter build() {
        // Validate mandatory state in the same order as the original checks.
        if( samFile == null )
            throw new StingException( "Filename for output sam file must be supplied.");
        if( header == null )
            throw new StingException( "Header for output sam file must be supplied.");

        SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
        return writerFactory.makeBAMWriter( header, true, samFile, compressionLevel );
    }
}

View File

@ -42,7 +42,8 @@ public class OutputTrackerTest extends BaseTest {
@Test
public void testNullInputs() {
OutputTracker ot = new OutputTracker(null,null);
OutputTracker ot = new OutputTracker();
ot.initializeCoreIO(null,null);
Assert.assertTrue("OutputTracker: Output stream is of wrong type.", ot.getOutStream() instanceof RedirectingOutputStream );
Assert.assertTrue("OutputTracker: Error stream is of wrong type.", ot.getErrStream() instanceof RedirectingOutputStream );
@ -56,7 +57,8 @@ public class OutputTrackerTest extends BaseTest {
@Test
public void testOutputStreamAlone() throws FileNotFoundException {
OutputTracker ot = new OutputTracker(OUTPUT_FILENAME,null);
OutputTracker ot = new OutputTracker();
ot.initializeCoreIO(OUTPUT_FILENAME,null);
final String OUTPUT_TEXT = "out stream test";
PrintWriter outWriter = new PrintWriter(ot.getOutStream());
@ -76,7 +78,8 @@ public class OutputTrackerTest extends BaseTest {
@Test
public void testErrorStreamAlone() throws FileNotFoundException {
OutputTracker ot = new OutputTracker(null,ERROR_FILENAME);
OutputTracker ot = new OutputTracker();
ot.initializeCoreIO(null,ERROR_FILENAME);
final String ERROR_TEXT = "err stream test";
PrintWriter errWriter = new PrintWriter(ot.getErrStream());
@ -95,7 +98,8 @@ public class OutputTrackerTest extends BaseTest {
@Test
public void testIndependentStreams() throws FileNotFoundException {
OutputTracker ot = new OutputTracker(OUTPUT_FILENAME,ERROR_FILENAME);
OutputTracker ot = new OutputTracker();
ot.initializeCoreIO(OUTPUT_FILENAME,ERROR_FILENAME);
final String OUTPUT_TEXT = "out stream test";
PrintWriter outWriter = new PrintWriter(ot.getOutStream());
@ -121,7 +125,8 @@ public class OutputTrackerTest extends BaseTest {
@Test
public void testIdenticalInputsGetIdenticalResults() {
OutputTracker ot = new OutputTracker(OUTPUT_FILENAME,OUTPUT_FILENAME);
OutputTracker ot = new OutputTracker();
ot.initializeCoreIO(OUTPUT_FILENAME,OUTPUT_FILENAME);
Assert.assertTrue("OutputTracker: Output stream is of wrong type.", ot.getOutStream() instanceof RedirectingOutputStream );
Assert.assertTrue("OutputTracker: Error stream is of wrong type.", ot.getErrStream() instanceof RedirectingOutputStream );

View File

@ -173,8 +173,6 @@ public class CleanedReadInjectorTest extends BaseTest {
walker.cleanedReadsSource = cleanedReads;
walker.outputBAM = output;
walker.initializeOutputStreams( new OutputTracker(null,null) );
return walker;
}