Support for specifying SAMFileReaders and SAMFileWriters as @Arguments directly. *Very*
rough initial implementation, but should provide enough support so that people can stop creating SAMFileWriters in reduceInit.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1332 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
56f769f2ce
commit
7a13647c35
|
|
@ -1,9 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentFactory;
|
||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentSource;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.sam.SAMFileWriterBuilder;
|
||||
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -11,7 +13,7 @@ import java.io.FileNotFoundException;
|
|||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -106,6 +108,24 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
return new Class[] { GATKEngine.getWalkerByName(getAnalysisName()).getClass() };
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows arguments to be hijacked by subclasses of the program before being placed
|
||||
* into plugin classes.
|
||||
* @return True if the particular field should be hijacked; false otherwise.
|
||||
*/
|
||||
protected boolean intercept( ArgumentSource source, Object targetInstance, Object value ) {
|
||||
if( !(Walker.class.isAssignableFrom(source.clazz)) )
|
||||
return false;
|
||||
|
||||
if( value instanceof SAMFileReaderBuilder || value instanceof SAMFileWriterBuilder ) {
|
||||
GATKEngine.setAdditionalIO( source.field, value );
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected String getArgumentSourceName( Class argumentSource ) {
|
||||
return WalkerManager.getWalkerName((Class<Walker>) argumentSource);
|
||||
|
|
@ -145,22 +165,4 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
}
|
||||
return unpackedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a custom factory for instantiating specialty GATK arguments.
|
||||
* @return An instance of the command-line argument of the specified type.
|
||||
*/
|
||||
@Override
|
||||
protected ArgumentFactory getCustomArgumentFactory() {
|
||||
return new ArgumentFactory() {
|
||||
public Object createArgument( Class type, String... repr ) {
|
||||
if (type == SAMFileReader.class && repr.length == 1) {
|
||||
SAMFileReader samFileReader = new SAMFileReader(new File(repr[0]),true);
|
||||
samFileReader.setValidationStringency(getArgumentCollection().strictnessLevel);
|
||||
return samFileReader;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -102,10 +102,6 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false)
|
||||
public Integer maximumEngineIterations = -1;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
|
||||
public Integer BAMcompression = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "filterZeroMappingQualityReads", shortName = "fmq0", doc = "If true, mapping quality zero reads will be filtered at the lowest GATK level. Vastly improves performance at areas with abnormal depth due to mapping Q0 reads", required = false)
|
||||
public Boolean filterZeroMappingQualityReads = false;
|
||||
|
|
@ -246,10 +242,6 @@ public class GATKArgumentCollection {
|
|||
if (!other.unsafe.equals(this.unsafe)) {
|
||||
return false;
|
||||
}
|
||||
if (( other.BAMcompression == null && this.BAMcompression != null ) ||
|
||||
( other.BAMcompression != null && !other.BAMcompression.equals(this.BAMcompression) )) {
|
||||
return false;
|
||||
}
|
||||
if (( other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null ) ||
|
||||
( other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads) )) {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,9 @@ import net.sf.samtools.SAMReadGroupRecord;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
|
|
@ -41,13 +44,13 @@ import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentException;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
import java.lang.reflect.Field;
|
||||
|
||||
public class GenomeAnalysisEngine {
|
||||
|
||||
|
|
@ -63,7 +66,7 @@ public class GenomeAnalysisEngine {
|
|||
private GATKArgumentCollection argCollection;
|
||||
|
||||
/** Collection of output streams used by the walker. */
|
||||
private OutputTracker outputTracker = null;
|
||||
private OutputTracker outputTracker = new OutputTracker();
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class);
|
||||
|
|
@ -124,14 +127,12 @@ public class GenomeAnalysisEngine {
|
|||
// Validate the walker inputs against the walker.
|
||||
validateInputsAgainstWalker(my_walker, argCollection, rods);
|
||||
|
||||
// our microscheduler, which is in charge of running everything
|
||||
MicroScheduler microScheduler = createMicroscheduler(my_walker, rods);
|
||||
|
||||
// create the output streams
|
||||
initializeOutputStreams(my_walker);
|
||||
|
||||
// our microscheduler, which is in charge of running everything
|
||||
MicroScheduler microScheduler = null;
|
||||
|
||||
microScheduler = createMicroscheduler(my_walker, rods);
|
||||
|
||||
// Prepare the sort ordering w.r.t. the sequence dictionary
|
||||
if (argCollection.referenceFile != null) {
|
||||
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile);
|
||||
|
|
@ -148,8 +149,20 @@ public class GenomeAnalysisEngine {
|
|||
if (argCollection.intervals != null) {
|
||||
locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
|
||||
}
|
||||
// excute the microscheduler, storing the results
|
||||
return microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations);
|
||||
|
||||
ShardStrategy shardStrategy = this.getShardStrategy(my_walker, microScheduler.getReference(), locs, argCollection.maximumEngineIterations);
|
||||
|
||||
// execute the microscheduler, storing the results
|
||||
return microScheduler.execute(my_walker, shardStrategy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add additional, externally managed IO streams for walker consumption.
|
||||
* @param walkerField Field in the walker into which to inject the value.
|
||||
* @param value Instance to inject.
|
||||
*/
|
||||
public void setAdditionalIO( Field walkerField, Object value ) {
|
||||
outputTracker.addAdditionalOutput( walkerField, value );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -182,21 +195,29 @@ public class GenomeAnalysisEngine {
|
|||
// the microscheduler to return
|
||||
MicroScheduler microScheduler = null;
|
||||
|
||||
SAMDataSource readsDataSource = createReadsDataSource(extractSourceInfo(my_walker,argCollection));
|
||||
IndexedFastaSequenceFile referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
|
||||
List<ReferenceOrderedDataSource> rodDataSources = getReferenceOrderedDataSources(rods);
|
||||
|
||||
GenomeLocSortedSet locs = null;
|
||||
if (argCollection.intervals != null) {
|
||||
locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
|
||||
}
|
||||
|
||||
// we need to verify different parameter based on the walker type
|
||||
if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) {
|
||||
// create the MicroScheduler
|
||||
microScheduler = MicroScheduler.create(my_walker, extractSourceInfo(my_walker,argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
engine = microScheduler.getTraversalEngine();
|
||||
microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads);
|
||||
} else if (my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) {
|
||||
if (argCollection.referenceFile == null)
|
||||
Utils.scareUser(String.format("Read-based traversals require a reference file but none was given"));
|
||||
microScheduler = MicroScheduler.create(my_walker, extractSourceInfo(my_walker,argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
engine = microScheduler.getTraversalEngine();
|
||||
microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads);
|
||||
} else {
|
||||
Utils.scareUser(String.format("Unable to create the appropriate TraversalEngine for analysis type " + argCollection.analysisName));
|
||||
}
|
||||
|
||||
dataSource = microScheduler.getSAMDataSource();
|
||||
engine = microScheduler.getTraversalEngine();
|
||||
|
||||
return microScheduler;
|
||||
}
|
||||
|
|
@ -366,17 +387,6 @@ public class GenomeAnalysisEngine {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default to 5 (based on research by Alec Wysoker)
|
||||
*
|
||||
* @return the BAM compression
|
||||
*/
|
||||
public int getBAMCompression() {
|
||||
return (argCollection.BAMcompression == null ||
|
||||
argCollection.BAMcompression < 1 ||
|
||||
argCollection.BAMcompression > 8) ? 5 : argCollection.BAMcompression;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that binds RODs using the old-style command line parser to the new style list for
|
||||
* a uniform processing.
|
||||
|
|
@ -389,6 +399,116 @@ public class GenomeAnalysisEngine {
|
|||
argCollection.RODBindings.add(Utils.join(",", new String[]{name, type, file}));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sharding strategy given a driving data source.
|
||||
*
|
||||
* @param walker Walker for which to infer sharding strategy.
|
||||
* @param drivingDataSource Data on which to shard.
|
||||
* @param intervals Intervals to use when limiting sharding.
|
||||
* @param maxIterations the maximum number of iterations to run through
|
||||
*
|
||||
* @return Sharding strategy for this driving data source.
|
||||
*/
|
||||
protected ShardStrategy getShardStrategy(Walker walker,
|
||||
ReferenceSequenceFile drivingDataSource,
|
||||
GenomeLocSortedSet intervals,
|
||||
Integer maxIterations) {
|
||||
final long SHARD_SIZE = 100000L;
|
||||
|
||||
ShardStrategy shardStrategy = null;
|
||||
ShardStrategyFactory.SHATTER_STRATEGY shardType;
|
||||
if (walker instanceof LocusWalker) {
|
||||
if (intervals != null) {
|
||||
shardType = (walker.isReduceByInterval()) ?
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
|
||||
|
||||
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE,
|
||||
intervals, maxIterations);
|
||||
} else
|
||||
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE, maxIterations);
|
||||
|
||||
} else if (walker instanceof ReadWalker ||
|
||||
walker instanceof DuplicateWalker) {
|
||||
|
||||
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS;
|
||||
|
||||
if (intervals != null) {
|
||||
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE,
|
||||
intervals, maxIterations);
|
||||
} else {
|
||||
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE, maxIterations);
|
||||
}
|
||||
} else if (walker instanceof LocusWindowWalker) {
|
||||
if( intervals == null )
|
||||
throw new StingException("Unable to shard: walker is of type LocusWindow, but no intervals were provided");
|
||||
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE,
|
||||
intervals, maxIterations);
|
||||
} else
|
||||
throw new StingException("Unable to support walker of type" + walker.getClass().getName());
|
||||
|
||||
return shardStrategy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data source for the given set of reads.
|
||||
*
|
||||
* @param reads the read source information
|
||||
*
|
||||
* @return A data source for the given set of reads.
|
||||
*/
|
||||
private SAMDataSource createReadsDataSource(Reads reads) {
|
||||
// By reference traversals are happy with no reads. Make sure that case is handled.
|
||||
if (reads.getReadsFiles().size() == 0)
|
||||
return null;
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(reads);
|
||||
|
||||
return dataSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens a reference sequence file paired with an index.
|
||||
*
|
||||
* @param refFile Handle to a reference sequence file. Non-null.
|
||||
*
|
||||
* @return A thread-safe file wrapper.
|
||||
*/
|
||||
private IndexedFastaSequenceFile openReferenceSequenceFile(File refFile) {
|
||||
IndexedFastaSequenceFile ref = null;
|
||||
try {
|
||||
ref = new IndexedFastaSequenceFile(refFile);
|
||||
}
|
||||
catch (FileNotFoundException ex) {
|
||||
throw new StingException("I/O error while opening fasta file: " + ex.getMessage(), ex);
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(ref);
|
||||
return ref;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the reference-ordered data sources.
|
||||
*
|
||||
* @param rods the reference order data to execute using
|
||||
*
|
||||
* @return A list of reference-ordered data sources.
|
||||
*/
|
||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods)
|
||||
dataSources.add(new ReferenceOrderedDataSource(rod));
|
||||
return dataSources;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the output streams as specified by the user.
|
||||
|
|
@ -396,9 +516,11 @@ public class GenomeAnalysisEngine {
|
|||
* @param walker the walker to initialize output streams for
|
||||
*/
|
||||
private void initializeOutputStreams(Walker walker) {
|
||||
outputTracker = (argCollection.outErrFileName != null) ? new OutputTracker(argCollection.outErrFileName, argCollection.outErrFileName)
|
||||
: new OutputTracker(argCollection.outFileName, argCollection.errFileName);
|
||||
walker.initializeOutputStreams(outputTracker);
|
||||
if( argCollection.outErrFileName != null )
|
||||
outputTracker.initializeCoreIO( argCollection.outErrFileName, argCollection.outErrFileName );
|
||||
else
|
||||
outputTracker.initializeCoreIO( argCollection.outFileName, argCollection.errFileName );
|
||||
outputTracker.prepareWalker(walker);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,13 +1,22 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.SAMFileWriterBuilder;
|
||||
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
|
||||
import org.broadinstitute.sting.utils.io.RedirectingOutputStream;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: Apr 30, 2009
|
||||
|
|
@ -39,13 +48,15 @@ public class OutputTracker {
|
|||
protected ThreadLocal<OutputStream> localOut = new ThreadLocal<OutputStream>();
|
||||
protected ThreadLocal<OutputStream> localErr = new ThreadLocal<OutputStream>();
|
||||
|
||||
protected Map<Field,Object> additionalIO = new HashMap<Field,Object>();
|
||||
|
||||
/**
|
||||
* Create an object to manage output given filenames for the output and error files.
|
||||
* If no files are specified, output falls back to System.out and System.err.
|
||||
* @param outFileName Name of the output file.
|
||||
* @param errFileName Name of the error file.
|
||||
*/
|
||||
public OutputTracker( String outFileName, String errFileName ) {
|
||||
public void initializeCoreIO( String outFileName, String errFileName ) {
|
||||
// If the two output streams match and are non-null, initialize them identically.
|
||||
// Otherwise, initialize them separately.
|
||||
if( outFileName != null && outFileName.equals(errFileName) ) {
|
||||
|
|
@ -55,6 +66,35 @@ public class OutputTracker {
|
|||
else {
|
||||
globalOut = (outFileName != null) ? prepareOutputFile( outFileName ) : System.out;
|
||||
globalErr = (errFileName != null) ? prepareOutputFile( errFileName ) : System.err;
|
||||
}
|
||||
}
|
||||
|
||||
public void prepareWalker( Walker walker ) {
|
||||
Field out = JVMUtils.findField( walker.getClass(), "out" );
|
||||
Field err = JVMUtils.findField( walker.getClass(), "err" );
|
||||
|
||||
JVMUtils.setField( out, walker, new PrintStream(getOutStream()) );
|
||||
JVMUtils.setField( err, walker, new PrintStream(getErrStream()) );
|
||||
|
||||
for( Map.Entry<Field,Object> io: additionalIO.entrySet() ) {
|
||||
Field targetField = io.getKey();
|
||||
Object targetValue = io.getValue();
|
||||
|
||||
// Ghastly hacks: reaches in and finishes building out the SAMFileReader / SAMFileWriter.
|
||||
// TODO: Generalize this, and move it to its own initialization step.
|
||||
if( targetValue instanceof SAMFileReaderBuilder) {
|
||||
SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue;
|
||||
builder.setValidationStringency(GenomeAnalysisEngine.instance.getArguments().strictnessLevel);
|
||||
targetValue = builder.build();
|
||||
}
|
||||
|
||||
if( targetValue instanceof SAMFileWriterBuilder ) {
|
||||
SAMFileWriterBuilder builder = (SAMFileWriterBuilder)targetValue;
|
||||
builder.setSAMFileHeader(GenomeAnalysisEngine.instance.getDataSource().getHeader());
|
||||
targetValue = builder.build();
|
||||
}
|
||||
|
||||
JVMUtils.setField( targetField, walker, targetValue );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -115,6 +155,15 @@ public class OutputTracker {
|
|||
localErr.set( err );
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide a mechanism for injecting supplemental streams for external management.
|
||||
* @param field Field into which to inject this stream.
|
||||
* @param stream Stream to manage.
|
||||
*/
|
||||
public void addAdditionalOutput( Field field, Object stream ) {
|
||||
additionalIO.put(field,stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove pointers to alternate, local output streams.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ public class ShardDataProvider {
|
|||
/**
|
||||
* Sources of reference-ordered data.
|
||||
*/
|
||||
private final List<ReferenceOrderedDataSource> referenceOrderedData;
|
||||
private final Collection<ReferenceOrderedDataSource> referenceOrderedData;
|
||||
|
||||
/**
|
||||
* Retrieves the shard associated with this data provider.
|
||||
|
|
@ -101,7 +101,7 @@ public class ShardDataProvider {
|
|||
* views can access it.
|
||||
* @return List of reference-ordered data sources.
|
||||
*/
|
||||
List<ReferenceOrderedDataSource> getReferenceOrderedData() {
|
||||
Collection<ReferenceOrderedDataSource> getReferenceOrderedData() {
|
||||
return referenceOrderedData;
|
||||
}
|
||||
|
||||
|
|
@ -111,7 +111,7 @@ public class ShardDataProvider {
|
|||
* @param reads A window into the reads for a given region.
|
||||
* @param reference A getter for a section of the reference.
|
||||
*/
|
||||
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, List<ReferenceOrderedDataSource> rods) {
|
||||
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
this.shard = shard;
|
||||
// Provide basic reads information.
|
||||
this.reads = (reads != null) ? reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList<File>()));
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
|
@ -11,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
|
||||
|
||||
import javax.management.MBeanServer;
|
||||
|
|
@ -20,6 +23,7 @@ import java.io.File;
|
|||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Collection;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Future;
|
||||
|
|
@ -74,11 +78,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
* Create a new hierarchical microscheduler to process the given reads and reference.
|
||||
*
|
||||
* @param reads Reads file(s) to process.
|
||||
* @param refFile Reference for driving the traversal.
|
||||
* @param reference Reference for driving the traversal.
|
||||
* @param nThreadsToUse maximum number of threads to use to do the work
|
||||
*/
|
||||
protected HierarchicalMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
||||
super(walker, reads, refFile, rods);
|
||||
protected HierarchicalMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
|
||||
super(walker, reads, reference, rods);
|
||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
||||
|
||||
try {
|
||||
|
|
@ -91,12 +95,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
}
|
||||
}
|
||||
|
||||
public Object execute( Walker walker, GenomeLocSortedSet intervals, Integer maxIterations ) {
|
||||
public Object execute( Walker walker, ShardStrategy shardStrategy ) {
|
||||
// Fast fail for walkers not supporting TreeReducible interface.
|
||||
if (!( walker instanceof TreeReducible ))
|
||||
throw new IllegalArgumentException("Hierarchical microscheduler only works with TreeReducible walkers");
|
||||
|
||||
ShardStrategy shardStrategy = getShardStrategy(walker, reference, intervals, maxIterations);
|
||||
ReduceTree reduceTree = new ReduceTree(this);
|
||||
|
||||
walker.initialize();
|
||||
|
|
@ -258,7 +261,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
OutputMerger outputMerger = new OutputMerger();
|
||||
|
||||
ShardTraverser traverser = new ShardTraverser(this,
|
||||
getTraversalEngine(),
|
||||
traversalEngine,
|
||||
walker,
|
||||
shard,
|
||||
getShardDataProvider(shard),
|
||||
|
|
|
|||
|
|
@ -3,14 +3,16 @@ package org.broadinstitute.sting.gatk.executive;
|
|||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
|
||||
/** A micro-scheduling manager for single-threaded execution of a traversal. */
|
||||
public class LinearMicroScheduler extends MicroScheduler {
|
||||
|
|
@ -18,23 +20,22 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
/**
|
||||
* Create a new linear microscheduler to process the given reads and reference.
|
||||
*
|
||||
* @param reads Reads file(s) to process.
|
||||
* @param refFile Reference for driving the traversal.
|
||||
* @param walker Walker for the traversal.
|
||||
* @param reads Reads file(s) to process.
|
||||
* @param reference Reference for driving the traversal.
|
||||
* @param rods Reference-ordered data.
|
||||
*/
|
||||
protected LinearMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
||||
super(walker, reads, refFile, rods);
|
||||
protected LinearMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods ) {
|
||||
super(walker, reads, reference, rods);
|
||||
}
|
||||
|
||||
/**
|
||||
* Run this traversal over the specified subsection of the dataset.
|
||||
*
|
||||
* @param walker Computation to perform over dataset.
|
||||
* @param locations Subset of the dataset over which to walk.
|
||||
* @param maxIterations the maximum number of iterations we're to perform
|
||||
* @param shardStrategy A strategy for sharding the data.
|
||||
*/
|
||||
public Object execute(Walker walker, GenomeLocSortedSet locations, Integer maxIterations) {
|
||||
ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations, maxIterations);
|
||||
|
||||
public Object execute(Walker walker, ShardStrategy shardStrategy) {
|
||||
walker.initialize();
|
||||
Accumulator accumulator = Accumulator.create(walker);
|
||||
|
||||
|
|
|
|||
|
|
@ -26,29 +26,19 @@
|
|||
package org.broadinstitute.sting.gatk.executive;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.traversals.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
|
|
@ -62,15 +52,13 @@ import java.util.*;
|
|||
|
||||
/** Shards and schedules data in manageable chunks. */
|
||||
public abstract class MicroScheduler {
|
||||
private static long SHARD_SIZE = 100000L;
|
||||
|
||||
protected static Logger logger = Logger.getLogger(MicroScheduler.class);
|
||||
|
||||
protected final TraversalEngine traversalEngine;
|
||||
protected final IndexedFastaSequenceFile reference;
|
||||
|
||||
private final SAMDataSource reads;
|
||||
private final List<ReferenceOrderedDataSource> rods;
|
||||
private final Collection<ReferenceOrderedDataSource> rods;
|
||||
|
||||
/**
|
||||
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
|
||||
|
|
@ -78,19 +66,19 @@ public abstract class MicroScheduler {
|
|||
*
|
||||
* @param walker Which walker to use.
|
||||
* @param reads the information associated with the reads
|
||||
* @param ref the reference file
|
||||
* @param reference the reference file
|
||||
* @param rods the rods to include in the traversal
|
||||
* @param nThreadsToUse Number of threads to utilize.
|
||||
*
|
||||
* @return The best-fit microscheduler.
|
||||
*/
|
||||
public static MicroScheduler create(Walker walker, Reads reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse) {
|
||||
public static MicroScheduler create(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
|
||||
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
|
||||
logger.info("Creating hierarchical microscheduler");
|
||||
return new HierarchicalMicroScheduler(walker, reads, ref, rods, nThreadsToUse);
|
||||
return new HierarchicalMicroScheduler(walker, reads, reference, rods, nThreadsToUse);
|
||||
} else {
|
||||
logger.info("Creating linear microscheduler");
|
||||
return new LinearMicroScheduler(walker, reads, ref, rods);
|
||||
return new LinearMicroScheduler(walker, reads, reference, rods);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -99,10 +87,10 @@ public abstract class MicroScheduler {
|
|||
*
|
||||
* @param walker the walker to execute with
|
||||
* @param reads The reads.
|
||||
* @param refFile File pointer to the reference.
|
||||
* @param reference The reference.
|
||||
* @param rods the rods to include in the traversal
|
||||
*/
|
||||
protected MicroScheduler(Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
protected MicroScheduler(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
if (walker instanceof ReadWalker) {
|
||||
traversalEngine = new TraverseReads();
|
||||
} else if (walker instanceof LocusWalker) {
|
||||
|
|
@ -114,11 +102,17 @@ public abstract class MicroScheduler {
|
|||
} else {
|
||||
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
|
||||
}
|
||||
this.reads = setupReadsDataSource(reads);
|
||||
this.reference = openReferenceSequenceFile(refFile);
|
||||
this.rods = getReferenceOrderedDataSources(rods);
|
||||
this.reads = reads;
|
||||
this.reference = reference;
|
||||
this.rods = rods;
|
||||
|
||||
validate(this.reads,this.reference);
|
||||
|
||||
// Side effect: initialize the traversal engine with reads data.
|
||||
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
||||
// doesn't have to bend over backward providing legacy getters and setters.
|
||||
traversalEngine.setSAMHeader(reads.getHeader());
|
||||
traversalEngine.initialize();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -129,77 +123,18 @@ public abstract class MicroScheduler {
|
|||
*/
|
||||
public TraversalEngine getTraversalEngine() {
|
||||
return traversalEngine;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Walks a walker over the given list of intervals.
|
||||
*
|
||||
* @param walker Computation to perform over dataset.
|
||||
* @param intervals A list of intervals over which to walk. Null for whole dataset.
|
||||
* @param maxIterations the maximum number of iterations we're to perform
|
||||
* @param shardStrategy A strategy for sharding the data.
|
||||
*
|
||||
* @return the return type of the walker
|
||||
*/
|
||||
public abstract Object execute(Walker walker, GenomeLocSortedSet intervals, Integer maxIterations);
|
||||
public abstract Object execute(Walker walker, ShardStrategy shardStrategy);
|
||||
|
||||
/**
|
||||
* Get the sharding strategy given a driving data source.
|
||||
*
|
||||
* @param walker Walker for which to infer sharding strategy.
|
||||
* @param drivingDataSource Data on which to shard.
|
||||
* @param intervals Intervals to use when limiting sharding.
|
||||
* @param maxIterations the maximum number of iterations to run through
|
||||
*
|
||||
* @return Sharding strategy for this driving data source.
|
||||
*/
|
||||
protected ShardStrategy getShardStrategy(Walker walker,
|
||||
ReferenceSequenceFile drivingDataSource,
|
||||
GenomeLocSortedSet intervals,
|
||||
Integer maxIterations) {
|
||||
ShardStrategy shardStrategy = null;
|
||||
ShardStrategyFactory.SHATTER_STRATEGY shardType;
|
||||
if (walker instanceof LocusWalker) {
|
||||
if (intervals != null) {
|
||||
shardType = (walker.isReduceByInterval()) ?
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
|
||||
|
||||
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE,
|
||||
intervals, maxIterations);
|
||||
} else
|
||||
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE, maxIterations);
|
||||
|
||||
} else if (walker instanceof ReadWalker ||
|
||||
walker instanceof DuplicateWalker) {
|
||||
|
||||
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS;
|
||||
|
||||
if (intervals != null) {
|
||||
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE,
|
||||
intervals, maxIterations);
|
||||
} else {
|
||||
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE, maxIterations);
|
||||
}
|
||||
} else if (walker instanceof LocusWindowWalker) {
|
||||
if( intervals == null )
|
||||
throw new StingException("Unable to shard: walker is of type LocusWindow, but no intervals were provided");
|
||||
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE,
|
||||
intervals, maxIterations);
|
||||
} else
|
||||
throw new StingException("Unable to support walker of type" + walker.getClass().getName());
|
||||
|
||||
return shardStrategy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a window into all the data that can be viewed as a single shard.
|
||||
|
|
@ -228,28 +163,6 @@ public abstract class MicroScheduler {
|
|||
traversalEngine.printOnTraversalDone(sum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data source for the given set of reads.
|
||||
*
|
||||
* @param reads the read source information
|
||||
*
|
||||
* @return A data source for the given set of reads.
|
||||
*/
|
||||
private SAMDataSource setupReadsDataSource(Reads reads) {
|
||||
// By reference traversals are happy with no reads. Make sure that case is handled.
|
||||
if (reads.getReadsFiles().size() == 0)
|
||||
return null;
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(reads);
|
||||
|
||||
// Side effect: initialize the traversal engine with reads data.
|
||||
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
||||
// doesn't have to bend over backward providing legacy getters and setters.
|
||||
traversalEngine.setSAMHeader(dataSource.getHeader());
|
||||
|
||||
return dataSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns data source maintained by this scheduler
|
||||
* @return
|
||||
|
|
@ -257,37 +170,10 @@ public abstract class MicroScheduler {
|
|||
public SAMDataSource getSAMDataSource() { return reads; }
|
||||
|
||||
/**
|
||||
* Open the reference-ordered data sources.
|
||||
*
|
||||
* @param rods the reference order data to execute using
|
||||
*
|
||||
* @return A list of reference-ordered data sources.
|
||||
* Returns the reference maintained by this scheduler.
|
||||
* @return The reference maintained by this scheduler.
|
||||
*/
|
||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods)
|
||||
dataSources.add(new ReferenceOrderedDataSource(rod));
|
||||
return dataSources;
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens a reference sequence file paired with an index.
|
||||
*
|
||||
* @param refFile Handle to a reference sequence file. Non-null.
|
||||
*
|
||||
* @return A thread-safe file wrapper.
|
||||
*/
|
||||
private IndexedFastaSequenceFile openReferenceSequenceFile(File refFile) {
|
||||
IndexedFastaSequenceFile ref = null;
|
||||
try {
|
||||
ref = new IndexedFastaSequenceFile(refFile);
|
||||
}
|
||||
catch (FileNotFoundException ex) {
|
||||
throw new StingException("I/O error while opening fasta file: " + ex.getMessage(), ex);
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(ref);
|
||||
return ref;
|
||||
}
|
||||
public IndexedFastaSequenceFile getReference() { return reference; }
|
||||
|
||||
/**
|
||||
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference.
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
|
||||
/** an optional argument to dump the reads out to a BAM file */
|
||||
@Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
|
||||
String outputBamFile = null;
|
||||
SAMFileWriter outputBamFile = null;
|
||||
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false)
|
||||
Integer maxLength = null;
|
||||
@Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false)
|
||||
|
|
@ -102,12 +102,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
|
||||
*/
|
||||
public SAMFileWriter reduceInit() {
|
||||
if (outputBamFile != null) {
|
||||
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
|
||||
return Utils.createSAMFileWriterWithCompression(header, true, outputBamFile, getToolkit().getBAMCompression());
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
return outputBamFile;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -47,6 +47,9 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
|
|||
@Argument(fullName="outputRoot", doc="output BAM file", required=false)
|
||||
public String outputRoot = null;
|
||||
|
||||
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
|
||||
public Integer BAMcompression = 5;
|
||||
|
||||
private static Logger logger = Logger.getLogger(SplitSamFileWalker.class);
|
||||
private static String VERSION = "0.0.1";
|
||||
|
||||
|
|
@ -92,7 +95,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
|
|||
final String sample = elt.getKey();
|
||||
final String filename = outputRoot + sample + ".bam";
|
||||
logger.info(String.format("Creating BAM output file %s for sample %s", filename, sample));
|
||||
SAMFileWriter output = Utils.createSAMFileWriterWithCompression(elt.getValue(), true, filename, getToolkit().getBAMCompression());
|
||||
SAMFileWriter output = Utils.createSAMFileWriterWithCompression(elt.getValue(), true, filename, BAMcompression);
|
||||
outputs.put(sample, output);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.OutputTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -36,11 +33,6 @@ public abstract class Walker<MapType, ReduceType> {
|
|||
protected Walker() {
|
||||
}
|
||||
|
||||
public void initializeOutputStreams( OutputTracker outputTracker ) {
|
||||
out = new PrintStream( outputTracker.getOutStream() );
|
||||
err = new PrintStream( outputTracker.getErrStream() );
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the toolkit, for peering into internal structures that can't
|
||||
* otherwise be read. Use sparingly, and discuss uses with software engineering
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
* Target file for BAM output.
|
||||
*/
|
||||
@Argument(fullName="output_bam",shortName="ob",doc="Output BAM file",required=true)
|
||||
String outputBAMFileName = null;
|
||||
SAMFileWriter outputBAM = null;
|
||||
|
||||
/**
|
||||
* The set of (sorted) cleaned reads
|
||||
|
|
@ -50,11 +50,6 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
*/
|
||||
private HashSet<String> cleanedReadHash = new HashSet<String>();
|
||||
|
||||
/**
|
||||
* The writer that handles writing of SAM files.
|
||||
*/
|
||||
SAMFileWriter outputBAM = null;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
|
||||
|
|
@ -68,15 +63,6 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
cleanedReadHash.add(getUniquifiedReadName(read));
|
||||
}
|
||||
allReads.close();
|
||||
|
||||
// HACK: The unit tests create their own output files. Make sure this walker doesn't step
|
||||
// on any toes.
|
||||
if( outputBAM == null ) {
|
||||
outputBAM = Utils.createSAMFileWriterWithCompression(getToolkit().getEngine().getSAMHeader(),
|
||||
true,
|
||||
outputBAMFileName,
|
||||
getToolkit().getBAMCompression());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
@Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false)
|
||||
boolean allow454 = false;
|
||||
@Argument(fullName="OutputCleaned", shortName="O", required=false, doc="Output file (sam or bam) for improved (realigned) reads")
|
||||
String OUT = null;
|
||||
SAMFileWriter writer = null;
|
||||
@Argument(fullName="OutputIndels", shortName="indels", required=false, doc="Output file (text) for the indels found")
|
||||
String OUT_INDELS = null;
|
||||
@Argument(fullName="OutputCleanedReadsOnly", shortName="cleanedOnly", doc="print out cleaned reads only (otherwise, all reads within the intervals)", required=false)
|
||||
|
|
@ -45,7 +45,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
// fraction of mismatches that need to no longer mismatch for a column to be considered cleaned
|
||||
private static final double MISMATCH_COLUMN_CLEANED_FRACTION = 0.75;
|
||||
|
||||
private SAMFileWriter writer = null;
|
||||
private FileWriter indelOutput = null;
|
||||
private FileWriter statsOutput = null;
|
||||
private FileWriter snpsOutput = null;
|
||||
|
|
@ -63,12 +62,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
throw new RuntimeException("Entropy threshold must be a fraction between 0 and 1");
|
||||
|
||||
SAMFileHeader header = getToolkit().getEngine().getSAMHeader();
|
||||
if ( OUT != null ) {
|
||||
writer = Utils.createSAMFileWriterWithCompression(header, true, OUT, getToolkit().getBAMCompression());
|
||||
if ( writer != null ) {
|
||||
readsToWrite = new TreeSet<ComparableSAMRecord>();
|
||||
}
|
||||
|
||||
logger.info("Writing into output BAM file at compression level " + getToolkit().getBAMCompression());
|
||||
logger.info("Writing into output BAM file");
|
||||
logger.info("Temporary space used: "+System.getProperty("java.io.tmpdir"));
|
||||
generator = new Random();
|
||||
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
|
|||
@Argument(shortName="logisticParams", doc="logistic params file", required=true)
|
||||
public String logisticParamsFile;
|
||||
|
||||
@Argument(shortName="outputBAM", doc="output BAM file", required=false)
|
||||
public String outputBamFile = null;
|
||||
@Argument(fullName="outputBamFile",shortName="outputBAM", doc="output BAM file", required=false)
|
||||
public SAMFileWriter outputBamFile = null;
|
||||
|
||||
@Argument(shortName="useCache", doc="If true, uses high-performance caching of logistic regress results. Experimental", required=false)
|
||||
public boolean useLogisticCache = true;
|
||||
|
|
@ -308,13 +308,7 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
|
|||
}
|
||||
|
||||
public SAMFileWriter reduceInit() {
|
||||
if ( outputBamFile != null ) { // ! outputBamFile.equals("") ) {
|
||||
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
|
||||
return Utils.createSAMFileWriterWithCompression(header, true, outputBamFile, getToolkit().getBAMCompression());
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
return outputBamFile;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -46,8 +46,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
@Argument(shortName="params", doc="CountCovariates params file", required=true)
|
||||
public String paramsFile;
|
||||
|
||||
@Argument(shortName="outputBAM", doc="output BAM file", required=false)
|
||||
public String outputBamFile = null;
|
||||
@Argument(fullName="outputBamFile", shortName="outputBAM", doc="output BAM file", required=false)
|
||||
public SAMFileWriter outputBamFile = null;
|
||||
|
||||
@Argument(shortName="rawQempirical", doc="If provided, we will use raw Qempirical scores calculated from the # mismatches and # bases, rather than the more conservative estimate of # mismatches + 1 / # bases + 1", required=false)
|
||||
public boolean rawQempirical = false;
|
||||
|
|
@ -321,13 +321,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
|
||||
public SAMFileWriter reduceInit() {
|
||||
if ( outputBamFile != null ) {
|
||||
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
|
||||
return Utils.createSAMFileWriterWithCompression(header, true, outputBamFile, getToolkit().getBAMCompression());
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
return outputBamFile;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -33,6 +33,9 @@ public class IOCrusherWalker extends ReadWalker<SAMRecord, ArrayList<SAMFileWrit
|
|||
@Argument(shortName="outputBase",doc="output base",required=true)
|
||||
public String outputBase;
|
||||
|
||||
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
|
||||
public Integer BAMcompression = 5;
|
||||
|
||||
public long nReadsRead = 0;
|
||||
public long nReadsWritten = 0;
|
||||
|
||||
|
|
@ -51,7 +54,7 @@ public class IOCrusherWalker extends ReadWalker<SAMRecord, ArrayList<SAMFileWrit
|
|||
ArrayList<SAMFileWriter> outputs = new ArrayList<SAMFileWriter>(nWaysOut);
|
||||
for ( int i = 0; i < nWaysOut; i++ ) {
|
||||
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
|
||||
outputs.add(Utils.createSAMFileWriterWithCompression(header, true, outputBase + "." + i + ".bam", getToolkit().getBAMCompression()));
|
||||
outputs.add(Utils.createSAMFileWriterWithCompression(header, true, outputBase + "." + i + ".bam", BAMcompression));
|
||||
}
|
||||
return outputs;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class ReplaceQuals extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
public String inputQualsBAM;
|
||||
|
||||
@Argument(shortName="outputBAM", required=false, doc="output BAM file for reads with replaced quals")
|
||||
public String outputFilename = null;
|
||||
public SAMFileWriter outputBAM = null;
|
||||
|
||||
public int MAX_READS_TO_LOAD = -1;
|
||||
|
||||
|
|
@ -108,13 +108,7 @@ public class ReplaceQuals extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
}
|
||||
|
||||
public SAMFileWriter reduceInit() {
|
||||
if ( outputFilename != null ) { // ! outputBamFile.equals("") ) {
|
||||
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
|
||||
return Utils.createSAMFileWriterWithCompression(header, true, outputFilename, getToolkit().getBAMCompression());
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
return outputBAM;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
*/
|
||||
public class CombineDuplicatesWalker extends DuplicateWalker<SAMRecord, SAMFileWriter> {
|
||||
@Argument(fullName="outputBAM", shortName="outputBAM", required=false, doc="BAM File to write combined duplicates to")
|
||||
public String outputFilename = null;
|
||||
public SAMFileWriter outputBAM = null;
|
||||
|
||||
@Argument(fullName="includeUniqueReads", shortName="includeUniqueReads", required=false, doc="If true, also writes out non-duplicate reads in file")
|
||||
public boolean INCLUDE_UNIQUE_READS = true;
|
||||
|
|
@ -48,13 +48,7 @@ public class CombineDuplicatesWalker extends DuplicateWalker<SAMRecord, SAMFileW
|
|||
}
|
||||
|
||||
public SAMFileWriter reduceInit() {
|
||||
if ( outputFilename != null ) {
|
||||
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
|
||||
return Utils.createSAMFileWriterWithCompression(header, true, outputFilename, getToolkit().getBAMCompression());
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
return outputBAM;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.lang.reflect.Field;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
|
|
@ -48,4 +49,40 @@ public class JVMUtils {
|
|||
!Modifier.isInterface(clazz.getModifiers());
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the field with the given name in the class. Will inspect all fields, independent
|
||||
* of access level.
|
||||
* @param type Class in which to search for the given field.
|
||||
* @param fieldName Name of the field for which to search.
|
||||
* @return The field, or null if no such field exists.
|
||||
*/
|
||||
public static Field findField( Class type, String fieldName ) {
|
||||
while( type != null ) {
|
||||
Field[] fields = type.getDeclaredFields();
|
||||
for( Field field: fields ) {
|
||||
if( field.getName().equals(fieldName) )
|
||||
return field;
|
||||
}
|
||||
type = type.getSuperclass();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the provided field in the given instance to the given value. Circumvents access restrictions:
|
||||
* a field can be private and still set successfully by this function.
|
||||
* @param field Field to set in the given object.
|
||||
* @param instance Instance in which to set the field.
|
||||
* @param value The value to which to set the given field in the given instance.
|
||||
*/
|
||||
public static void setField( Field field, Object instance, Object value ) {
|
||||
try {
|
||||
field.setAccessible(true);
|
||||
field.set(instance, value);
|
||||
}
|
||||
catch( IllegalAccessException ex ) {
|
||||
throw new StingException(String.format("Could not set %s in instance %s to %s",field.getName(),instance.getClass().getName(),value.toString()));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,42 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.cmdLine;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Provides a service whereby the application can provide a mechanism
|
||||
* for creating specialty arguments.
|
||||
* @version 0.1
|
||||
*/
|
||||
public abstract class ArgumentFactory {
|
||||
/**
|
||||
* Create an instance of a specified type of argument.
|
||||
* @param type The type of the argument to create.
|
||||
* @param repr A String representation of the argument.
|
||||
* @return
|
||||
*/
|
||||
public abstract Object createArgument(Class type, String... repr);
|
||||
}
|
||||
|
|
@ -1,14 +1,6 @@
|
|||
package org.broadinstitute.sting.utils.cmdLine;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.*;
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
|
|
@ -170,7 +162,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
|
|||
/**
|
||||
* A mapping of all the sites where an argument definition maps to a site on the command line.
|
||||
*/
|
||||
class ArgumentMatch {
|
||||
class ArgumentMatch implements Iterable<ArgumentMatch> {
|
||||
/**
|
||||
* The argument definition that's been matched.
|
||||
*/
|
||||
|
|
@ -184,20 +176,123 @@ class ArgumentMatch {
|
|||
/**
|
||||
* Maps indices of command line arguments to values paired with that argument.
|
||||
*/
|
||||
public final Map<Integer,List<String>> indices = new HashMap<Integer,List<String>>();
|
||||
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
|
||||
|
||||
/**
|
||||
* Create a new argument match, defining its properties later. Used to create invalid arguments.
|
||||
*/
|
||||
public ArgumentMatch() {
|
||||
definition = null;
|
||||
label = null;
|
||||
this.label = null;
|
||||
this.definition = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple way of indicating that an argument with the given label and definition exists at this index.
|
||||
* @param label Label of the argument match. Must not be null.
|
||||
* @param definition The associated definition, if one exists. May be null.
|
||||
* @param index Position of the argument. Must not be null.
|
||||
*/
|
||||
public ArgumentMatch( String label, ArgumentDefinition definition, int index ) {
|
||||
this( label, definition, index, null );
|
||||
}
|
||||
|
||||
private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) {
|
||||
this.label = label;
|
||||
this.definition = definition;
|
||||
indices.put(index,null);
|
||||
|
||||
ArrayList<String> values = new ArrayList<String>();
|
||||
if( value != null )
|
||||
values.add(value);
|
||||
indices.put(index,values );
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an iterator that walks over each individual match at each position of a given argument.
|
||||
* @return An iterator over the individual matches in this argument. Will not be null.
|
||||
*/
|
||||
public Iterator<ArgumentMatch> iterator() {
|
||||
return new Iterator<ArgumentMatch>() {
|
||||
/**
|
||||
* Iterate over each the available index.
|
||||
*/
|
||||
private Iterator<Integer> indexIterator = null;
|
||||
|
||||
/**
|
||||
* Iterate over each available token.
|
||||
*/
|
||||
private Iterator<String> tokenIterator = null;
|
||||
|
||||
/**
|
||||
* The next index to return. Null if none remain.
|
||||
*/
|
||||
Integer nextIndex = null;
|
||||
|
||||
/**
|
||||
* The next token to return. Null if none remain.
|
||||
*/
|
||||
String nextToken = null;
|
||||
|
||||
{
|
||||
indexIterator = indices.keySet().iterator();
|
||||
prepareNext();
|
||||
}
|
||||
|
||||
/**
|
||||
* Is there a nextToken available to return?
|
||||
* @return True if there's another token waiting in the wings. False otherwise.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return nextToken != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the next token, if one exists. If not, throw an IllegalStateException.
|
||||
* @return The next ArgumentMatch in the series. Should never be null.
|
||||
*/
|
||||
public ArgumentMatch next() {
|
||||
if( nextIndex == null || nextToken == null )
|
||||
throw new IllegalStateException( "No more ArgumentMatches are available" );
|
||||
|
||||
ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken );
|
||||
prepareNext();
|
||||
return match;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
|
||||
* initialize nextIndex / nextToken to null.
|
||||
*/
|
||||
private void prepareNext() {
|
||||
if( tokenIterator != null && tokenIterator.hasNext() ) {
|
||||
nextToken = tokenIterator.next();
|
||||
}
|
||||
else {
|
||||
nextIndex = null;
|
||||
nextToken = null;
|
||||
|
||||
// Do a nested loop. While more data is present in the inner loop, grab that data.
|
||||
// Otherwise, troll the outer iterator looking for more data.
|
||||
while( indexIterator.hasNext() ) {
|
||||
nextIndex = indexIterator.next();
|
||||
if( indices.get(nextIndex) != null ) {
|
||||
tokenIterator = indices.get(nextIndex).iterator();
|
||||
if( tokenIterator.hasNext() ) {
|
||||
nextToken = tokenIterator.next();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove is unsupported in this context.
|
||||
*/
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Cannot remove an argument match from the collection while iterating.");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -100,22 +100,8 @@ public class ArgumentSource {
|
|||
* @return A non-null, non-empty list of argument definitions.
|
||||
*/
|
||||
public List<ArgumentDefinition> createArgumentDefinitions() {
|
||||
String fullName = descriptor.fullName().trim().length() > 0 ? descriptor.fullName().trim() : field.getName().toLowerCase();
|
||||
String shortName = descriptor.shortName().trim().length() > 0 ? descriptor.shortName().trim() : null;
|
||||
String doc = descriptor.doc();
|
||||
boolean required = descriptor.required() && !isFlag();
|
||||
String exclusiveOf = descriptor.exclusiveOf().trim().length() > 0 ? descriptor.exclusiveOf().trim() : null;
|
||||
String validation = descriptor.validation().trim().length() > 0 ? descriptor.validation().trim() : null;
|
||||
|
||||
ArgumentDefinition argumentDefinition = new ArgumentDefinition( this,
|
||||
fullName,
|
||||
shortName,
|
||||
doc,
|
||||
required,
|
||||
exclusiveOf,
|
||||
validation );
|
||||
|
||||
return Collections.singletonList(argumentDefinition);
|
||||
ArgumentTypeDescriptor typeDescriptor = ArgumentTypeDescriptor.create( field.getType() );
|
||||
return typeDescriptor.createArgumentDefinitions( this, descriptor );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -123,30 +109,16 @@ public class ArgumentSource {
|
|||
* @param targetInstance Instance into which to inject the parsed value.
|
||||
* @param values String representation of all values passed.
|
||||
*/
|
||||
public void inject( ArgumentFactory customArgumentFactory, Object targetInstance, String... values ) {
|
||||
public Object parse( ArgumentSource source, Object targetInstance, ArgumentMatch... values ) {
|
||||
Object value = null;
|
||||
if( !isFlag() ) {
|
||||
ArgumentTypeDescriptor typeDescriptor = ArgumentTypeDescriptor.create( field.getType() );
|
||||
value = typeDescriptor.parse( source, values );
|
||||
}
|
||||
else
|
||||
value = true;
|
||||
|
||||
if( customArgumentFactory != null ) {
|
||||
value = customArgumentFactory.createArgument(field.getType(), values);
|
||||
}
|
||||
|
||||
if( value == null ) {
|
||||
if( !isFlag() ) {
|
||||
ArgumentTypeDescriptor typeDescriptor = ArgumentTypeDescriptor.create( field.getType() );
|
||||
value = typeDescriptor.parse( field, field.getType(), values );
|
||||
}
|
||||
else
|
||||
value = true;
|
||||
}
|
||||
|
||||
try {
|
||||
field.setAccessible(true);
|
||||
field.set(targetInstance, value);
|
||||
}
|
||||
catch( IllegalAccessException ex ) {
|
||||
//logger.fatal("processArgs: cannot convert field " + field.toString());
|
||||
throw new StingException("processArgs: Failed conversion " + ex.getMessage(), ex);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -26,10 +26,16 @@
|
|||
package org.broadinstitute.sting.utils.cmdLine;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.sam.SAMFileWriterBuilder;
|
||||
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.lang.reflect.*;
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
|
||||
/**
|
||||
* A factory capable of providing parsers that can parse any type
|
||||
|
|
@ -48,7 +54,9 @@ public abstract class ArgumentTypeDescriptor {
|
|||
/**
|
||||
* Class reference to the different types of descriptors that the create method can create.
|
||||
*/
|
||||
private static List<ArgumentTypeDescriptor> descriptors = Arrays.asList( new SimpleArgumentTypeDescriptor(),
|
||||
private static List<ArgumentTypeDescriptor> descriptors = Arrays.asList( new SAMFileReaderArgumentTypeDescriptor(),
|
||||
new SAMFileWriterArgumentTypeDescriptor(),
|
||||
new SimpleArgumentTypeDescriptor(),
|
||||
new CompoundArgumentTypeDescriptor() );
|
||||
|
||||
public static ArgumentTypeDescriptor create( Class type ) {
|
||||
|
|
@ -59,11 +67,91 @@ public abstract class ArgumentTypeDescriptor {
|
|||
throw new StingException("Can't process command-line arguments of type: " + type.getName());
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this descriptor support classes of the given type?
|
||||
* @param type The type to check.
|
||||
* @return true if this descriptor supports the given type, false otherwise.
|
||||
*/
|
||||
public abstract boolean supports( Class type );
|
||||
|
||||
/**
|
||||
* Given the given argument source and attributes, synthesize argument definitions for command-line arguments.
|
||||
* @param source Source class and field for the given argument.
|
||||
* @param description Description of the fields that go into a given argument.
|
||||
* @return A list of command-line argument definitions supporting this field.
|
||||
*/
|
||||
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source, Argument description ) {
|
||||
ArgumentDefinition definition = new ArgumentDefinition( source,
|
||||
getFullName( source, description ),
|
||||
getShortName( source, description ),
|
||||
getDoc( source, description ),
|
||||
isRequired( source, description ),
|
||||
getExclusiveOf( source, description ),
|
||||
getValidationRegex( source, description ) );
|
||||
return Collections.singletonList(definition);
|
||||
}
|
||||
|
||||
public abstract Object parse( Field field, Class type, String... values );
|
||||
public Object parse( ArgumentSource source, ArgumentMatch... values ) {
|
||||
return parse( source, source.field.getType(), values );
|
||||
}
|
||||
|
||||
protected abstract Object parse( ArgumentSource source, Class type, ArgumentMatch... values );
|
||||
|
||||
/**
|
||||
* Retrieves the full name of the argument, specifiable with the '--' prefix. The full name can be
|
||||
* either specified explicitly with the fullName annotation parameter or implied by the field name.
|
||||
* @return full name of the argument. Never null.
|
||||
*/
|
||||
protected String getFullName( ArgumentSource source, Argument description ) {
|
||||
return description.fullName().trim().length() > 0 ? description.fullName().trim() : source.field.getName().toLowerCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the short name of the argument, specifiable with the '-' prefix. The short name can
|
||||
* be specified or not; if left unspecified, no short name will be present.
|
||||
* @return short name of the argument. Null if no short name exists.
|
||||
*/
|
||||
protected String getShortName( ArgumentSource source, Argument description ) {
|
||||
return description.shortName().trim().length() > 0 ? description.shortName().trim() : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Documentation for this argument. Mandatory field.
|
||||
* @return Documentation for this argument.
|
||||
*/
|
||||
protected String getDoc( ArgumentSource source, Argument description ) {
|
||||
return description.doc();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether this field is required. Note that flag fields are always forced to 'not required'.
|
||||
* @return True if the field is mandatory and not a boolean flag. False otherwise.
|
||||
*/
|
||||
protected boolean isRequired( ArgumentSource source, Argument description ) {
|
||||
return description.required() && !source.isFlag();
|
||||
}
|
||||
|
||||
/**
|
||||
* Specifies other arguments which cannot be used in conjunction with tihs argument. Comma-separated list.
|
||||
* @return A comma-separated list of exclusive arguments, or null if none are present.
|
||||
*/
|
||||
protected String getExclusiveOf( ArgumentSource source, Argument description ) {
|
||||
return description.exclusiveOf().trim().length() > 0 ? description.exclusiveOf().trim() : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* A regular expression which can be used for validation.
|
||||
* @return a JVM regex-compatible regular expression, or null to permit any possible value.
|
||||
*/
|
||||
protected String getValidationRegex( ArgumentSource source, Argument description ) {
|
||||
return description.validation().trim().length() > 0 ? description.validation().trim() : null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse simple argument types: java primitives, wrapper classes, and anything that has
|
||||
* a simple String constructor.
|
||||
*/
|
||||
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
|
|
@ -83,11 +171,10 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object parse( Field field, Class type, String... values ) {
|
||||
if( values.length > 1 )
|
||||
protected Object parse( ArgumentSource source, Class type, ArgumentMatch... matches ) {
|
||||
if( matches.length > 1 || matches[0].values().size() > 1 )
|
||||
throw new StingException("Simple argument parser is unable to parse multiple arguments.");
|
||||
|
||||
String value = values[0];
|
||||
String value = matches[0].values().get(0);
|
||||
|
||||
// lets go through the types we support
|
||||
try {
|
||||
|
|
@ -131,6 +218,9 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Process compound argument types: arrays, and typed and untyped collections.
|
||||
*/
|
||||
class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
|
|
@ -138,10 +228,13 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object parse( Field field, Class type, String... values )
|
||||
public Object parse( ArgumentSource source, Class type, ArgumentMatch... matches )
|
||||
{
|
||||
Class componentType = null;
|
||||
ArgumentTypeDescriptor componentArgumentParser;
|
||||
|
||||
if( matches.length > 1 )
|
||||
throw new StingException("Simple argument parser is unable to combine multiple argument types into a compound argument.");
|
||||
ArgumentMatch match = matches[0];
|
||||
|
||||
if( Collection.class.isAssignableFrom(type) ) {
|
||||
|
||||
|
|
@ -155,48 +248,45 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
// If this is a parameterized collection, find the contained type.  Blow up if more than one type argument exists.
|
||||
if( field.getGenericType() instanceof ParameterizedType) {
|
||||
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
||||
if( source.field.getGenericType() instanceof ParameterizedType) {
|
||||
ParameterizedType parameterizedType = (ParameterizedType)source.field.getGenericType();
|
||||
if( parameterizedType.getActualTypeArguments().length > 1 )
|
||||
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
|
||||
throw new IllegalArgumentException("Unable to determine collection type of field: " + source.field.toString());
|
||||
componentType = (Class)parameterizedType.getActualTypeArguments()[0];
|
||||
}
|
||||
else
|
||||
componentType = String.class;
|
||||
}
|
||||
else if( type.isArray() ) {
|
||||
componentType = type.getComponentType();
|
||||
}
|
||||
else
|
||||
throw new StingException("Unsupported compound argument type: " + type);
|
||||
|
||||
componentArgumentParser = ArgumentTypeDescriptor.create( componentType );
|
||||
ArgumentTypeDescriptor componentArgumentParser = ArgumentTypeDescriptor.create( componentType );
|
||||
|
||||
if( Collection.class.isAssignableFrom(type) ) {
|
||||
Collection collection = null;
|
||||
try {
|
||||
collection = (Collection)type.newInstance();
|
||||
}
|
||||
catch (InstantiationException e) {
|
||||
logger.fatal("ArgumentParser: InstantiationException: cannot convert field " + field.getName());
|
||||
logger.fatal("ArgumentParser: InstantiationException: cannot convert field " + source.field.getName());
|
||||
throw new StingException("constructFromString:InstantiationException: Failed conversion " + e.getMessage());
|
||||
}
|
||||
catch (IllegalAccessException e) {
|
||||
logger.fatal("ArgumentParser: IllegalAccessException: cannot convert field " + field.getName());
|
||||
logger.fatal("ArgumentParser: IllegalAccessException: cannot convert field " + source.field.getName());
|
||||
throw new StingException("constructFromString:IllegalAccessException: Failed conversion " + e.getMessage());
|
||||
}
|
||||
|
||||
for( String value: values )
|
||||
collection.add( componentArgumentParser.parse(field,componentType,value) );
|
||||
for( ArgumentMatch value: match )
|
||||
collection.add( componentArgumentParser.parse(source,componentType,value) );
|
||||
|
||||
return collection;
|
||||
|
||||
}
|
||||
else if( type.isArray() ) {
|
||||
Object arr = Array.newInstance(componentType,values.length);
|
||||
componentType = type.getComponentType();
|
||||
ArgumentTypeDescriptor componentArgumentParser = ArgumentTypeDescriptor.create( componentType );
|
||||
Object arr = Array.newInstance(componentType,match.values().size());
|
||||
|
||||
int i = 0;
|
||||
for( ArgumentMatch value: match )
|
||||
Array.set( arr,i++,componentArgumentParser.parse(source,componentType,value));
|
||||
|
||||
for( int i = 0; i < values.length; i++ )
|
||||
Array.set( arr,i,componentArgumentParser.parse(field,componentType,values[i]));
|
||||
|
||||
return arr;
|
||||
}
|
||||
else
|
||||
|
|
@ -204,3 +294,102 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle SAMFileReaders.
|
||||
*/
|
||||
class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
return SAMFileReader.class.isAssignableFrom(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object parse( ArgumentSource source, Class type, ArgumentMatch... matches ) {
|
||||
if( matches.length > 1 )
|
||||
throw new UnsupportedOperationException("Only an input file name and validation stringency can be supplied when creating a BAM file reader.");
|
||||
|
||||
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
|
||||
|
||||
ArgumentMatch readerMatch = matches[0];
|
||||
|
||||
if( readerMatch == null )
|
||||
throw new StingException("SAM file compression was supplied, but not associated writer was supplied with it.");
|
||||
if( readerMatch.values().size() > 1 )
|
||||
throw new StingException("Only one filename can be supplied per created BAM file");
|
||||
|
||||
builder.setSAMFile(new File(readerMatch.values().get(0).trim()));
|
||||
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle SAMFileWriters.
|
||||
*/
|
||||
class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
private static final String COMPRESSION_FULLNAME = "bam_compression";
|
||||
private static final String COMPRESSION_SHORTNAME = "compress";
|
||||
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
return SAMFileWriter.class.isAssignableFrom(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source, Argument description ) {
|
||||
String fullName = description.fullName().trim().length() > 0 ? description.fullName().trim() : "outputBAM";
|
||||
String shortName = description.shortName().trim().length() > 0 ? description.shortName().trim() : "ob";
|
||||
|
||||
ArgumentDefinition writerDefinition = new ArgumentDefinition( source,
|
||||
fullName,
|
||||
shortName,
|
||||
getDoc( source, description ),
|
||||
isRequired( source, description ),
|
||||
getExclusiveOf( source, description ),
|
||||
getValidationRegex( source, description ) );
|
||||
ArgumentDefinition compressionDefinition = new ArgumentDefinition( source,
|
||||
COMPRESSION_FULLNAME,
|
||||
COMPRESSION_SHORTNAME,
|
||||
"Compression level to use for writing BAM files",
|
||||
false,
|
||||
"",
|
||||
"" );
|
||||
|
||||
return Arrays.asList( writerDefinition, compressionDefinition );
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object parse( ArgumentSource source, Class type, ArgumentMatch... matches ) {
|
||||
if( matches.length > 2 )
|
||||
throw new UnsupportedOperationException("Only an input file name and validation stringency can be supplied when creating a BAM file reader.");
|
||||
|
||||
SAMFileWriterBuilder builder = new SAMFileWriterBuilder();
|
||||
|
||||
ArgumentMatch writerMatch = null;
|
||||
ArgumentMatch compressionMatch = null;
|
||||
|
||||
for( ArgumentMatch match: matches ) {
|
||||
if( match.definition.fullName.equals(COMPRESSION_FULLNAME) )
|
||||
compressionMatch = match;
|
||||
else
|
||||
writerMatch = match;
|
||||
}
|
||||
|
||||
if( writerMatch == null )
|
||||
throw new StingException("SAM file compression was supplied, but not associated writer was supplied with it.");
|
||||
if( writerMatch.values().size() > 1 )
|
||||
throw new StingException("Only one filename can be supplied per created BAM file");
|
||||
|
||||
builder.setSAMFile(new File(writerMatch.values().get(0).trim()));
|
||||
|
||||
if( compressionMatch != null ) {
|
||||
if( compressionMatch.values().size() > 1 )
|
||||
throw new StingException("Only one value can be supplied for BAM compression");
|
||||
int compressionLevel = Integer.valueOf(compressionMatch.values().get(0));
|
||||
builder.setCompressionLevel(compressionLevel);
|
||||
}
|
||||
|
||||
return builder;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,6 +109,16 @@ public abstract class CommandLineProgram {
|
|||
*/
|
||||
protected Class[] getArgumentSources() { return new Class[] {}; }
|
||||
|
||||
/**
|
||||
* Allows arguments to be hijacked by subclasses of the program before being placed
|
||||
* into plugin classes.
|
||||
* @param source Source class for the argument.
|
||||
* @param targetInstance Instance into which the value should be ultimately injected.
|
||||
* @param value Value to inject.
|
||||
* @return True if the particular field has been hijacked; false otherwise.
|
||||
*/
|
||||
protected boolean intercept( ArgumentSource source, Object targetInstance, Object value ) { return false; }
|
||||
|
||||
/**
|
||||
* Name this argument source. Provides the (full) class name as a default.
|
||||
* @param source The argument source.
|
||||
|
|
@ -124,15 +134,6 @@ public abstract class CommandLineProgram {
|
|||
*/
|
||||
protected abstract int execute();
|
||||
|
||||
/**
|
||||
* Retrieves a factory for custom creation of command-line arguments, specified by the
|
||||
* subclass.
|
||||
* @return
|
||||
*/
|
||||
protected ArgumentFactory getCustomArgumentFactory() {
|
||||
return null;
|
||||
}
|
||||
|
||||
static {
|
||||
// setup a basic log configuration
|
||||
BasicConfigurator.configure();
|
||||
|
|
@ -152,7 +153,7 @@ public abstract class CommandLineProgram {
|
|||
PatternLayout layout = new PatternLayout();
|
||||
|
||||
// setup the parser
|
||||
ParsingEngine parser = clp.parser = new ParsingEngine( clp.getCustomArgumentFactory() );
|
||||
ParsingEngine parser = clp.parser = new ParsingEngine(clp);
|
||||
parser.addArgumentSource( clp.getClass() );
|
||||
|
||||
// process the args
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.cmdLine;
|
|||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.JVMUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.lang.reflect.*;
|
||||
|
|
@ -25,6 +26,11 @@ import java.util.*;
|
|||
* A parser for Sting command-line arguments.
|
||||
*/
|
||||
public class ParsingEngine {
|
||||
/**
|
||||
* The command-line program at the heart of this parsing engine.
|
||||
*/
|
||||
CommandLineProgram clp = null;
|
||||
|
||||
/**
|
||||
* A collection of all the source fields which define command-line arguments.
|
||||
*/
|
||||
|
|
@ -43,13 +49,6 @@ public class ParsingEngine {
|
|||
*/
|
||||
ArgumentMatches argumentMatches = null;
|
||||
|
||||
/**
|
||||
* Stores a custom argument factory for building out arguments of which only
|
||||
* subclasses of CommandLineProgram should be aware.
|
||||
*/
|
||||
ArgumentFactory customArgumentFactory = null;
|
||||
|
||||
|
||||
/**
|
||||
* Techniques for parsing and for argument lookup.
|
||||
*/
|
||||
|
|
@ -60,8 +59,8 @@ public class ParsingEngine {
|
|||
*/
|
||||
protected static Logger logger = Logger.getLogger(ParsingEngine.class);
|
||||
|
||||
public ParsingEngine( ArgumentFactory customArgumentFactory ) {
|
||||
this.customArgumentFactory = customArgumentFactory;
|
||||
public ParsingEngine( CommandLineProgram clp ) {
|
||||
this.clp = clp;
|
||||
parsingMethods.add( ParsingMethod.FullNameParsingMethod );
|
||||
parsingMethods.add( ParsingMethod.ShortNameParsingMethod );
|
||||
}
|
||||
|
|
@ -245,33 +244,27 @@ public class ParsingEngine {
|
|||
// Get a list of argument sources, not including the children of this argument. For now, skip loading
|
||||
// arguments into the object recursively.
|
||||
List<ArgumentSource> argumentSources = extractArgumentSources( object.getClass(), false );
|
||||
for( ArgumentSource argumentSource: argumentSources ) {
|
||||
Collection<ArgumentMatch> argumentsMatchingSource = argumentMatches.findMatches( argumentSource );
|
||||
if( argumentsMatchingSource.size() != 0 )
|
||||
loadMatchesIntoObject( argumentsMatchingSource, object );
|
||||
}
|
||||
for( ArgumentSource argumentSource: argumentSources )
|
||||
loadMatchesIntoObject( argumentSource, object, argumentMatches.findMatches(argumentSource) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a single argument into the object.
|
||||
* @param argumentMatches Argument matches to load into the object.
|
||||
* @param object Target for the argument.
|
||||
* @param target
|
||||
*/
|
||||
private void loadMatchesIntoObject( Collection<ArgumentMatch> argumentMatches, Object object ) {
|
||||
if( argumentMatches.size() > 1 )
|
||||
throw new StingException("Too many matches");
|
||||
|
||||
ArgumentMatch match = argumentMatches.iterator().next();
|
||||
ArgumentDefinition definition = match.definition;
|
||||
|
||||
// A null definition might be in the list if some invalid arguments were passed in but we
|
||||
// want to load in a subset of data for better error reporting. Ignore null definitions.
|
||||
if( definition == null )
|
||||
private void loadMatchesIntoObject( ArgumentSource source, Object target, Collection<ArgumentMatch> argumentMatches ) {
|
||||
// Nothing to load
|
||||
if( argumentMatches.size() == 0 )
|
||||
return;
|
||||
|
||||
if( definition.source.clazz.isAssignableFrom(object.getClass()) ) {
|
||||
String[] tokens = match.values().toArray(new String[0]);
|
||||
definition.source.inject( customArgumentFactory, object, tokens );
|
||||
if( argumentMatches.size() > 1 )
|
||||
throw new StingException("Too many values matched argument: " + source.field.getName());
|
||||
|
||||
if( source.clazz.isAssignableFrom(target.getClass()) ) {
|
||||
Object value = source.parse( source, target, argumentMatches.toArray(new ArgumentMatch[0]) );
|
||||
if( clp == null || !clp.intercept(source, target, value) )
|
||||
JVMUtils.setField( source.field, target, value );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
/**
|
||||
* Allows the user to steadily accumulate information about what
|
||||
* components go into a SAM file writer, ultimately using this
|
||||
* information to create a SAM file writer on demand.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SAMFileReaderBuilder {
|
||||
/**
|
||||
* To which file should output be written?
|
||||
*/
|
||||
private File samFile = null;
|
||||
|
||||
/**
|
||||
* What compression level should be used when building this file?
|
||||
*/
|
||||
private SAMFileReader.ValidationStringency validationStringency = null;
|
||||
|
||||
/**
|
||||
* Sets the handle of the sam file to which data should be written.
|
||||
* @param samFile The SAM file into which data should flow.
|
||||
*/
|
||||
public void setSAMFile( File samFile ) {
|
||||
this.samFile = samFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the validation stringency to apply when reading this sam file.
|
||||
* @param validationStringency Stringency to apply. Must not be null.
|
||||
*/
|
||||
public void setValidationStringency( SAMFileReader.ValidationStringency validationStringency ) {
|
||||
this.validationStringency = validationStringency;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the SAM writer, given the constituent parts accrued.
|
||||
* @return Newly minted SAM file writer.
|
||||
*/
|
||||
public SAMFileReader build() {
|
||||
if( samFile == null )
|
||||
throw new StingException( "Filename for output sam file must be supplied.");
|
||||
if( validationStringency == null )
|
||||
throw new StingException( "Header for output sam file must be supplied.");
|
||||
|
||||
SAMFileReader reader = new SAMFileReader( samFile );
|
||||
reader.setValidationStringency( validationStringency );
|
||||
|
||||
return reader;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMFileWriterFactory;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
/**
|
||||
* Allows the user to steadily accumulate information about what
|
||||
* components go into a SAM file writer, ultimately using this
|
||||
* information to create a SAM file writer on demand.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SAMFileWriterBuilder {
|
||||
/**
|
||||
* Default compression level for newly constructed SAM files.
|
||||
* Default to 5 (based on research by Alec Wysoker)
|
||||
*/
|
||||
public static final int DEFAULT_COMPRESSION_LEVEL = 5;
|
||||
|
||||
/**
|
||||
* To which file should output be written?
|
||||
*/
|
||||
private File samFile = null;
|
||||
|
||||
/**
|
||||
* Which header should be used when writing the SAM file?
|
||||
*/
|
||||
private SAMFileHeader header = null;
|
||||
|
||||
/**
|
||||
* What compression level should be used when building this file?
|
||||
*/
|
||||
private int compressionLevel = DEFAULT_COMPRESSION_LEVEL;
|
||||
|
||||
/**
|
||||
* Sets the handle of the sam file to which data should be written.
|
||||
* @param samFile The SAM file into which data should flow.
|
||||
*/
|
||||
public void setSAMFile( File samFile ) {
|
||||
this.samFile = samFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the header to be written at the head of this SAM file.
|
||||
* @param header Header to write.
|
||||
*/
|
||||
public void setSAMFileHeader( SAMFileHeader header ) {
|
||||
this.header = header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the compression level to use when writing this BAM file.
|
||||
* @param compressionLevel Compression level to use when writing this SAM file.
|
||||
*/
|
||||
public void setCompressionLevel( int compressionLevel ) {
|
||||
this.compressionLevel = compressionLevel;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the SAM writer, given the constituent parts accrued.
|
||||
* @return Newly minted SAM file writer.
|
||||
*/
|
||||
public SAMFileWriter build() {
|
||||
if( samFile == null )
|
||||
throw new StingException( "Filename for output sam file must be supplied.");
|
||||
if( header == null )
|
||||
throw new StingException( "Header for output sam file must be supplied.");
|
||||
return new SAMFileWriterFactory().makeBAMWriter( header, true, samFile, compressionLevel );
|
||||
}
|
||||
}
|
||||
|
|
@ -42,7 +42,8 @@ public class OutputTrackerTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testNullInputs() {
|
||||
OutputTracker ot = new OutputTracker(null,null);
|
||||
OutputTracker ot = new OutputTracker();
|
||||
ot.initializeCoreIO(null,null);
|
||||
|
||||
Assert.assertTrue("OutputTracker: Output stream is of wrong type.", ot.getOutStream() instanceof RedirectingOutputStream );
|
||||
Assert.assertTrue("OutputTracker: Error stream is of wrong type.", ot.getErrStream() instanceof RedirectingOutputStream );
|
||||
|
|
@ -56,7 +57,8 @@ public class OutputTrackerTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testOutputStreamAlone() throws FileNotFoundException {
|
||||
OutputTracker ot = new OutputTracker(OUTPUT_FILENAME,null);
|
||||
OutputTracker ot = new OutputTracker();
|
||||
ot.initializeCoreIO(OUTPUT_FILENAME,null);
|
||||
|
||||
final String OUTPUT_TEXT = "out stream test";
|
||||
PrintWriter outWriter = new PrintWriter(ot.getOutStream());
|
||||
|
|
@ -76,7 +78,8 @@ public class OutputTrackerTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testErrorStreamAlone() throws FileNotFoundException {
|
||||
OutputTracker ot = new OutputTracker(null,ERROR_FILENAME);
|
||||
OutputTracker ot = new OutputTracker();
|
||||
ot.initializeCoreIO(null,ERROR_FILENAME);
|
||||
|
||||
final String ERROR_TEXT = "err stream test";
|
||||
PrintWriter errWriter = new PrintWriter(ot.getErrStream());
|
||||
|
|
@ -95,7 +98,8 @@ public class OutputTrackerTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testIndependentStreams() throws FileNotFoundException {
|
||||
OutputTracker ot = new OutputTracker(OUTPUT_FILENAME,ERROR_FILENAME);
|
||||
OutputTracker ot = new OutputTracker();
|
||||
ot.initializeCoreIO(OUTPUT_FILENAME,ERROR_FILENAME);
|
||||
|
||||
final String OUTPUT_TEXT = "out stream test";
|
||||
PrintWriter outWriter = new PrintWriter(ot.getOutStream());
|
||||
|
|
@ -121,7 +125,8 @@ public class OutputTrackerTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testIdenticalInputsGetIdenticalResults() {
|
||||
OutputTracker ot = new OutputTracker(OUTPUT_FILENAME,OUTPUT_FILENAME);
|
||||
OutputTracker ot = new OutputTracker();
|
||||
ot.initializeCoreIO(OUTPUT_FILENAME,OUTPUT_FILENAME);
|
||||
|
||||
Assert.assertTrue("OutputTracker: Output stream is of wrong type.", ot.getOutStream() instanceof RedirectingOutputStream );
|
||||
Assert.assertTrue("OutputTracker: Error stream is of wrong type.", ot.getErrStream() instanceof RedirectingOutputStream );
|
||||
|
|
|
|||
|
|
@ -173,8 +173,6 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
walker.cleanedReadsSource = cleanedReads;
|
||||
walker.outputBAM = output;
|
||||
|
||||
walker.initializeOutputStreams( new OutputTracker(null,null) );
|
||||
|
||||
return walker;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue