2009-06-11 01:34:02 +08:00
/ *
2010-04-20 07:00:08 +08:00
* Copyright ( c ) 2010 The Broad Institute
2010-04-20 23:26:32 +08:00
*
2009-06-11 01:34:02 +08:00
* Permission is hereby granted , free of charge , to any person
* obtaining a copy of this software and associated documentation
2010-04-20 23:26:32 +08:00
* files ( the "Software" ) , to deal in the Software without
2009-06-11 01:34:02 +08:00
* restriction , including without limitation the rights to use ,
* copy , modify , merge , publish , distribute , sublicense , and / or sell
* copies of the Software , and to permit persons to whom the
* Software is furnished to do so , subject to the following
* conditions :
2010-04-20 23:26:32 +08:00
*
2009-06-11 01:34:02 +08:00
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software .
*
2010-04-20 23:26:32 +08:00
* THE SOFTWARE IS PROVIDED "AS IS" , WITHOUT WARRANTY OF ANY KIND ,
2009-06-11 01:34:02 +08:00
* EXPRESS OR IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY , FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT . IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER LIABILITY ,
* WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING
2010-04-20 07:00:08 +08:00
* FROM , OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE .
2009-06-11 01:34:02 +08:00
* /
2010-04-20 07:00:08 +08:00
package org.broadinstitute.sting.gatk ;
2011-01-21 08:22:42 +08:00
import org.broadinstitute.sting.commandline.Tags ;
2010-04-20 07:00:08 +08:00
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection ;
import org.broadinstitute.sting.commandline.CommandLineProgram ;
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor ;
2010-12-23 03:00:17 +08:00
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID ;
2010-09-22 23:27:58 +08:00
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor ;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor ;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor ;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor ;
2010-08-29 06:53:32 +08:00
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport ;
2010-12-23 03:00:17 +08:00
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet ;
2010-12-31 12:52:22 +08:00
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType ;
2010-12-23 03:00:17 +08:00
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper ;
2010-04-20 07:00:08 +08:00
import org.broadinstitute.sting.gatk.walkers.Walker ;
2010-12-23 03:00:17 +08:00
import java.io.File ;
import java.io.FileNotFoundException ;
2010-04-20 07:00:08 +08:00
import java.util.* ;
import net.sf.picard.filter.SamRecordFilter ;
2010-12-23 03:00:17 +08:00
import org.broadinstitute.sting.utils.exceptions.UserException ;
import org.broadinstitute.sting.utils.text.XReadLines ;
2010-04-20 07:00:08 +08:00
2009-06-11 01:34:02 +08:00
/ * *
* @author aaron
* /
public abstract class CommandLineExecutable extends CommandLineProgram {
2009-07-17 06:48:44 +08:00
/ * *
2009-07-22 02:50:51 +08:00
* The actual engine which performs the analysis .
2009-07-17 06:48:44 +08:00
* /
2010-09-22 23:27:58 +08:00
protected GenomeAnalysisEngine engine = new GenomeAnalysisEngine ( ) ;
2009-06-11 01:34:02 +08:00
// get the analysis name
2010-09-22 23:27:58 +08:00
public abstract String getAnalysisName ( ) ;
2009-06-11 01:34:02 +08:00
2009-07-17 06:48:44 +08:00
/ * *
* Gets the GATK argument bundle .
* @return A structure consisting of whatever arguments should be used to initialize the GATK engine .
* /
2009-07-17 06:02:21 +08:00
protected abstract GATKArgumentCollection getArgumentCollection ( ) ;
2010-09-22 23:27:58 +08:00
/ * *
* A list of all the arguments initially used as sources .
* /
private final Collection < Object > argumentSources = new ArrayList < Object > ( ) ;
2009-06-11 01:34:02 +08:00
/ * *
* this is the function that the inheriting class can expect to have called
* when the command line system has initialized .
*
* @return the return code to exit the program with
* /
2010-08-29 06:53:32 +08:00
protected int execute ( ) throws Exception {
2010-10-28 03:44:55 +08:00
engine . setParser ( parser ) ;
2010-09-22 23:27:58 +08:00
argumentSources . add ( this ) ;
Walker < ? , ? > walker = engine . getWalkerByName ( getAnalysisName ( ) ) ;
2009-07-17 06:48:44 +08:00
2010-08-29 06:53:32 +08:00
try {
2010-09-24 07:28:55 +08:00
engine . setArguments ( getArgumentCollection ( ) ) ;
2010-12-23 03:00:17 +08:00
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
engine . setSAMFileIDs ( unpackBAMFileList ( getArgumentCollection ( ) ) ) ;
engine . setReferenceMetaDataFiles ( unpackRODBindings ( getArgumentCollection ( ) ) ) ;
2010-09-24 07:28:55 +08:00
engine . setWalker ( walker ) ;
2010-09-25 10:49:30 +08:00
walker . setToolkit ( engine ) ;
2010-09-24 07:28:55 +08:00
Collection < SamRecordFilter > filters = engine . createFilters ( ) ;
engine . setFilters ( filters ) ;
2010-09-12 22:02:43 +08:00
// load the arguments into the walker / filters.
2010-09-22 23:27:58 +08:00
// TODO: The fact that this extra load call exists here when all the parsing happens at the engine
// TODO: level indicates that we're doing something wrong. Turn this around so that the GATK can drive
// TODO: argument processing.
loadArgumentsIntoObject ( walker ) ;
argumentSources . add ( walker ) ;
for ( SamRecordFilter filter : filters ) {
2010-09-12 22:02:43 +08:00
loadArgumentsIntoObject ( filter ) ;
2010-09-22 23:27:58 +08:00
argumentSources . add ( filter ) ;
}
2010-09-12 22:02:43 +08:00
2010-09-24 07:28:55 +08:00
engine . execute ( ) ;
2010-09-22 23:27:58 +08:00
generateGATKRunReport ( walker ) ;
2010-08-29 06:53:32 +08:00
} catch ( Exception e ) {
2010-09-22 23:27:58 +08:00
generateGATKRunReport ( walker , e ) ;
2010-08-29 06:53:32 +08:00
throw e ;
}
// always return 0
return 0 ;
}
/ * *
2010-08-29 23:59:25 +08:00
* Generate the GATK run report for this walker using the current GATKEngine , if - et is enabled .
* This report will be written to either STDOUT or to the run repository , depending on the options
* for - et .
*
2010-08-29 06:53:32 +08:00
* @param e the exception , can be null if no exception occurred
* /
2010-09-22 23:27:58 +08:00
private void generateGATKRunReport ( Walker < ? , ? > walker , Exception e ) {
2010-08-29 06:53:32 +08:00
if ( getArgumentCollection ( ) . phoneHomeType ! = GATKRunReport . PhoneHomeOption . NO_ET ) {
2010-09-22 23:27:58 +08:00
GATKRunReport report = new GATKRunReport ( walker , e , engine , getArgumentCollection ( ) . phoneHomeType ) ;
2011-01-31 05:23:54 +08:00
report . postReport ( getArgumentCollection ( ) . phoneHomeType ) ;
2010-08-29 06:53:32 +08:00
}
}
2010-08-29 23:59:25 +08:00
/ * *
* Convenience method for fully parameterized generateGATKRunReport when an exception has
* not occurred
2010-09-24 07:28:55 +08:00
*
2010-09-22 23:27:58 +08:00
* @param walker
2010-08-29 23:59:25 +08:00
* /
2010-09-22 23:27:58 +08:00
private void generateGATKRunReport ( Walker < ? , ? > walker ) {
generateGATKRunReport ( walker , null ) ;
2009-06-11 01:34:02 +08:00
}
2009-08-23 08:56:02 +08:00
/ * *
* Subclasses of CommandLinePrograms can provide their own types of command - line arguments .
* @return A collection of type descriptors generating implementation - dependent placeholders .
* /
protected Collection < ArgumentTypeDescriptor > getArgumentTypeDescriptors ( ) {
2010-09-22 23:27:58 +08:00
return Arrays . asList ( new VCFWriterArgumentTypeDescriptor ( engine , System . out , argumentSources ) ,
new SAMFileReaderArgumentTypeDescriptor ( engine ) ,
new SAMFileWriterArgumentTypeDescriptor ( engine , System . out ) ,
new OutputStreamArgumentTypeDescriptor ( engine , System . out ) ) ;
2009-08-23 08:56:02 +08:00
}
2009-06-11 01:34:02 +08:00
/ * *
* GATK can add arguments dynamically based on analysis type .
*
* @return true
* /
@Override
protected boolean canAddArgumentsDynamically ( ) {
return true ;
}
/ * *
2009-07-17 06:02:21 +08:00
* GATK provides the walker as an argument source .
2009-06-11 01:34:02 +08:00
* @return List of walkers to load dynamically .
* /
@Override
protected Class [ ] getArgumentSources ( ) {
// No walker info? No plugins.
2009-07-17 06:02:21 +08:00
if ( getAnalysisName ( ) = = null ) return new Class [ ] { } ;
2009-11-11 07:36:17 +08:00
Collection < Class > argumentSources = new ArrayList < Class > ( ) ;
2010-09-22 23:27:58 +08:00
Walker walker = engine . getWalkerByName ( getAnalysisName ( ) ) ;
2010-09-24 07:28:55 +08:00
engine . setArguments ( getArgumentCollection ( ) ) ;
engine . setWalker ( walker ) ;
2010-09-25 10:49:30 +08:00
walker . setToolkit ( engine ) ;
2009-11-11 07:36:17 +08:00
argumentSources . add ( walker . getClass ( ) ) ;
2010-09-24 07:28:55 +08:00
Collection < SamRecordFilter > filters = engine . createFilters ( ) ;
2009-11-11 07:36:17 +08:00
for ( SamRecordFilter filter : filters )
argumentSources . add ( filter . getClass ( ) ) ;
Class [ ] argumentSourcesAsArray = new Class [ argumentSources . size ( ) ] ;
return argumentSources . toArray ( argumentSourcesAsArray ) ;
2009-06-11 01:34:02 +08:00
}
2009-07-17 06:02:21 +08:00
@Override
protected String getArgumentSourceName ( Class argumentSource ) {
2010-09-22 23:27:58 +08:00
return engine . getWalkerName ( ( Class < Walker > ) argumentSource ) ;
2009-06-11 01:34:02 +08:00
}
2010-12-23 03:00:17 +08:00
/ * *
* Unpack the bam files to be processed , given a list of files . That list of files can
* itself contain entries which are lists of other files to be read ( note : you cannot have lists of lists of lists )
*
* @param argCollection the command - line arguments from which to extract the BAM file list .
* @return a flattened list of the bam files provided
* /
private List < SAMReaderID > unpackBAMFileList ( GATKArgumentCollection argCollection ) {
List < SAMReaderID > unpackedReads = new ArrayList < SAMReaderID > ( ) ;
2010-12-31 12:52:22 +08:00
for ( String inputFileName : argCollection . samFiles ) {
2011-01-21 08:22:42 +08:00
Tags inputFileNameTags = parser . getTags ( inputFileName ) ;
2010-12-31 12:52:22 +08:00
inputFileName = expandFileName ( inputFileName ) ;
if ( inputFileName . toLowerCase ( ) . endsWith ( ".list" ) ) {
2010-12-23 03:00:17 +08:00
try {
2010-12-31 12:52:22 +08:00
for ( String fileName : new XReadLines ( new File ( inputFileName ) ) )
unpackedReads . add ( new SAMReaderID ( fileName , parser . getTags ( inputFileName ) ) ) ;
2010-12-23 03:00:17 +08:00
}
catch ( FileNotFoundException ex ) {
2010-12-31 12:52:22 +08:00
throw new UserException . CouldNotReadInputFile ( new File ( inputFileName ) , "Unable to find file while unpacking reads" , ex ) ;
2010-12-23 03:00:17 +08:00
}
}
2010-12-31 12:52:22 +08:00
else if ( inputFileName . toLowerCase ( ) . endsWith ( ".bam" ) ) {
unpackedReads . add ( new SAMReaderID ( inputFileName , inputFileNameTags ) ) ;
2010-12-23 03:00:17 +08:00
}
2010-12-31 12:52:22 +08:00
else if ( inputFileName . endsWith ( "stdin" ) ) {
unpackedReads . add ( new SAMReaderID ( inputFileName , inputFileNameTags ) ) ;
2010-12-23 03:00:17 +08:00
}
else {
throw new UserException . CommandLineException ( String . format ( "The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
2010-12-31 12:52:22 +08:00
"of BAM files is in the correct format, update the extension, and try again." , inputFileName ) ) ;
2010-12-23 03:00:17 +08:00
}
}
return unpackedReads ;
}
/ * *
* Convert command - line argument representation of ROD bindings to something more easily understandable by the engine .
* @param argCollection input arguments to the GATK .
* @return a list of expanded , bound RODs .
* /
private Collection < RMDTriplet > unpackRODBindings ( GATKArgumentCollection argCollection ) {
Collection < RMDTriplet > rodBindings = new ArrayList < RMDTriplet > ( ) ;
2010-12-31 12:52:22 +08:00
for ( String fileName : argCollection . RODBindings ) {
2011-01-21 08:22:42 +08:00
Tags tags = parser . getTags ( fileName ) ;
2010-12-31 12:52:22 +08:00
fileName = expandFileName ( fileName ) ;
2011-01-13 05:54:51 +08:00
2011-01-21 08:22:42 +08:00
List < String > positionalTags = tags . getPositionalTags ( ) ;
if ( positionalTags . size ( ) ! = 2 )
throw new UserException ( "Invalid syntax for -B (reference-ordered data) input flag. " +
"Please use the following syntax when providing reference-ordered " +
"data: -B:<name>,<type> <filename>." ) ;
// Assume that if tags are present, those tags are name and type.
// Name is always first, followed by type.
String name = positionalTags . get ( 0 ) ;
String type = positionalTags . get ( 1 ) ;
2011-01-13 05:54:51 +08:00
RMDStorageType storageType = null ;
2011-01-21 08:22:42 +08:00
if ( tags . getValue ( "storage" ) ! = null )
storageType = Enum . valueOf ( RMDStorageType . class , tags . getValue ( "storage" ) ) ;
2011-01-13 05:54:51 +08:00
else if ( fileName . toLowerCase ( ) . endsWith ( "stdin" ) )
storageType = RMDStorageType . STREAM ;
else
storageType = RMDStorageType . FILE ;
2010-12-31 12:52:22 +08:00
rodBindings . add ( new RMDTriplet ( name , type , fileName , storageType ) ) ;
2010-12-23 03:00:17 +08:00
}
if ( argCollection . DBSNPFile ! = null ) {
if ( argCollection . DBSNPFile . toLowerCase ( ) . contains ( "vcf" ) )
throw new UserException ( "--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf <filename>." ) ;
2010-12-31 12:52:22 +08:00
String fileName = expandFileName ( argCollection . DBSNPFile ) ;
RMDStorageType storageType = fileName . toLowerCase ( ) . endsWith ( "stdin" ) ? RMDStorageType . STREAM : RMDStorageType . FILE ;
rodBindings . add ( new RMDTriplet ( DbSNPHelper . STANDARD_DBSNP_TRACK_NAME , "dbsnp" , fileName , storageType ) ) ;
2010-12-23 03:00:17 +08:00
}
return rodBindings ;
}
2010-12-31 12:52:22 +08:00
/ * *
* Expand any special characters that appear in the filename . Right now , '-' is expanded to
* ' / dev / stdin ' only , but in the future , special characters like '~' and '*' that are passed
* directly to the command line in some circumstances could be expanded as well . Be careful
* when adding UNIX - isms .
* @param argument the text appearing on the command - line .
* @return An expanded string suitable for opening by Java / UNIX file handling utilities .
* /
private String expandFileName ( String argument ) {
if ( argument . trim ( ) . equals ( "-" ) )
return "/dev/stdin" ;
return argument ;
}
2010-10-29 02:37:42 +08:00
}