293 lines
12 KiB
Java
293 lines
12 KiB
Java
/*
|
|
* Copyright (c) 2010 The Broad Institute
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person
|
|
* obtaining a copy of this software and associated documentation
|
|
* files (the "Software"), to deal in the Software without
|
|
* restriction, including without limitation the rights to use,
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following
|
|
* conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
package org.broadinstitute.sting.gatk;
|
|
|
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
|
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
|
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
|
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
|
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
|
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
|
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
|
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
|
|
|
import java.io.File;
|
|
import java.io.FileNotFoundException;
|
|
import java.util.*;
|
|
|
|
import net.sf.picard.filter.SamRecordFilter;
|
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
|
|
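/**
 * Base class for command-line programs that run a walker through the GenomeAnalysisEngine:
 * it looks up the walker named by getAnalysisName(), hands the parsed arguments, read files
 * and reference-ordered data bindings to the engine, and then executes the engine.
 *
 * @author aaron
 */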
|
|
public abstract class CommandLineExecutable extends CommandLineProgram {
|
|
/**
 * The actual engine which performs the analysis. execute() hands it the parser, the argument
 * collection, the unpacked input files, the walker and the filters before calling
 * engine.execute().
 */
|
|
protected GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
|
|
|
/**
 * Gets the name of the analysis to run. The value is passed to engine.getWalkerByName();
 * getArgumentSources() treats a null name as "no walker, no dynamically loaded arguments".
 * @return the analysis (walker) name, or null if none is available.
 */
|
|
public abstract String getAnalysisName();
|
|
|
|
/**
 * Gets the GATK argument bundle.
 * Within this class the bundle supplies samFiles, RODBindings, rodInputType, DBSNPFile
 * and phoneHomeType.
 * @return A structure consisting of whatever arguments should be used to initialize the GATK engine.
 */
|
|
protected abstract GATKArgumentCollection getArgumentCollection();
|
|
|
|
/**
 * A list of all the arguments initially used as sources. execute() adds this program, the
 * walker and every filter to it; the same collection instance is handed to the
 * VCFWriterArgumentTypeDescriptor built in getArgumentTypeDescriptors().
 */
|
|
private final Collection<Object> argumentSources = new ArrayList<Object>();
|
|
|
|
/**
|
|
* this is the function that the inheriting class can expect to have called
|
|
* when the command line system has initialized.
|
|
*
|
|
* @return the return code to exit the program with
|
|
*/
|
|
protected int execute() throws Exception {
|
|
engine.setParser(parser);
|
|
argumentSources.add(this);
|
|
|
|
Walker<?,?> walker = engine.getWalkerByName(getAnalysisName());
|
|
|
|
try {
|
|
engine.setArguments(getArgumentCollection());
|
|
|
|
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
|
|
engine.setSAMFileIDs(unpackBAMFileList(getArgumentCollection()));
|
|
engine.setReferenceMetaDataFiles(unpackRODBindings(getArgumentCollection()));
|
|
|
|
engine.setWalker(walker);
|
|
walker.setToolkit(engine);
|
|
|
|
Collection<SamRecordFilter> filters = engine.createFilters();
|
|
engine.setFilters(filters);
|
|
|
|
// load the arguments into the walker / filters.
|
|
// TODO: The fact that this extra load call exists here when all the parsing happens at the engine
|
|
// TODO: level indicates that we're doing something wrong. Turn this around so that the GATK can drive
|
|
// TODO: argument processing.
|
|
loadArgumentsIntoObject(walker);
|
|
argumentSources.add(walker);
|
|
|
|
for (SamRecordFilter filter: filters) {
|
|
loadArgumentsIntoObject(filter);
|
|
argumentSources.add(filter);
|
|
}
|
|
|
|
engine.execute();
|
|
generateGATKRunReport(walker);
|
|
} catch ( Exception e ) {
|
|
generateGATKRunReport(walker, e);
|
|
throw e;
|
|
}
|
|
|
|
// always return 0
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
|
|
* This report will be written to either STDOUT or to the run repository, depending on the options
|
|
* for -et.
|
|
*
|
|
* @param e the exception, can be null if no exception occurred
|
|
*/
|
|
private void generateGATKRunReport(Walker<?,?> walker, Exception e) {
|
|
if ( getArgumentCollection().phoneHomeType != GATKRunReport.PhoneHomeOption.NO_ET ) {
|
|
GATKRunReport report = new GATKRunReport(walker, e, engine, getArgumentCollection().phoneHomeType );
|
|
if ( getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT )
|
|
report.postReport(System.out);
|
|
else
|
|
report.postReport();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Convenience method for fully parameterized generateGATKRunReport when an exception has
|
|
* not occurred
|
|
*
|
|
* @param walker
|
|
*/
|
|
private void generateGATKRunReport(Walker<?,?> walker) {
|
|
generateGATKRunReport(walker, null);
|
|
}
|
|
|
|
/**
|
|
* Subclasses of CommandLinePrograms can provide their own types of command-line arguments.
|
|
* @return A collection of type descriptors generating implementation-dependent placeholders.
|
|
*/
|
|
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
|
|
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources),
|
|
new SAMFileReaderArgumentTypeDescriptor(engine),
|
|
new SAMFileWriterArgumentTypeDescriptor(engine,System.out),
|
|
new OutputStreamArgumentTypeDescriptor(engine,System.out) );
|
|
}
|
|
|
|
/**
|
|
* GATK can add arguments dynamically based on analysis type.
|
|
*
|
|
* @return true
|
|
*/
|
|
@Override
|
|
protected boolean canAddArgumentsDynamically() {
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* GATK provides the walker as an argument source.
|
|
* @return List of walkers to load dynamically.
|
|
*/
|
|
@Override
|
|
protected Class[] getArgumentSources() {
|
|
// No walker info? No plugins.
|
|
if (getAnalysisName() == null) return new Class[] {};
|
|
|
|
Collection<Class> argumentSources = new ArrayList<Class>();
|
|
|
|
Walker walker = engine.getWalkerByName(getAnalysisName());
|
|
engine.setArguments(getArgumentCollection());
|
|
engine.setWalker(walker);
|
|
walker.setToolkit(engine);
|
|
argumentSources.add(walker.getClass());
|
|
|
|
Collection<SamRecordFilter> filters = engine.createFilters();
|
|
for(SamRecordFilter filter: filters)
|
|
argumentSources.add(filter.getClass());
|
|
|
|
Class[] argumentSourcesAsArray = new Class[argumentSources.size()];
|
|
return argumentSources.toArray(argumentSourcesAsArray);
|
|
}
|
|
|
|
@Override
|
|
protected String getArgumentSourceName( Class argumentSource ) {
|
|
return engine.getWalkerName((Class<Walker>)argumentSource);
|
|
}
|
|
|
|
/**
|
|
* Unpack the bam files to be processed, given a list of files. That list of files can
|
|
* itself contain entries which are lists of other files to be read (note: you cannot have lists of lists of lists)
|
|
*
|
|
* @param argCollection the command-line arguments from which to extract the BAM file list.
|
|
* @return a flattened list of the bam files provided
|
|
*/
|
|
private List<SAMReaderID> unpackBAMFileList(GATKArgumentCollection argCollection) {
|
|
List<SAMReaderID> unpackedReads = new ArrayList<SAMReaderID>();
|
|
for( String inputFileName: argCollection.samFiles ) {
|
|
List<String> inputFileNameTags = parser.getTags(inputFileName);
|
|
inputFileName = expandFileName(inputFileName);
|
|
if (inputFileName.toLowerCase().endsWith(".list") ) {
|
|
try {
|
|
for(String fileName : new XReadLines(new File(inputFileName)))
|
|
unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName)));
|
|
}
|
|
catch( FileNotFoundException ex ) {
|
|
throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex);
|
|
}
|
|
}
|
|
else if(inputFileName.toLowerCase().endsWith(".bam")) {
|
|
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
|
}
|
|
else if(inputFileName.endsWith("stdin")) {
|
|
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
|
}
|
|
else {
|
|
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
|
|
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
|
|
"of BAM files is in the correct format, update the extension, and try again.",inputFileName));
|
|
}
|
|
}
|
|
return unpackedReads;
|
|
}
|
|
/**
|
|
* Convert command-line argument representation of ROD bindings to something more easily understandable by the engine.
|
|
* @param argCollection input arguments to the GATK.
|
|
* @return a list of expanded, bound RODs.
|
|
*/
|
|
private Collection<RMDTriplet> unpackRODBindings(GATKArgumentCollection argCollection) {
|
|
Collection<RMDTriplet> rodBindings = new ArrayList<RMDTriplet>();
|
|
|
|
|
|
for (String fileName: argCollection.RODBindings) {
|
|
List<String> parameters = parser.getTags(fileName);
|
|
fileName = expandFileName(fileName);
|
|
|
|
RMDStorageType storageType = null;
|
|
if(argCollection.rodInputType != null)
|
|
storageType = argCollection.rodInputType;
|
|
else if(fileName.toLowerCase().endsWith("stdin"))
|
|
storageType = RMDStorageType.STREAM;
|
|
else
|
|
storageType = RMDStorageType.FILE;
|
|
|
|
if(parameters.size() != 2)
|
|
throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " +
|
|
"Please use the following syntax when providing reference-ordered " +
|
|
"data: -B:<name>,<type> <filename>.");
|
|
// Assume that if tags are present, those tags are name and type.
|
|
// Name is always first, followed by type.
|
|
String name = parameters.get(0);
|
|
String type = parameters.get(1);
|
|
rodBindings.add(new RMDTriplet(name,type,fileName,storageType));
|
|
}
|
|
|
|
if (argCollection.DBSNPFile != null) {
|
|
if(argCollection.DBSNPFile.toLowerCase().contains("vcf"))
|
|
throw new UserException("--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf <filename>.");
|
|
|
|
String fileName = expandFileName(argCollection.DBSNPFile);
|
|
RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDStorageType.STREAM : RMDStorageType.FILE;
|
|
|
|
rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME,"dbsnp",fileName,storageType));
|
|
}
|
|
|
|
return rodBindings;
|
|
}
|
|
|
|
/**
|
|
* Expand any special characters that appear in the filename. Right now, '-' is expanded to
|
|
* '/dev/stdin' only, but in the future, special characters like '~' and '*' that are passed
|
|
* directly to the command line in some circumstances could be expanded as well. Be careful
|
|
* when adding UNIX-isms.
|
|
* @param argument the text appearing on the command-line.
|
|
* @return An expanded string suitable for opening by Java/UNIX file handling utilities.
|
|
*/
|
|
private String expandFileName(String argument) {
|
|
if(argument.trim().equals("-"))
|
|
return "/dev/stdin";
|
|
return argument;
|
|
}
|
|
}
|