Moved the arguments that control output-file options into the engine

* Arguments involved are --no_cmdline_in_header, --sites_only, and --bcf for VCF files and --bam_compression, --simplifyBAM, --disable_bam_indexing, and --generate_md5 for BAM files
 * PT 52740563
 * Removed ReadUtils.createSAMFileWriterWithCompression(), replacing it with ReadUtils.createSAMFileWriter(), which applies all appropriate engine-level arguments
 * Replaced hard-coded field names in ArgumentDefinitionField (Queue extension generator) with a reflection-based lookup that will fail noisily during extension generation if there's an error
This commit is contained in:
Phillip Dexheimer 2014-08-26 21:46:30 -04:00
parent 5c4a3eb89c
commit a35f5b8685
16 changed files with 209 additions and 304 deletions

View File

@ -1097,7 +1097,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
} catch ( final Exception e ) {
// Capture any exception that might be thrown, and write out the assembly failure BAM if requested
if ( captureAssemblyFailureBAM ) {
final SAMFileWriter writer = ReadUtils.createSAMFileWriterWithCompression(getToolkit().getSAMFileHeader(), true, "assemblyFailure.bam", 5);
final SAMFileWriter writer = ReadUtils.createSAMFileWriter("assemblyFailure.bam", getToolkit());
for ( final GATKSAMRecord read : activeRegion.getReads() ) {
writer.addAlignment(read);
}

View File

@ -27,12 +27,13 @@ package org.broadinstitute.gatk.queue.extensions.gatk;
import htsjdk.samtools.BAMIndex;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.tribble.Tribble;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import java.io.File;
import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -116,7 +117,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
public static List<? extends ArgumentField> getArgumentFields(ParsingEngine parsingEngine,Class<?> classType) {
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
List<ArgumentField> argumentFields = new ArrayList<>();
for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType))
if (!argumentSource.isDeprecated()) {
String gatherer = null;
@ -133,7 +134,31 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
return argumentFields;
}
private static final List<String> intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals");
public static String getArgumentFullName(final Class<?> collection, final String fieldName) {
try {
final Field field = collection.getField(fieldName);
final Argument arg = field.getAnnotation(Argument.class);
if (arg != null)
return arg.fullName();
final Input inputAnnotation = field.getAnnotation(Input.class);
if (inputAnnotation != null)
return inputAnnotation.fullName();
final Output outputAnnotation = field.getAnnotation(Output.class);
if (outputAnnotation != null)
return outputAnnotation.fullName();
} catch (NoSuchFieldException e) {
throw new IllegalStateException(String.format("Can't find field %s in ArgumentCollection %s", fieldName, collection.getSimpleName()), e);
}
throw new IllegalStateException(String.format("Field %s in class %s is not annotated as an argument", fieldName, collection.getName()));
}
private static final List<String> intervalFields = new ArrayList<>();
private static final String inputFileArgument = getArgumentFullName(GATKArgumentCollection.class, "samFiles");
static {
intervalFields.add(getArgumentFullName(IntervalArgumentCollection.class, "intervals"));
intervalFields.add(getArgumentFullName(IntervalArgumentCollection.class, "excludeIntervals"));
}
private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition, String gatherer) {
if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
@ -144,7 +169,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
} else if (NumThreadsArgumentField.NUM_THREADS_FIELD.equals(argumentDefinition.fullName)) {
return Arrays.asList(new NumThreadsArgumentField(argumentDefinition));
} else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
} else if (inputFileArgument.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam"));
} else if ((RodBinding.class.equals(argumentDefinition.argumentType) || RodBinding.class.equals(argumentDefinition.componentType) || RodBindingCollection.class.equals(argumentDefinition.componentType)) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
@ -155,7 +180,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
} else if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) {
List<ArgumentField> fields = new ArrayList<ArgumentField>();
List<ArgumentField> fields = new ArrayList<>();
String gatherClass;
@ -193,7 +218,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
} else if (!argumentDefinition.required && useOption(argumentDefinition.argumentType)) {
boolean useFormat = useFormatter(argumentDefinition.argumentType);
List<ArgumentField> fields = new ArrayList<ArgumentField>();
List<ArgumentField> fields = new ArrayList<>();
ArgumentField field = new OptionedArgumentField(argumentDefinition, useFormat);
fields.add(field);
if (useFormat) fields.add(new FormatterArgumentField(field));
@ -201,7 +226,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
} else {
boolean useFormat = useFormatter(argumentDefinition.argumentType);
List<ArgumentField> fields = new ArrayList<ArgumentField>();
List<ArgumentField> fields = new ArrayList<>();
ArgumentField field = new DefaultArgumentField(argumentDefinition, useFormat);
fields.add(field);
if (useFormat) fields.add(new FormatterArgumentField(field));
@ -349,7 +374,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
// Allows the user to specify the track name, track type, and the file.
public static class NumThreadsArgumentField extends OptionedArgumentField {
public static final String NUM_THREADS_FIELD = "num_threads";
public static final String NUM_THREADS_FIELD = getArgumentFullName(GATKArgumentCollection.class, "numberOfDataThreads");
public static final String NCT_FIELD = getArgumentFullName(GATKArgumentCollection.class, "numberOfCPUThreadsPerDataThread");
public NumThreadsArgumentField(ArgumentDefinition argumentDefinition) {
super(argumentDefinition, false);
@ -357,7 +383,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
@Override
protected String getFreezeFields() {
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n");
return String.format("if (%1$s.isDefined) nCoresRequest = %1$s%nif (%2$s.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * %2$s.getOrElse(1))%n",
NUM_THREADS_FIELD, NCT_FIELD);
}
}
@ -495,7 +522,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
("if (%2$s != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" +
" if (!%3$s)%n" +
" %1$s = new File(%2$s.getPath.stripSuffix(\".bam\") + \"%4$s\")%n"),
auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix);
auxFieldName, originalFieldName, getArgumentFullName(GATKArgumentCollection.class, "disableBAMIndexing"), BAMIndex.BAMIndexSuffix);
}
}
@ -508,7 +535,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
("if (%2$s != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" +
" if (%3$s)%n" +
" %1$s = new File(%2$s.getPath + \"%4$s\")%n"),
auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME, ".md5");
auxFieldName, originalFieldName, getArgumentFullName(GATKArgumentCollection.class, "enableBAMmd5"), ".md5");
}
}

View File

@ -281,18 +281,6 @@ class MuTect extends org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGA
@Gather(enabled=false)
private var vcfIndex: File = _
/** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */
@Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="")
var no_cmdline_in_header: Boolean = _
/** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */
@Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="")
var sites_only: Boolean = _
/** force BCF output, regardless of the file's extension */
@Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="")
var bcf: Boolean = _
/** VCF file of DBSNP information */
@Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="")
var dbsnp: Seq[File] = Nil

View File

@ -28,8 +28,6 @@ package org.broadinstitute.gatk.queue.extensions.gatk
import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
import org.broadinstitute.gatk.queue.extensions.picard.MergeSamFiles
import org.broadinstitute.gatk.queue.function.RetryMemoryLimit
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterArgumentTypeDescriptor
import org.broadinstitute.gatk.queue.util.ClassFieldCache
import java.io.File
/**
@ -50,18 +48,9 @@ class BamGatherFunction extends MergeSamFiles with GatherFunction with RetryMemo
// Whatever the original function can handle, merging *should* do less.
this.memoryLimit = originalFunction.memoryLimit
// bam_compression and index_output_bam_on_the_fly from SAMFileWriterArgumentTypeDescriptor
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK
val compression = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME)
this.compressionLevel = originalGATK.getFieldValue(compression).asInstanceOf[Option[Int]]
val disableIndex = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME)
this.createIndex = Some(!originalGATK.getFieldValue(disableIndex).asInstanceOf[Boolean])
val enableMD5 = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME)
this.createMD5 = Some(originalGATK.getFieldValue(enableMD5).asInstanceOf[Boolean])
this.compressionLevel = originalGATK.bam_compression
this.createIndex = Some(!originalGATK.disable_bam_indexing)
this.createMD5 = Some(originalGATK.generate_md5)
super.freezeFieldValues()
}

View File

@ -26,9 +26,7 @@
package org.broadinstitute.gatk.queue.extensions.gatk
import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
import org.broadinstitute.gatk.queue.function.{RetryMemoryLimit, QFunction}
import org.broadinstitute.gatk.engine.io.stubs.VCFWriterArgumentTypeDescriptor
import org.broadinstitute.gatk.queue.util.ClassFieldCache
import org.broadinstitute.gatk.queue.function.RetryMemoryLimit
/**
* Merges a vcf text file.
@ -44,14 +42,8 @@ class VcfGatherFunction extends CombineVariants with GatherFunction with RetryMe
this.out = this.originalOutput
GATKIntervals.copyIntervalArguments(this.originalGATK, this)
// NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK
val noHeader = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME)
this.no_cmdline_in_header = originalGATK.getFieldValue(noHeader).asInstanceOf[Boolean]
val sitesOnly = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME)
this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean]
this.no_cmdline_in_header = originalGATK.no_cmdline_in_header
this.sites_only = originalGATK.sites_only
// ensure that the gather function receives the same unsafe parameter as the scattered function
this.unsafe = this.originalGATK.unsafe

View File

@ -44,7 +44,9 @@ import org.broadinstitute.gatk.engine.filters.FilterManager;
import org.broadinstitute.gatk.engine.filters.ReadFilter;
import org.broadinstitute.gatk.engine.filters.ReadGroupBlackListFilter;
import org.broadinstitute.gatk.engine.io.OutputTracker;
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
import org.broadinstitute.gatk.engine.io.stubs.Stub;
import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode;
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport;
@ -65,6 +67,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
import org.broadinstitute.gatk.utils.progressmeter.ProgressMeter;
import org.broadinstitute.gatk.utils.recalibration.BQSRArgumentSet;
import org.broadinstitute.gatk.utils.sam.ReadUtils;
import org.broadinstitute.gatk.utils.text.XReadLines;
import org.broadinstitute.gatk.utils.threading.ThreadEfficiencyMonitor;
@ -666,11 +669,13 @@ public class GenomeAnalysisEngine {
*
* @param outputTracker the tracker supplying the initialization data.
*/
private void initializeOutputStreams(OutputTracker outputTracker) {
for (Map.Entry<ArgumentSource, Object> input : getInputs().entrySet())
private void initializeOutputStreams(final OutputTracker outputTracker) {
for (final Map.Entry<ArgumentSource, Object> input : getInputs().entrySet())
outputTracker.addInput(input.getKey(), input.getValue());
for (Stub<?> stub : getOutputs())
for (final Stub<?> stub : getOutputs()) {
stub.processArguments(argCollection);
outputTracker.addOutput(stub);
}
outputTracker.prepareWalker(walker, getArguments().strictnessLevel);
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.gatk.engine.arguments;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.ValidationStringency;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
@ -61,7 +60,7 @@ public class GATKArgumentCollection {
* BAM file. Please see our online documentation for more details on input formatting requirements.
*/
@Input(fullName = "input_file", shortName = "I", doc = "Input file containing sequence data (SAM or BAM)", required = false)
public List<String> samFiles = new ArrayList<String>();
public List<String> samFiles = new ArrayList<>();
@Hidden
@Argument(fullName = "showFullBamList",doc="Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list files).")
@ -120,7 +119,7 @@ public class GATKArgumentCollection {
* is specified in each tool's documentation. The default filters cannot be disabled.
*/
@Argument(fullName = "read_filter", shortName = "rf", doc = "Filters to apply to reads before analysis", required = false)
public final List<String> readFilters = new ArrayList<String>();
public final List<String> readFilters = new ArrayList<>();
@ArgumentCollection
public IntervalArgumentCollection intervalArguments = new IntervalArgumentCollection();
@ -408,6 +407,39 @@ public class GATKArgumentCollection {
required = false)
public boolean disableAutoIndexCreationAndLockingWhenReadingRods = false;
@Hidden
@Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.",
required = false)
public boolean disableCommandLineInVCF = false;
@Argument(fullName = "sites_only", shortName = "sites_only", doc = "Just output sites without genotypes (i.e. only the first 8 columns of the VCF)",
required = false)
public boolean sitesOnlyVCF = false;
@Hidden
@Argument(fullName = "bcf", shortName = "bcf", doc = "Force BCF output, regardless of the file's extension",
required = false)
public boolean forceBCFOutput = false;
@Advanced
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files (0 - 9, higher is more compressed)",
minValue = 0, maxValue = 9, required = false)
public Integer bamCompression = null;
@Advanced
@Argument(fullName = "simplifyBAM", shortName = "simplifyBAM",
doc = "If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier",
required = false)
public boolean simplifyBAM = false;
@Argument(fullName = "disable_bam_indexing", doc = "Turn off on-the-fly creation of indices for output BAM files.",
required = false)
public boolean disableBAMIndexing = false;
@Argument(fullName = "generate_md5", doc = "Enable on-the-fly creation of md5s for output BAM files.",
required = false)
public boolean enableBAMmd5 = false;
// --------------------------------------------------------------------------------------------------------------
//
// Multi-threading arguments

View File

@ -25,6 +25,7 @@
package org.broadinstitute.gatk.engine.io.stubs;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.io.OutputTracker;
import java.io.File;
@ -101,6 +102,9 @@ public class OutputStreamStub extends OutputStream implements Stub<OutputStream>
this.outputTracker = outputTracker;
}
@Override
public void processArguments( final GATKArgumentCollection argumentCollection ) {}
/**
* @{inheritDoc}
*/

View File

@ -30,29 +30,14 @@ import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.sam.ReadUtils;
import java.io.OutputStream;
import java.lang.annotation.Annotation;
import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.List;
/**
* Insert a SAMFileWriterStub instead of a full-fledged concrete OutputStream implementations.
*/
public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public static final String DEFAULT_ARGUMENT_FULLNAME = "outputBAM";
public static final String DEFAULT_ARGUMENT_SHORTNAME = "ob";
public static final String COMPRESSION_FULLNAME = "bam_compression";
public static final String COMPRESSION_SHORTNAME = "compress";
public static final String SIMPLIFY_BAM_FULLNAME = "simplifyBAM";
public static final String SIMPLIFY_BAM_SHORTNAME = SIMPLIFY_BAM_FULLNAME;
public static final String DISABLE_INDEXING_FULLNAME = "disable_bam_indexing";
public static final String ENABLE_MD5_FULLNAME = "generate_md5";
/**
* The engine into which output stubs should be fed.
@ -79,15 +64,6 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
return SAMFileWriter.class.equals(type) || GATKSAMFileWriter.class.equals(type);
}
@Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createBAMArgumentDefinition(source),
createBAMCompressionArgumentDefinition(source),
disableWriteIndexArgumentDefinition(source),
enableMD5GenerationArgumentDefinition(source),
createSimplifyBAMArgumentDefinition(source));
}
@Override
public boolean createsTypeDefault(ArgumentSource source) {
return !source.isRequired() && source.defaultsToStdout();
@ -110,38 +86,15 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
@Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
// Extract all possible parameters that could be passed to a BAM file writer?
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source);
ArgumentDefinition bamArgumentDefinition = createDefaultArgumentDefinition(source);
ArgumentMatchValue writerFileName = getArgumentValue( bamArgumentDefinition, matches );
ArgumentMatchValue compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches );
Integer compressionLevel = compressionLevelText != null ? Integer.valueOf(compressionLevelText.asString()) : null;
boolean indexOnTheFly = !argumentIsPresent(disableWriteIndexArgumentDefinition(source),matches);
boolean generateMD5 = argumentIsPresent(this.enableMD5GenerationArgumentDefinition(source),matches);
boolean simplifyBAM = argumentIsPresent(createSimplifyBAMArgumentDefinition(source),matches);
// Validate the combination of parameters passed in.
// This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object;
// therefore, the user must have failed to specify a type default
if(writerFileName != null && writerFileName.asFile() == null && generateMD5)
throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside.");
// Create the stub and set parameters.
// Create the stub
SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream);
if (writerFileName != null && writerFileName.asFile() != null ) {
stub = new SAMFileWriterStub(engine, writerFileName.asFile());
if ( compressionLevel != null ) {
stub.setCompressionLevel(ReadUtils.validateCompressionLevel(compressionLevel));
} if ( indexOnTheFly )
stub.setIndexOnTheFly(indexOnTheFly);
if ( generateMD5 )
stub.setGenerateMD5(generateMD5);
if ( simplifyBAM )
stub.setSimplifyBAM(simplifyBAM);
// WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches));
engine.addOutput(stub);
@ -150,96 +103,4 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
return stub;
}
/**
* Gets the definition of the argument representing the BAM file itself.
* @param source Argument source for the BAM file. Must not be null.
* @return Argument definition for the BAM file itself. Will not be null.
*/
private ArgumentDefinition createBAMArgumentDefinition(ArgumentSource source) {
Annotation annotation = getArgumentAnnotation(source);
return new ArgumentDefinition( annotation,
ArgumentIOType.getIOType(annotation),
source.field.getType(),
DEFAULT_ARGUMENT_FULLNAME,
DEFAULT_ARGUMENT_SHORTNAME,
ArgumentDefinition.getDoc(annotation),
source.isRequired(),
false,
source.isMultiValued(),
source.isHidden(),
null,
null,
null,
null);
}
/**
* Creates the optional compression level argument for the BAM file.
* @param source Argument source for the BAM file. Must not be null.
* @return Argument definition for the BAM file itself. Will not be null.
*/
private ArgumentDefinition createBAMCompressionArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
int.class,
COMPRESSION_FULLNAME,
COMPRESSION_SHORTNAME,
"Compression level to use for writing BAM files",
false,
false,
false,
source.isHidden(),
null,
null,
null,
null );
}
private ArgumentDefinition disableWriteIndexArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
DISABLE_INDEXING_FULLNAME,
null,
"Turn off on-the-fly creation of indices for output BAM files.",
false,
true,
false,
source.isHidden(),
null,
null,
null,
null );
}
private ArgumentDefinition enableMD5GenerationArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
ENABLE_MD5_FULLNAME,
null,
"Enable on-the-fly creation of md5s for output BAM files.",
false,
true,
false,
source.isHidden(),
null,
null,
null,
null );
}
private ArgumentDefinition createSimplifyBAMArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
SIMPLIFY_BAM_FULLNAME,
SIMPLIFY_BAM_SHORTNAME,
"If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier",
false,
true,
false,
source.isHidden(),
null,
null,
null,
null );
}
}

View File

@ -30,6 +30,7 @@ import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.ProgressLoggerInterface;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.io.OutputTracker;
import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
@ -273,6 +274,16 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
this.outputTracker = outputTracker;
}
@Override
public void processArguments( final GATKArgumentCollection argumentCollection ) {
if (argumentCollection.bamCompression != null)
setCompressionLevel(argumentCollection.bamCompression);
setGenerateMD5(argumentCollection.enableBAMmd5);
setIndexOnTheFly(!argumentCollection.disableBAMIndexing);
setSimplifyBAM(argumentCollection.simplifyBAM);
}
/**
* Use the given header as the target for this writer.
* @param header The header to write.
@ -284,7 +295,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
}
private void initializeReadTransformers() {
this.onOutputReadTransformers = new ArrayList<ReadTransformer>(engine.getReadTransformers().size());
this.onOutputReadTransformers = new ArrayList<>(engine.getReadTransformers().size());
for ( final ReadTransformer transformer : engine.getReadTransformers() ) {
if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_OUTPUT )
onOutputReadTransformers.add(transformer);

View File

@ -25,6 +25,7 @@
package org.broadinstitute.gatk.engine.io.stubs;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.io.OutputTracker;
import java.io.File;
@ -47,6 +48,15 @@ public interface Stub<StreamType> {
*/
public void register( OutputTracker outputTracker );
/**
* Provides a mechanism for uniformly processing command-line arguments
* that are important for file processing. For example, this method
* might pass on the compression value specified by the user to
* a SAMFileWriter
* @param argumentCollection The arguments to be processed
*/
public void processArguments( final GATKArgumentCollection argumentCollection );
/**
* Returns the OutputStream represented by this stub or null if not available.
*/

View File

@ -30,15 +30,11 @@ import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import htsjdk.variant.variantcontext.writer.VariantContextWriterFactory;
import java.io.File;
import java.io.OutputStream;
import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
/**
* Injects new command-line arguments into the system providing support for the genotype writer.
@ -47,9 +43,6 @@ import java.util.List;
* @version 0.1
*/
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
public static final String SITES_ONLY_ARG_NAME = "sites_only";
public static final String FORCE_BCF = "bcf";
/**
* The engine into which output stubs should be fed.
@ -88,15 +81,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return VariantContextWriter.class.equals(type);
}
@Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList(
createDefaultArgumentDefinition(source),
createNoCommandLineHeaderArgumentDefinition(),
createSitesOnlyArgumentDefinition(),
createBCFArgumentDefinition() );
}
/**
* This command-line argument descriptor does want to override the provided default value.
* @return true always.
@ -145,9 +129,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
stub.setCompressed(isCompressed(writerFileName == null ? null: writerFileName.asString()));
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
// WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches));
@ -156,66 +137,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return stub;
}
/**
* Creates the optional no_header argument for the VCF file.
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
NO_HEADER_ARG_NAME,
NO_HEADER_ARG_NAME,
"Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.",
false,
true,
false,
true,
null,
null,
null,
null );
}
/**
* Creates the optional sites_only argument definition
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createSitesOnlyArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
SITES_ONLY_ARG_NAME,
SITES_ONLY_ARG_NAME,
"Just output sites without genotypes (i.e. only the first 8 columns of the VCF)",
false,
true,
false,
true,
null,
null,
null,
null );
}
/**
* Creates the optional bcf argument definition
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createBCFArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
FORCE_BCF,
FORCE_BCF,
"force BCF output, regardless of the file's extension",
false,
true,
false,
true,
null,
null,
null,
null );
}
/**
* Returns true if the file will be compressed.
* @param writerFileName Name of the file

View File

@ -28,6 +28,7 @@ package org.broadinstitute.gatk.engine.io.stubs;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.tribble.index.IndexCreator;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.io.OutputTracker;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
import htsjdk.variant.variantcontext.VariantContext;
@@ -229,6 +230,14 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
this.outputTracker = outputTracker;
}
/**
 * Applies the engine-level VCF output options from the command line to this writer stub:
 * sites-only output, suppression of the command-line header tag, and forced BCF format.
 * @param argumentCollection the parsed engine arguments to read the VCF output flags from
 */
@Override
public void processArguments( final GATKArgumentCollection argumentCollection ) {
    setDoNotWriteGenotypes(argumentCollection.sitesOnlyVCF);
    setSkipWritingCommandLineHeader(argumentCollection.disableCommandLineInVCF);
    setForceBCF(argumentCollection.forceBCFOutput);
}
public void writeHeader(VCFHeader header) {
vcfHeader = header;

View File

@@ -59,16 +59,15 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
@Argument(fullName="outputRoot", doc="output BAM file", required=false)
public String outputRoot = "";
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
public Integer BAMcompression = 5;
private static Logger logger = Logger.getLogger(SplitSamFile.class);
private static String VERSION = "0.0.1";
private static final Logger logger = Logger.getLogger(SplitSamFile.class);
private static final String VERSION = "0.0.1";
@Override
public void initialize() {
    // Announce the walker version once at startup.
    logger.info(String.format("SplitSamFile version: %s", VERSION));
}
/**
 * Identity map step: passes every read through unchanged so that reduce() can
 * route it to the appropriate per-sample writer.
 * @param ref the reference context at the read's position (unused)
 * @param read the read being traversed
 * @param metaDataTracker reference-ordered metadata at this position (unused)
 * @return the read itself, unmodified
 */
@Override
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
    return read;
}
@@ -78,36 +77,39 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
// Standard I/O routines
//
// --------------------------------------------------------------------------------------------------------------
@Override
public void onTraversalDone(Map<String, SAMFileWriter> outputs) {
    // Flush and release every per-sample writer now that traversal is complete.
    for ( final SAMFileWriter writer : outputs.values() ) {
        writer.close();
    }
}
@Override
public Map<String, SAMFileWriter> reduceInit() {
HashMap<String, SAMFileHeader> headers = new HashMap<String, SAMFileHeader>();
HashMap<String, SAMFileHeader> headers = new HashMap<>();
for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) {
final String sample = readGroup.getSample();
if ( ! headers.containsKey(sample) ) {
SAMFileHeader header = duplicateSAMFileHeader(this.getToolkit().getSAMFileHeader());
logger.debug(String.format("Creating BAM header for sample %s", sample));
ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<>();
header.setReadGroups(readGroups);
headers.put(sample, header);
}
SAMFileHeader header = headers.get(sample);
List<SAMReadGroupRecord> newReadGroups = new ArrayList<SAMReadGroupRecord>(header.getReadGroups());
List<SAMReadGroupRecord> newReadGroups = new ArrayList<>(header.getReadGroups());
newReadGroups.add(readGroup);
header.setReadGroups(newReadGroups);
}
HashMap<String, SAMFileWriter> outputs = new HashMap<String, SAMFileWriter>();
HashMap<String, SAMFileWriter> outputs = new HashMap<>();
for ( Map.Entry<String, SAMFileHeader> elt : headers.entrySet() ) {
final String sample = elt.getKey();
final String filename = outputRoot + sample + ".bam";
logger.info(String.format("Creating BAM output file %s for sample %s", filename, sample));
SAMFileWriter output = ReadUtils.createSAMFileWriterWithCompression(elt.getValue(), true, filename, BAMcompression);
final SAMFileWriter output = ReadUtils.createSAMFileWriter(filename, getToolkit(), elt.getValue());
outputs.put(sample, output);
}
@@ -117,6 +119,7 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
/**
* Write out the read
*/
@Override
public Map<String, SAMFileWriter> reduce(SAMRecord read, Map<String, SAMFileWriter> outputs) {
final String sample = read.getReadGroup().getSample();
SAMFileWriter output = outputs.get(sample);

View File

@@ -30,10 +30,10 @@ import com.google.java.contract.Requires;
import htsjdk.samtools.*;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import java.io.File;
import java.util.*;
@@ -131,26 +131,24 @@ public class ReadUtils {
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
/**
* Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
* SAMFileWriter without compression otherwise.
*
* @param header
* @param presorted
* @param file
* @param compression
* @return a SAMFileWriter with the compression level if it is a bam.
* Creates a SAMFileWriter using all of the features currently set in the engine (command line arguments, ReadTransformers, etc)
* @param file the filename to write to
* @param engine the engine
* @return a SAMFileWriter with the correct options set
*/
public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) {
validateCompressionLevel(compression);
if (file.endsWith(".bam"))
return new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(header, presorted, new File(file), compression);
return new SAMFileWriterFactory().setCreateIndex(true).makeSAMOrBAMWriter(header, presorted, new File(file));
/**
 * Opens a SAMFileWriter for the given filename, configured from the engine's
 * command-line arguments (compression, simplification, indexing, MD5, etc).
 * @param file the filename to write to
 * @param engine the running engine supplying the output options
 * @return a writer stub with all engine-level options applied
 */
public static SAMFileWriter createSAMFileWriter(final String file, final GenomeAnalysisEngine engine) {
    final SAMFileWriterStub stub = new SAMFileWriterStub(engine, new File(file));
    // Pull the engine-level BAM output flags onto the stub before it is used.
    stub.processArguments(engine.getArguments());
    return stub;
}
public static int validateCompressionLevel(final int requestedCompressionLevel) {
if ( requestedCompressionLevel < 0 || requestedCompressionLevel > 9 )
throw new UserException.BadArgumentValue("compress", "Compression level must be 0-9 but got " + requestedCompressionLevel);
return requestedCompressionLevel;
/**
 * As {@link #createSAMFileWriter(String, org.broadinstitute.gatk.engine.GenomeAnalysisEngine)},
 * but additionally sets the supplied header on the new writer.
 * @param file the filename to write to
 * @param engine the running engine supplying the output options
 * @param header the SAM header to attach to the writer
 * @return a writer stub with the header set and all engine-level options applied
 */
public static SAMFileWriter createSAMFileWriter(final String file, final GenomeAnalysisEngine engine, final SAMFileHeader header) {
    final SAMFileWriterStub stub = (SAMFileWriterStub) createSAMFileWriter(file, engine);
    stub.writeHeader(header);
    return stub;
}
/**

View File

@@ -666,4 +666,59 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
return counter + sum;
}
}
// --------------------------------------------------------------------------------
//
// Test output file-specific options
//
// --------------------------------------------------------------------------------
// Runs PrintReads over the high-coverage test BAM with the given extra args,
// checks the output against the expected MD5, and returns the produced BAM file.
private File testBAMFeatures(final String args, final String md5) {
    final String command = "-T PrintReads -R " + b37KGReference +
        " -I " + privateTestDir + "NA20313.highCoverageRegion.bam"
        + " --no_pg_tag -o %s " + args;
    final WalkerTestSpec spec = new WalkerTestSpec(command, 1, Arrays.asList(".bam"), Arrays.asList(md5));
    return executeTest("testBAMFeatures: "+args, spec).first.get(0);
}
@Test
public void testSAMWriterFeatures() {
    // Engine-level BAM output options, each pinned to a deterministic MD5.
    testBAMFeatures("-compress 0", "bb4b55b1f80423970bb9384cbf0d8793");
    testBAMFeatures("-compress 9", "b85ee1636d62e1bb8ed65a245c307167");
    testBAMFeatures("-simplifyBAM", "38f9c30a27dfbc085a2ff52a1617d579");

    // Validate that --generate_md5 writes a sidecar .md5 file containing the digest
    final String expectedMD5 = "6627b9ea33293a0083983feb94948c1d";
    final File md5Target = testBAMFeatures("--generate_md5", expectedMD5);
    final File md5File = new File(md5Target.getAbsoluteFile() + ".md5");
    md5File.deleteOnExit();
    Assert.assertTrue(md5File.exists(), "MD5 wasn't created");
    // Fix: close the reader via try-with-resources instead of leaking the file handle
    try (final BufferedReader reader = new BufferedReader(new FileReader(md5File))) {
        final String md5 = reader.readLine();
        Assert.assertEquals(md5, expectedMD5, "Generated MD5 doesn't match expected");
    } catch (IOException e) {
        Assert.fail("Can't parse MD5 file", e);
    }

    // Validate that --disable_bam_indexing suppresses both index naming conventions
    final String unindexedBAM = testBAMFeatures("--disable_bam_indexing", expectedMD5).getAbsolutePath();
    Assert.assertFalse(new File(unindexedBAM + ".bai").exists(),
            "BAM index (.bam.bai) was created even though indexing was disabled");
    Assert.assertFalse(new File(unindexedBAM.replace(".bam", ".bai")).exists(),
            "BAM index (.bai) was created even though indexing was disabled");
}
// Runs SelectVariants over the CEU trio test VCF with the given extra args and
// checks the single output file against the expected MD5.
private void testVCFFeatures(final String args, final String md5) {
    final String command = "-T SelectVariants -R " + b37KGReference +
        " -V " + privateTestDir + "CEUtrioTest.vcf"
        + " --no_cmdline_in_header -o %s " + args;
    executeTest("testVCFFeatures: "+args, new WalkerTestSpec(command, 1, Arrays.asList(md5)));
}
@Test
public void testVCFWriterFeatures() {
    // Engine-level VCF output options: each run is pinned to a deterministic MD5.
    testVCFFeatures("--sites_only", "94bf1f2c0946e933515e4322323a5716");
    testVCFFeatures("--bcf", "03f2d6988f54a332da48803c78f9c4b3");
}
}