Moved arguments controlling options in output files into the engine

* Arguments involved are --no_cmdline_in_header, --sites_only, and --bcf for VCF files; --bam_compression, --simplifyBAM, --disable_bam_indexing, and --generate_md5 for BAM files
* PT 52740563
* Removed ReadUtils.createSAMFileWriterWithCompression(), replaced with ReadUtils.createSAMFileWriter(), which applies all appropriate engine-level arguments
* Replaced hard-coded field names in ArgumentDefinitionField (Queue extension generator) with a reflection-based lookup that will fail noisily during extension generation if there's an error
This commit is contained in:
Phillip Dexheimer 2014-08-26 21:46:30 -04:00
parent 5c4a3eb89c
commit a35f5b8685
16 changed files with 209 additions and 304 deletions

View File

@@ -1097,7 +1097,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
} catch ( final Exception e ) { } catch ( final Exception e ) {
// Capture any exception that might be thrown, and write out the assembly failure BAM if requested // Capture any exception that might be thrown, and write out the assembly failure BAM if requested
if ( captureAssemblyFailureBAM ) { if ( captureAssemblyFailureBAM ) {
final SAMFileWriter writer = ReadUtils.createSAMFileWriterWithCompression(getToolkit().getSAMFileHeader(), true, "assemblyFailure.bam", 5); final SAMFileWriter writer = ReadUtils.createSAMFileWriter("assemblyFailure.bam", getToolkit());
for ( final GATKSAMRecord read : activeRegion.getReads() ) { for ( final GATKSAMRecord read : activeRegion.getReads() ) {
writer.addAlignment(read); writer.addAlignment(read);
} }

View File

@@ -27,12 +27,13 @@ package org.broadinstitute.gatk.queue.extensions.gatk;
import htsjdk.samtools.BAMIndex; import htsjdk.samtools.BAMIndex;
import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriter;
import htsjdk.tribble.Tribble; import htsjdk.tribble.Tribble;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import java.io.File; import java.io.File;
import java.lang.annotation.Annotation; import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@@ -116,7 +117,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
} }
public static List<? extends ArgumentField> getArgumentFields(ParsingEngine parsingEngine,Class<?> classType) { public static List<? extends ArgumentField> getArgumentFields(ParsingEngine parsingEngine,Class<?> classType) {
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>(); List<ArgumentField> argumentFields = new ArrayList<>();
for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType)) for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType))
if (!argumentSource.isDeprecated()) { if (!argumentSource.isDeprecated()) {
String gatherer = null; String gatherer = null;
@@ -133,7 +134,31 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
return argumentFields; return argumentFields;
} }
private static final List<String> intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals"); public static String getArgumentFullName(final Class<?> collection, final String fieldName) {
try {
final Field field = collection.getField(fieldName);
final Argument arg = field.getAnnotation(Argument.class);
if (arg != null)
return arg.fullName();
final Input inputAnnotation = field.getAnnotation(Input.class);
if (inputAnnotation != null)
return inputAnnotation.fullName();
final Output outputAnnotation = field.getAnnotation(Output.class);
if (outputAnnotation != null)
return outputAnnotation.fullName();
} catch (NoSuchFieldException e) {
throw new IllegalStateException(String.format("Can't find field %s in ArgumentCollection %s", fieldName, collection.getSimpleName()), e);
}
throw new IllegalStateException(String.format("Field %s in class %s is not annotated as an argument", fieldName, collection.getName()));
}
private static final List<String> intervalFields = new ArrayList<>();
private static final String inputFileArgument = getArgumentFullName(GATKArgumentCollection.class, "samFiles");
static {
intervalFields.add(getArgumentFullName(IntervalArgumentCollection.class, "intervals"));
intervalFields.add(getArgumentFullName(IntervalArgumentCollection.class, "excludeIntervals"));
}
private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition, String gatherer) { private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition, String gatherer) {
if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
@@ -144,7 +169,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
} else if (NumThreadsArgumentField.NUM_THREADS_FIELD.equals(argumentDefinition.fullName)) { } else if (NumThreadsArgumentField.NUM_THREADS_FIELD.equals(argumentDefinition.fullName)) {
return Arrays.asList(new NumThreadsArgumentField(argumentDefinition)); return Arrays.asList(new NumThreadsArgumentField(argumentDefinition));
} else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { } else if (inputFileArgument.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam")); return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam"));
} else if ((RodBinding.class.equals(argumentDefinition.argumentType) || RodBinding.class.equals(argumentDefinition.componentType) || RodBindingCollection.class.equals(argumentDefinition.componentType)) && argumentDefinition.ioType == ArgumentIOType.INPUT) { } else if ((RodBinding.class.equals(argumentDefinition.argumentType) || RodBinding.class.equals(argumentDefinition.componentType) || RodBindingCollection.class.equals(argumentDefinition.componentType)) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
@@ -155,7 +180,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
} else if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) { } else if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) {
List<ArgumentField> fields = new ArrayList<ArgumentField>(); List<ArgumentField> fields = new ArrayList<>();
String gatherClass; String gatherClass;
@@ -193,7 +218,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
} else if (!argumentDefinition.required && useOption(argumentDefinition.argumentType)) { } else if (!argumentDefinition.required && useOption(argumentDefinition.argumentType)) {
boolean useFormat = useFormatter(argumentDefinition.argumentType); boolean useFormat = useFormatter(argumentDefinition.argumentType);
List<ArgumentField> fields = new ArrayList<ArgumentField>(); List<ArgumentField> fields = new ArrayList<>();
ArgumentField field = new OptionedArgumentField(argumentDefinition, useFormat); ArgumentField field = new OptionedArgumentField(argumentDefinition, useFormat);
fields.add(field); fields.add(field);
if (useFormat) fields.add(new FormatterArgumentField(field)); if (useFormat) fields.add(new FormatterArgumentField(field));
@@ -201,7 +226,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
} else { } else {
boolean useFormat = useFormatter(argumentDefinition.argumentType); boolean useFormat = useFormatter(argumentDefinition.argumentType);
List<ArgumentField> fields = new ArrayList<ArgumentField>(); List<ArgumentField> fields = new ArrayList<>();
ArgumentField field = new DefaultArgumentField(argumentDefinition, useFormat); ArgumentField field = new DefaultArgumentField(argumentDefinition, useFormat);
fields.add(field); fields.add(field);
if (useFormat) fields.add(new FormatterArgumentField(field)); if (useFormat) fields.add(new FormatterArgumentField(field));
@@ -349,7 +374,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
// Allows the user to specify the track name, track type, and the file. // Allows the user to specify the track name, track type, and the file.
public static class NumThreadsArgumentField extends OptionedArgumentField { public static class NumThreadsArgumentField extends OptionedArgumentField {
public static final String NUM_THREADS_FIELD = "num_threads"; public static final String NUM_THREADS_FIELD = getArgumentFullName(GATKArgumentCollection.class, "numberOfDataThreads");
public static final String NCT_FIELD = getArgumentFullName(GATKArgumentCollection.class, "numberOfCPUThreadsPerDataThread");
public NumThreadsArgumentField(ArgumentDefinition argumentDefinition) { public NumThreadsArgumentField(ArgumentDefinition argumentDefinition) {
super(argumentDefinition, false); super(argumentDefinition, false);
@@ -357,7 +383,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
@Override @Override
protected String getFreezeFields() { protected String getFreezeFields() {
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n"); return String.format("if (%1$s.isDefined) nCoresRequest = %1$s%nif (%2$s.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * %2$s.getOrElse(1))%n",
NUM_THREADS_FIELD, NCT_FIELD);
} }
} }
@@ -495,7 +522,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
("if (%2$s != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" + ("if (%2$s != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" +
" if (!%3$s)%n" + " if (!%3$s)%n" +
" %1$s = new File(%2$s.getPath.stripSuffix(\".bam\") + \"%4$s\")%n"), " %1$s = new File(%2$s.getPath.stripSuffix(\".bam\") + \"%4$s\")%n"),
auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix); auxFieldName, originalFieldName, getArgumentFullName(GATKArgumentCollection.class, "disableBAMIndexing"), BAMIndex.BAMIndexSuffix);
} }
} }
@@ -508,7 +535,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
("if (%2$s != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" + ("if (%2$s != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" +
" if (%3$s)%n" + " if (%3$s)%n" +
" %1$s = new File(%2$s.getPath + \"%4$s\")%n"), " %1$s = new File(%2$s.getPath + \"%4$s\")%n"),
auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME, ".md5"); auxFieldName, originalFieldName, getArgumentFullName(GATKArgumentCollection.class, "enableBAMmd5"), ".md5");
} }
} }

View File

@@ -281,18 +281,6 @@ class MuTect extends org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGA
@Gather(enabled=false) @Gather(enabled=false)
private var vcfIndex: File = _ private var vcfIndex: File = _
/** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */
@Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="")
var no_cmdline_in_header: Boolean = _
/** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */
@Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="")
var sites_only: Boolean = _
/** force BCF output, regardless of the file's extension */
@Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="")
var bcf: Boolean = _
/** VCF file of DBSNP information */ /** VCF file of DBSNP information */
@Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="")
var dbsnp: Seq[File] = Nil var dbsnp: Seq[File] = Nil

View File

@@ -28,8 +28,6 @@ package org.broadinstitute.gatk.queue.extensions.gatk
import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
import org.broadinstitute.gatk.queue.extensions.picard.MergeSamFiles import org.broadinstitute.gatk.queue.extensions.picard.MergeSamFiles
import org.broadinstitute.gatk.queue.function.RetryMemoryLimit import org.broadinstitute.gatk.queue.function.RetryMemoryLimit
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterArgumentTypeDescriptor
import org.broadinstitute.gatk.queue.util.ClassFieldCache
import java.io.File import java.io.File
/** /**
@@ -50,18 +48,9 @@ class BamGatherFunction extends MergeSamFiles with GatherFunction with RetryMemo
// Whatever the original function can handle, merging *should* do less. // Whatever the original function can handle, merging *should* do less.
this.memoryLimit = originalFunction.memoryLimit this.memoryLimit = originalFunction.memoryLimit
this.compressionLevel = originalGATK.bam_compression
// bam_compression and index_output_bam_on_the_fly from SAMFileWriterArgumentTypeDescriptor this.createIndex = Some(!originalGATK.disable_bam_indexing)
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK this.createMD5 = Some(originalGATK.generate_md5)
val compression = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME)
this.compressionLevel = originalGATK.getFieldValue(compression).asInstanceOf[Option[Int]]
val disableIndex = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME)
this.createIndex = Some(!originalGATK.getFieldValue(disableIndex).asInstanceOf[Boolean])
val enableMD5 = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME)
this.createMD5 = Some(originalGATK.getFieldValue(enableMD5).asInstanceOf[Boolean])
super.freezeFieldValues() super.freezeFieldValues()
} }

View File

@@ -26,9 +26,7 @@
package org.broadinstitute.gatk.queue.extensions.gatk package org.broadinstitute.gatk.queue.extensions.gatk
import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
import org.broadinstitute.gatk.queue.function.{RetryMemoryLimit, QFunction} import org.broadinstitute.gatk.queue.function.RetryMemoryLimit
import org.broadinstitute.gatk.engine.io.stubs.VCFWriterArgumentTypeDescriptor
import org.broadinstitute.gatk.queue.util.ClassFieldCache
/** /**
* Merges a vcf text file. * Merges a vcf text file.
@@ -44,14 +42,8 @@ class VcfGatherFunction extends CombineVariants with GatherFunction with RetryMe
this.out = this.originalOutput this.out = this.originalOutput
GATKIntervals.copyIntervalArguments(this.originalGATK, this) GATKIntervals.copyIntervalArguments(this.originalGATK, this)
// NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor this.no_cmdline_in_header = originalGATK.no_cmdline_in_header
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK this.sites_only = originalGATK.sites_only
val noHeader = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME)
this.no_cmdline_in_header = originalGATK.getFieldValue(noHeader).asInstanceOf[Boolean]
val sitesOnly = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME)
this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean]
// ensure that the gather function receives the same unsafe parameter as the scattered function // ensure that the gather function receives the same unsafe parameter as the scattered function
this.unsafe = this.originalGATK.unsafe this.unsafe = this.originalGATK.unsafe

View File

@@ -44,7 +44,9 @@ import org.broadinstitute.gatk.engine.filters.FilterManager;
import org.broadinstitute.gatk.engine.filters.ReadFilter; import org.broadinstitute.gatk.engine.filters.ReadFilter;
import org.broadinstitute.gatk.engine.filters.ReadGroupBlackListFilter; import org.broadinstitute.gatk.engine.filters.ReadGroupBlackListFilter;
import org.broadinstitute.gatk.engine.io.OutputTracker; import org.broadinstitute.gatk.engine.io.OutputTracker;
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
import org.broadinstitute.gatk.engine.io.stubs.Stub; import org.broadinstitute.gatk.engine.io.stubs.Stub;
import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode; import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode;
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport; import org.broadinstitute.gatk.engine.phonehome.GATKRunReport;
@@ -65,6 +67,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.interval.IntervalUtils; import org.broadinstitute.gatk.utils.interval.IntervalUtils;
import org.broadinstitute.gatk.utils.progressmeter.ProgressMeter; import org.broadinstitute.gatk.utils.progressmeter.ProgressMeter;
import org.broadinstitute.gatk.utils.recalibration.BQSRArgumentSet; import org.broadinstitute.gatk.utils.recalibration.BQSRArgumentSet;
import org.broadinstitute.gatk.utils.sam.ReadUtils;
import org.broadinstitute.gatk.utils.text.XReadLines; import org.broadinstitute.gatk.utils.text.XReadLines;
import org.broadinstitute.gatk.utils.threading.ThreadEfficiencyMonitor; import org.broadinstitute.gatk.utils.threading.ThreadEfficiencyMonitor;
@@ -666,11 +669,13 @@ public class GenomeAnalysisEngine {
* *
* @param outputTracker the tracker supplying the initialization data. * @param outputTracker the tracker supplying the initialization data.
*/ */
private void initializeOutputStreams(OutputTracker outputTracker) { private void initializeOutputStreams(final OutputTracker outputTracker) {
for (Map.Entry<ArgumentSource, Object> input : getInputs().entrySet()) for (final Map.Entry<ArgumentSource, Object> input : getInputs().entrySet())
outputTracker.addInput(input.getKey(), input.getValue()); outputTracker.addInput(input.getKey(), input.getValue());
for (Stub<?> stub : getOutputs()) for (final Stub<?> stub : getOutputs()) {
stub.processArguments(argCollection);
outputTracker.addOutput(stub); outputTracker.addOutput(stub);
}
outputTracker.prepareWalker(walker, getArguments().strictnessLevel); outputTracker.prepareWalker(walker, getArguments().strictnessLevel);
} }

View File

@@ -25,7 +25,6 @@
package org.broadinstitute.gatk.engine.arguments; package org.broadinstitute.gatk.engine.arguments;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.ValidationStringency;
import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
@@ -61,7 +60,7 @@ public class GATKArgumentCollection {
* BAM file. Please see our online documentation for more details on input formatting requirements. * BAM file. Please see our online documentation for more details on input formatting requirements.
*/ */
@Input(fullName = "input_file", shortName = "I", doc = "Input file containing sequence data (SAM or BAM)", required = false) @Input(fullName = "input_file", shortName = "I", doc = "Input file containing sequence data (SAM or BAM)", required = false)
public List<String> samFiles = new ArrayList<String>(); public List<String> samFiles = new ArrayList<>();
@Hidden @Hidden
@Argument(fullName = "showFullBamList",doc="Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list files).") @Argument(fullName = "showFullBamList",doc="Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list files).")
@@ -120,7 +119,7 @@ public class GATKArgumentCollection {
* is specified in each tool's documentation. The default filters cannot be disabled. * is specified in each tool's documentation. The default filters cannot be disabled.
*/ */
@Argument(fullName = "read_filter", shortName = "rf", doc = "Filters to apply to reads before analysis", required = false) @Argument(fullName = "read_filter", shortName = "rf", doc = "Filters to apply to reads before analysis", required = false)
public final List<String> readFilters = new ArrayList<String>(); public final List<String> readFilters = new ArrayList<>();
@ArgumentCollection @ArgumentCollection
public IntervalArgumentCollection intervalArguments = new IntervalArgumentCollection(); public IntervalArgumentCollection intervalArguments = new IntervalArgumentCollection();
@@ -408,6 +407,39 @@ public class GATKArgumentCollection {
required = false) required = false)
public boolean disableAutoIndexCreationAndLockingWhenReadingRods = false; public boolean disableAutoIndexCreationAndLockingWhenReadingRods = false;
@Hidden
@Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.",
required = false)
public boolean disableCommandLineInVCF = false;
@Argument(fullName = "sites_only", shortName = "sites_only", doc = "Just output sites without genotypes (i.e. only the first 8 columns of the VCF)",
required = false)
public boolean sitesOnlyVCF = false;
@Hidden
@Argument(fullName = "bcf", shortName = "bcf", doc = "Force BCF output, regardless of the file's extension",
required = false)
public boolean forceBCFOutput = false;
@Advanced
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files (0 - 9, higher is more compressed)",
minValue = 0, maxValue = 9, required = false)
public Integer bamCompression = null;
@Advanced
@Argument(fullName = "simplifyBAM", shortName = "simplifyBAM",
doc = "If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier",
required = false)
public boolean simplifyBAM = false;
@Argument(fullName = "disable_bam_indexing", doc = "Turn off on-the-fly creation of indices for output BAM files.",
required = false)
public boolean disableBAMIndexing = false;
@Argument(fullName = "generate_md5", doc = "Enable on-the-fly creation of md5s for output BAM files.",
required = false)
public boolean enableBAMmd5 = false;
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
// Multi-threading arguments // Multi-threading arguments

View File

@@ -25,6 +25,7 @@
package org.broadinstitute.gatk.engine.io.stubs; package org.broadinstitute.gatk.engine.io.stubs;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.io.OutputTracker; import org.broadinstitute.gatk.engine.io.OutputTracker;
import java.io.File; import java.io.File;
@@ -101,6 +102,9 @@ public class OutputStreamStub extends OutputStream implements Stub<OutputStream>
this.outputTracker = outputTracker; this.outputTracker = outputTracker;
} }
@Override
public void processArguments( final GATKArgumentCollection argumentCollection ) {}
/** /**
* @{inheritDoc} * @{inheritDoc}
*/ */

View File

@@ -30,29 +30,14 @@ import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter; import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.sam.ReadUtils;
import java.io.OutputStream; import java.io.OutputStream;
import java.lang.annotation.Annotation;
import java.lang.reflect.Type; import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.List;
/** /**
* Insert a SAMFileWriterStub instead of a full-fledged concrete OutputStream implementations. * Insert a SAMFileWriterStub instead of a full-fledged concrete OutputStream implementations.
*/ */
public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public static final String DEFAULT_ARGUMENT_FULLNAME = "outputBAM";
public static final String DEFAULT_ARGUMENT_SHORTNAME = "ob";
public static final String COMPRESSION_FULLNAME = "bam_compression";
public static final String COMPRESSION_SHORTNAME = "compress";
public static final String SIMPLIFY_BAM_FULLNAME = "simplifyBAM";
public static final String SIMPLIFY_BAM_SHORTNAME = SIMPLIFY_BAM_FULLNAME;
public static final String DISABLE_INDEXING_FULLNAME = "disable_bam_indexing";
public static final String ENABLE_MD5_FULLNAME = "generate_md5";
/** /**
* The engine into which output stubs should be fed. * The engine into which output stubs should be fed.
@@ -79,15 +64,6 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
return SAMFileWriter.class.equals(type) || GATKSAMFileWriter.class.equals(type); return SAMFileWriter.class.equals(type) || GATKSAMFileWriter.class.equals(type);
} }
@Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createBAMArgumentDefinition(source),
createBAMCompressionArgumentDefinition(source),
disableWriteIndexArgumentDefinition(source),
enableMD5GenerationArgumentDefinition(source),
createSimplifyBAMArgumentDefinition(source));
}
@Override @Override
public boolean createsTypeDefault(ArgumentSource source) { public boolean createsTypeDefault(ArgumentSource source) {
return !source.isRequired() && source.defaultsToStdout(); return !source.isRequired() && source.defaultsToStdout();
@@ -110,38 +86,15 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
@Override @Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
// Extract all possible parameters that could be passed to a BAM file writer? // Extract all possible parameters that could be passed to a BAM file writer?
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source); ArgumentDefinition bamArgumentDefinition = createDefaultArgumentDefinition(source);
ArgumentMatchValue writerFileName = getArgumentValue( bamArgumentDefinition, matches ); ArgumentMatchValue writerFileName = getArgumentValue( bamArgumentDefinition, matches );
ArgumentMatchValue compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches ); // Create the stub
Integer compressionLevel = compressionLevelText != null ? Integer.valueOf(compressionLevelText.asString()) : null;
boolean indexOnTheFly = !argumentIsPresent(disableWriteIndexArgumentDefinition(source),matches);
boolean generateMD5 = argumentIsPresent(this.enableMD5GenerationArgumentDefinition(source),matches);
boolean simplifyBAM = argumentIsPresent(createSimplifyBAMArgumentDefinition(source),matches);
// Validate the combination of parameters passed in.
// This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object;
// therefore, the user must have failed to specify a type default
if(writerFileName != null && writerFileName.asFile() == null && generateMD5)
throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside.");
// Create the stub and set parameters.
SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream);
if (writerFileName != null && writerFileName.asFile() != null ) { if (writerFileName != null && writerFileName.asFile() != null ) {
stub = new SAMFileWriterStub(engine, writerFileName.asFile()); stub = new SAMFileWriterStub(engine, writerFileName.asFile());
if ( compressionLevel != null ) {
stub.setCompressionLevel(ReadUtils.validateCompressionLevel(compressionLevel));
} if ( indexOnTheFly )
stub.setIndexOnTheFly(indexOnTheFly);
if ( generateMD5 )
stub.setGenerateMD5(generateMD5);
if ( simplifyBAM )
stub.setSimplifyBAM(simplifyBAM);
// WARNING: Side effects required by engine! // WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches)); parsingEngine.addTags(stub,getArgumentTags(matches));
engine.addOutput(stub); engine.addOutput(stub);
@@ -150,96 +103,4 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
return stub; return stub;
} }
/**
* Gets the definition of the argument representing the BAM file itself.
* @param source Argument source for the BAM file. Must not be null.
* @return Argument definition for the BAM file itself. Will not be null.
*/
private ArgumentDefinition createBAMArgumentDefinition(ArgumentSource source) {
Annotation annotation = getArgumentAnnotation(source);
return new ArgumentDefinition( annotation,
ArgumentIOType.getIOType(annotation),
source.field.getType(),
DEFAULT_ARGUMENT_FULLNAME,
DEFAULT_ARGUMENT_SHORTNAME,
ArgumentDefinition.getDoc(annotation),
source.isRequired(),
false,
source.isMultiValued(),
source.isHidden(),
null,
null,
null,
null);
}
/**
* Creates the optional compression level argument for the BAM file.
* @param source Argument source for the BAM file. Must not be null.
* @return Argument definition for the BAM file itself. Will not be null.
*/
private ArgumentDefinition createBAMCompressionArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
int.class,
COMPRESSION_FULLNAME,
COMPRESSION_SHORTNAME,
"Compression level to use for writing BAM files",
false,
false,
false,
source.isHidden(),
null,
null,
null,
null );
}
/**
 * Creates the optional flag that turns off on-the-fly index creation for output BAM files.
 * @param source Argument source for the BAM file. Must not be null.
 * @return Argument definition for the disable-indexing flag. Will not be null.
 */
private ArgumentDefinition disableWriteIndexArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
DISABLE_INDEXING_FULLNAME,
null,   // no short name for this flag (cf. createBAMCompressionArgumentDefinition)
"Turn off on-the-fly creation of indices for output BAM files.",
false,
true,
false,
source.isHidden(),
null,
null,
null,
null );
}
/**
 * Creates the optional flag that enables on-the-fly md5 generation for output BAM files.
 * @param source Argument source for the BAM file. Must not be null.
 * @return Argument definition for the md5-generation flag. Will not be null.
 */
private ArgumentDefinition enableMD5GenerationArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
ENABLE_MD5_FULLNAME,
null,   // no short name for this flag
"Enable on-the-fly creation of md5s for output BAM files.",
false,
true,
false,
source.isHidden(),
null,
null,
null,
null );
}
/**
 * Creates the optional flag that simplifies output BAM files, stripping reads and tags
 * that are not needed for downstream variation discovery.
 * @param source Argument source for the BAM file. Must not be null.
 * @return Argument definition for the simplify-BAM flag. Will not be null.
 */
private ArgumentDefinition createSimplifyBAMArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
SIMPLIFY_BAM_FULLNAME,
SIMPLIFY_BAM_SHORTNAME,
"If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier",
false,
true,
false,
source.isHidden(),
null,
null,
null,
null );
}
} }

View File

@ -30,6 +30,7 @@ import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.ProgressLoggerInterface; import htsjdk.samtools.util.ProgressLoggerInterface;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.io.OutputTracker; import org.broadinstitute.gatk.engine.io.OutputTracker;
import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter; import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
@ -273,6 +274,16 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
this.outputTracker = outputTracker; this.outputTracker = outputTracker;
} }
/**
 * Applies the engine-level BAM output arguments (--bam_compression, --generate_md5,
 * --disable_bam_indexing, and --simplifyBAM) to this writer stub.
 * @param argumentCollection The parsed engine arguments to read the settings from
 */
@Override
public void processArguments( final GATKArgumentCollection argumentCollection ) {
    // Only override the compression level when the user explicitly supplied one
    final Integer requestedCompression = argumentCollection.bamCompression;
    if ( requestedCompression != null ) {
        setCompressionLevel(requestedCompression);
    }
    setSimplifyBAM(argumentCollection.simplifyBAM);
    setIndexOnTheFly(!argumentCollection.disableBAMIndexing);
    setGenerateMD5(argumentCollection.enableBAMmd5);
}
/** /**
* Use the given header as the target for this writer. * Use the given header as the target for this writer.
* @param header The header to write. * @param header The header to write.
@ -284,7 +295,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
} }
private void initializeReadTransformers() { private void initializeReadTransformers() {
this.onOutputReadTransformers = new ArrayList<ReadTransformer>(engine.getReadTransformers().size()); this.onOutputReadTransformers = new ArrayList<>(engine.getReadTransformers().size());
for ( final ReadTransformer transformer : engine.getReadTransformers() ) { for ( final ReadTransformer transformer : engine.getReadTransformers() ) {
if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_OUTPUT ) if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_OUTPUT )
onOutputReadTransformers.add(transformer); onOutputReadTransformers.add(transformer);

View File

@ -25,6 +25,7 @@
package org.broadinstitute.gatk.engine.io.stubs; package org.broadinstitute.gatk.engine.io.stubs;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.io.OutputTracker; import org.broadinstitute.gatk.engine.io.OutputTracker;
import java.io.File; import java.io.File;
@ -47,6 +48,15 @@ public interface Stub<StreamType> {
*/ */
public void register( OutputTracker outputTracker ); public void register( OutputTracker outputTracker );
/**
 * Provides a mechanism for uniformly processing command-line arguments
 * that are important for file processing. For example, this method
 * might pass on the compression value specified by the user to
 * a SAMFileWriter.
 * @param argumentCollection The arguments to be processed
 */
public void processArguments( final GATKArgumentCollection argumentCollection );
/** /**
* Returns the OutputStream represented by this stub or null if not available. * Returns the OutputStream represented by this stub or null if not available.
*/ */

View File

@ -30,15 +30,11 @@ import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import htsjdk.variant.variantcontext.writer.VariantContextWriterFactory;
import java.io.File; import java.io.File;
import java.io.OutputStream; import java.io.OutputStream;
import java.lang.reflect.Type; import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet;
import java.util.List;
/** /**
* Injects new command-line arguments into the system providing support for the genotype writer. * Injects new command-line arguments into the system providing support for the genotype writer.
@ -47,9 +43,6 @@ import java.util.List;
* @version 0.1 * @version 0.1
*/ */
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
public static final String SITES_ONLY_ARG_NAME = "sites_only";
public static final String FORCE_BCF = "bcf";
/** /**
* The engine into which output stubs should be fed. * The engine into which output stubs should be fed.
@ -88,15 +81,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return VariantContextWriter.class.equals(type); return VariantContextWriter.class.equals(type);
} }
@Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList(
createDefaultArgumentDefinition(source),
createNoCommandLineHeaderArgumentDefinition(),
createSitesOnlyArgumentDefinition(),
createBCFArgumentDefinition() );
}
/** /**
* This command-line argument descriptor does want to override the provided default value. * This command-line argument descriptor does want to override the provided default value.
* @return true always. * @return true always.
@ -145,9 +129,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources); : new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
stub.setCompressed(isCompressed(writerFileName == null ? null: writerFileName.asString())); stub.setCompressed(isCompressed(writerFileName == null ? null: writerFileName.asString()));
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
// WARNING: Side effects required by engine! // WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches)); parsingEngine.addTags(stub,getArgumentTags(matches));
@ -156,66 +137,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return stub; return stub;
} }
/**
 * Creates the optional no_cmdline_in_header argument for the VCF file.
 * @return Argument definition for the no_cmdline_in_header flag. Will not be null.
 */
private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
NO_HEADER_ARG_NAME,
NO_HEADER_ARG_NAME,
"Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.",
false,
true,
false,
true,   // always hidden: debugging/testing option only
null,
null,
null,
null );
}
/**
 * Creates the optional sites_only argument definition.
 * @return Argument definition for the sites_only flag. Will not be null.
 */
private ArgumentDefinition createSitesOnlyArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
SITES_ONLY_ARG_NAME,
SITES_ONLY_ARG_NAME,
"Just output sites without genotypes (i.e. only the first 8 columns of the VCF)",
false,
true,
false,
true,   // hidden from the standard help output
null,
null,
null,
null );
}
/**
 * Creates the optional bcf argument definition.
 * @return Argument definition for the bcf flag. Will not be null.
 */
private ArgumentDefinition createBCFArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
FORCE_BCF,
FORCE_BCF,
"force BCF output, regardless of the file's extension",
false,
true,
false,
true,   // hidden from the standard help output
null,
null,
null,
null );
}
/** /**
* Returns true if the file will be compressed. * Returns true if the file will be compressed.
* @param writerFileName Name of the file * @param writerFileName Name of the file

View File

@ -28,6 +28,7 @@ package org.broadinstitute.gatk.engine.io.stubs;
import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.tribble.index.IndexCreator; import htsjdk.tribble.index.IndexCreator;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.io.OutputTracker; import org.broadinstitute.gatk.engine.io.OutputTracker;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils; import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
@ -229,6 +230,14 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
this.outputTracker = outputTracker; this.outputTracker = outputTracker;
} }
/**
 * Applies the engine-level VCF output arguments (--sites_only,
 * --no_cmdline_in_header, and --bcf) to this writer stub.
 * @param argumentCollection The parsed engine arguments to read the settings from
 */
@Override
public void processArguments( final GATKArgumentCollection argumentCollection ) {
    setForceBCF(argumentCollection.forceBCFOutput);
    setSkipWritingCommandLineHeader(argumentCollection.disableCommandLineInVCF);
    setDoNotWriteGenotypes(argumentCollection.sitesOnlyVCF);
}
public void writeHeader(VCFHeader header) { public void writeHeader(VCFHeader header) {
vcfHeader = header; vcfHeader = header;

View File

@ -59,16 +59,15 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
@Argument(fullName="outputRoot", doc="output BAM file", required=false) @Argument(fullName="outputRoot", doc="output BAM file", required=false)
public String outputRoot = ""; public String outputRoot = "";
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false) private static final Logger logger = Logger.getLogger(SplitSamFile.class);
public Integer BAMcompression = 5; private static final String VERSION = "0.0.1";
private static Logger logger = Logger.getLogger(SplitSamFile.class);
private static String VERSION = "0.0.1";
@Override
public void initialize() { public void initialize() {
logger.info("SplitSamFile version: " + VERSION); logger.info("SplitSamFile version: " + VERSION);
} }
@Override
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
return read; return read;
} }
@ -78,36 +77,39 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
// Standard I/O routines // Standard I/O routines
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@Override
public void onTraversalDone(Map<String, SAMFileWriter> outputs) { public void onTraversalDone(Map<String, SAMFileWriter> outputs) {
for ( SAMFileWriter output : outputs.values() ) { for ( SAMFileWriter output : outputs.values() ) {
output.close(); output.close();
} }
} }
@Override
public Map<String, SAMFileWriter> reduceInit() { public Map<String, SAMFileWriter> reduceInit() {
HashMap<String, SAMFileHeader> headers = new HashMap<String, SAMFileHeader>(); HashMap<String, SAMFileHeader> headers = new HashMap<>();
for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) { for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) {
final String sample = readGroup.getSample(); final String sample = readGroup.getSample();
if ( ! headers.containsKey(sample) ) { if ( ! headers.containsKey(sample) ) {
SAMFileHeader header = duplicateSAMFileHeader(this.getToolkit().getSAMFileHeader()); SAMFileHeader header = duplicateSAMFileHeader(this.getToolkit().getSAMFileHeader());
logger.debug(String.format("Creating BAM header for sample %s", sample)); logger.debug(String.format("Creating BAM header for sample %s", sample));
ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>(); ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<>();
header.setReadGroups(readGroups); header.setReadGroups(readGroups);
headers.put(sample, header); headers.put(sample, header);
} }
SAMFileHeader header = headers.get(sample); SAMFileHeader header = headers.get(sample);
List<SAMReadGroupRecord> newReadGroups = new ArrayList<SAMReadGroupRecord>(header.getReadGroups()); List<SAMReadGroupRecord> newReadGroups = new ArrayList<>(header.getReadGroups());
newReadGroups.add(readGroup); newReadGroups.add(readGroup);
header.setReadGroups(newReadGroups); header.setReadGroups(newReadGroups);
} }
HashMap<String, SAMFileWriter> outputs = new HashMap<String, SAMFileWriter>(); HashMap<String, SAMFileWriter> outputs = new HashMap<>();
for ( Map.Entry<String, SAMFileHeader> elt : headers.entrySet() ) { for ( Map.Entry<String, SAMFileHeader> elt : headers.entrySet() ) {
final String sample = elt.getKey(); final String sample = elt.getKey();
final String filename = outputRoot + sample + ".bam"; final String filename = outputRoot + sample + ".bam";
logger.info(String.format("Creating BAM output file %s for sample %s", filename, sample)); logger.info(String.format("Creating BAM output file %s for sample %s", filename, sample));
SAMFileWriter output = ReadUtils.createSAMFileWriterWithCompression(elt.getValue(), true, filename, BAMcompression);
final SAMFileWriter output = ReadUtils.createSAMFileWriter(filename, getToolkit(), elt.getValue());
outputs.put(sample, output); outputs.put(sample, output);
} }
@ -117,6 +119,7 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
/** /**
* Write out the read * Write out the read
*/ */
@Override
public Map<String, SAMFileWriter> reduce(SAMRecord read, Map<String, SAMFileWriter> outputs) { public Map<String, SAMFileWriter> reduce(SAMRecord read, Map<String, SAMFileWriter> outputs) {
final String sample = read.getReadGroup().getSample(); final String sample = read.getReadGroup().getSample();
SAMFileWriter output = outputs.get(sample); SAMFileWriter output = outputs.get(sample);

View File

@ -30,10 +30,10 @@ import com.google.java.contract.Requires;
import htsjdk.samtools.*; import htsjdk.samtools.*;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
import org.broadinstitute.gatk.utils.*; import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import java.io.File; import java.io.File;
import java.util.*; import java.util.*;
@ -131,26 +131,24 @@ public class ReadUtils {
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED} public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
/** /**
* Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular * Creates a SAMFileWriter using all of the features currently set in the engine (command line arguments, ReadTransformers, etc)
* SAMFileWriter without compression otherwise. * @param file the filename to write to
* * @param engine the engine
* @param header * @return a SAMFileWriter with the correct options set
* @param presorted
* @param file
* @param compression
* @return a SAMFileWriter with the compression level if it is a bam.
*/ */
public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) { public static SAMFileWriter createSAMFileWriter(final String file, final GenomeAnalysisEngine engine) {
validateCompressionLevel(compression); final SAMFileWriterStub output = new SAMFileWriterStub(engine, new File(file));
if (file.endsWith(".bam")) output.processArguments(engine.getArguments());
return new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(header, presorted, new File(file), compression); return output;
return new SAMFileWriterFactory().setCreateIndex(true).makeSAMOrBAMWriter(header, presorted, new File(file));
} }
public static int validateCompressionLevel(final int requestedCompressionLevel) { /**
if ( requestedCompressionLevel < 0 || requestedCompressionLevel > 9 ) * As {@link #createSAMFileWriter(String, org.broadinstitute.gatk.engine.GenomeAnalysisEngine)}, but also sets the header
throw new UserException.BadArgumentValue("compress", "Compression level must be 0-9 but got " + requestedCompressionLevel); */
return requestedCompressionLevel; public static SAMFileWriter createSAMFileWriter(final String file, final GenomeAnalysisEngine engine, final SAMFileHeader header) {
final SAMFileWriterStub output = (SAMFileWriterStub) createSAMFileWriter(file, engine);
output.writeHeader(header);
return output;
} }
/** /**

View File

@ -666,4 +666,59 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
return counter + sum; return counter + sum;
} }
} }
// --------------------------------------------------------------------------------
//
// Test output file-specific options
//
// --------------------------------------------------------------------------------
/**
 * Runs PrintReads on a fixed high-coverage BAM with the given extra arguments
 * and checks the single output BAM against the expected md5.
 * @param args extra command-line arguments to append to the PrintReads invocation
 * @param md5 expected md5 of the resulting BAM
 * @return the output BAM file produced by the test run
 */
private File testBAMFeatures(final String args, final String md5) {
WalkerTestSpec spec = new WalkerTestSpec("-T PrintReads -R " + b37KGReference +
" -I " + privateTestDir + "NA20313.highCoverageRegion.bam"
+ " --no_pg_tag -o %s " + args,
1, Arrays.asList(".bam"), Arrays.asList(md5));
return executeTest("testBAMFeatures: "+args, spec).first.get(0);
}
/**
 * Exercises the engine-level BAM output options: compression level, BAM
 * simplification, md5 sidecar generation, and disabling index creation.
 */
@Test
public void testSAMWriterFeatures() {
testBAMFeatures("-compress 0", "bb4b55b1f80423970bb9384cbf0d8793");
testBAMFeatures("-compress 9", "b85ee1636d62e1bb8ed65a245c307167");
testBAMFeatures("-simplifyBAM", "38f9c30a27dfbc085a2ff52a1617d579");

// Validate that --generate_md5 writes a sidecar ".md5" file containing the BAM's md5
final String expectedMD5 = "6627b9ea33293a0083983feb94948c1d";
final File md5Target = testBAMFeatures("--generate_md5", expectedMD5);
final File md5File = new File(md5Target.getAbsoluteFile() + ".md5");
md5File.deleteOnExit();
Assert.assertTrue(md5File.exists(), "MD5 wasn't created");

// FIX: close the reader via try-with-resources (the original leaked the
// BufferedReader/FileReader, which can keep the file handle open on some platforms)
try (final BufferedReader md5Reader = new BufferedReader(new FileReader(md5File))) {
    final String md5 = md5Reader.readLine();
    Assert.assertEquals(md5, expectedMD5, "Generated MD5 doesn't match expected");
} catch (IOException e) {
    Assert.fail("Can't parse MD5 file", e);
}

// Validate that --disable_bam_indexing suppresses both index naming conventions
// (foo.bam.bai and foo.bai)
final String unindexedBAM = testBAMFeatures("--disable_bam_indexing", expectedMD5).getAbsolutePath();
Assert.assertTrue(!(new File(unindexedBAM+".bai").exists()) &&
        !(new File(unindexedBAM.replace(".bam", ".bai")).exists()),
        "BAM index was created even though it was disabled");
}
/**
 * Runs SelectVariants on a fixed trio VCF with the given extra arguments
 * and checks the single output against the expected md5.
 * @param args extra command-line arguments to append to the SelectVariants invocation
 * @param md5 expected md5 of the resulting output file
 */
private void testVCFFeatures(final String args, final String md5) {
WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -R " + b37KGReference +
" -V " + privateTestDir + "CEUtrioTest.vcf"
+ " --no_cmdline_in_header -o %s " + args,
1, Arrays.asList(md5));
executeTest("testVCFFeatures: "+args, spec);
}
/**
 * Exercises the engine-level VCF output options: sites-only output and forced BCF output.
 */
@Test
public void testVCFWriterFeatures() {
testVCFFeatures("--sites_only", "94bf1f2c0946e933515e4322323a5716");
testVCFFeatures("--bcf", "03f2d6988f54a332da48803c78f9c4b3");
}
} }