From a35f5b8685ee9ae21a995ec5913ecdcc0e2cbedc Mon Sep 17 00:00:00 2001 From: Phillip Dexheimer Date: Tue, 26 Aug 2014 21:46:30 -0400 Subject: [PATCH] Moved arguments controlling options in output files into the engine * Arguments involved are --no_cmdline_in_header, --sites_only, and --bcf for VCF files and --bam_compression, --simplifyBAM, --disable_bam_indexing, and --generate_md5 for BAM files * PT 52740563 * Removed ReadUtils.createSAMFileWriterWithCompression(), replaced with ReadUtils.createSAMFileWriter(), which applies all appropriate engine-level arguments * Replaced hard-coded field names in ArgumentDefinitionField (Queue extension generator) with a reflection-based lookup that will fail noisily during extension generation if there's an error --- .../haplotypecaller/HaplotypeCaller.java | 2 +- .../gatk/ArgumentDefinitionField.java | 49 ++++-- .../gatk/queue/extensions/cancer/MuTect.scala | 12 -- .../extensions/gatk/BamGatherFunction.scala | 17 +-- .../extensions/gatk/VcfGatherFunction.scala | 14 +- .../gatk/engine/GenomeAnalysisEngine.java | 11 +- .../arguments/GATKArgumentCollection.java | 38 ++++- .../engine/io/stubs/OutputStreamStub.java | 4 + .../SAMFileWriterArgumentTypeDescriptor.java | 143 +----------------- .../engine/io/stubs/SAMFileWriterStub.java | 13 +- .../gatk/engine/io/stubs/Stub.java | 10 ++ .../VCFWriterArgumentTypeDescriptor.java | 79 ---------- .../io/stubs/VariantContextWriterStub.java | 9 ++ .../tools/walkers/readutils/SplitSamFile.java | 23 +-- .../gatk/utils/sam/ReadUtils.java | 34 ++--- .../engine/EngineFeaturesIntegrationTest.java | 55 +++++++ 16 files changed, 209 insertions(+), 304 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 5bbe4ae27..81363461e 100644 --- 
a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -1097,7 +1097,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } catch ( final Exception e ) { // Capture any exception that might be thrown, and write out the assembly failure BAM if requested if ( captureAssemblyFailureBAM ) { - final SAMFileWriter writer = ReadUtils.createSAMFileWriterWithCompression(getToolkit().getSAMFileHeader(), true, "assemblyFailure.bam", 5); + final SAMFileWriter writer = ReadUtils.createSAMFileWriter("assemblyFailure.bam", getToolkit()); for ( final GATKSAMRecord read : activeRegion.getReads() ) { writer.addAlignment(read); } diff --git a/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentDefinitionField.java b/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentDefinitionField.java index c410c9748..1e9e5cc45 100644 --- a/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentDefinitionField.java @@ -27,12 +27,13 @@ package org.broadinstitute.gatk.queue.extensions.gatk; import htsjdk.samtools.BAMIndex; import htsjdk.samtools.SAMFileWriter; import htsjdk.tribble.Tribble; +import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; import org.broadinstitute.gatk.utils.commandline.*; -import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterArgumentTypeDescriptor; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import java.io.File; import java.lang.annotation.Annotation; +import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Arrays; 
import java.util.Collections; @@ -116,7 +117,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } public static List getArgumentFields(ParsingEngine parsingEngine,Class classType) { - List argumentFields = new ArrayList(); + List argumentFields = new ArrayList<>(); for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType)) if (!argumentSource.isDeprecated()) { String gatherer = null; @@ -133,7 +134,31 @@ public abstract class ArgumentDefinitionField extends ArgumentField { return argumentFields; } - private static final List intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals"); + public static String getArgumentFullName(final Class collection, final String fieldName) { + try { + final Field field = collection.getField(fieldName); + final Argument arg = field.getAnnotation(Argument.class); + if (arg != null) + return arg.fullName(); + final Input inputAnnotation = field.getAnnotation(Input.class); + if (inputAnnotation != null) + return inputAnnotation.fullName(); + final Output outputAnnotation = field.getAnnotation(Output.class); + if (outputAnnotation != null) + return outputAnnotation.fullName(); + } catch (NoSuchFieldException e) { + throw new IllegalStateException(String.format("Can't find field %s in ArgumentCollection %s", fieldName, collection.getSimpleName()), e); + } + throw new IllegalStateException(String.format("Field %s in class %s is not annotated as an argument", fieldName, collection.getName())); + } + + private static final List intervalFields = new ArrayList<>(); + private static final String inputFileArgument = getArgumentFullName(GATKArgumentCollection.class, "samFiles"); + + static { + intervalFields.add(getArgumentFullName(IntervalArgumentCollection.class, "intervals")); + intervalFields.add(getArgumentFullName(IntervalArgumentCollection.class, "excludeIntervals")); + } private static List getArgumentFields(ArgumentDefinition argumentDefinition, String gatherer) { 
if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { @@ -144,7 +169,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } else if (NumThreadsArgumentField.NUM_THREADS_FIELD.equals(argumentDefinition.fullName)) { return Arrays.asList(new NumThreadsArgumentField(argumentDefinition)); - } else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { + } else if (inputFileArgument.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam")); } else if ((RodBinding.class.equals(argumentDefinition.argumentType) || RodBinding.class.equals(argumentDefinition.componentType) || RodBindingCollection.class.equals(argumentDefinition.componentType)) && argumentDefinition.ioType == ArgumentIOType.INPUT) { @@ -155,7 +180,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } else if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) { - List fields = new ArrayList(); + List fields = new ArrayList<>(); String gatherClass; @@ -193,7 +218,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } else if (!argumentDefinition.required && useOption(argumentDefinition.argumentType)) { boolean useFormat = useFormatter(argumentDefinition.argumentType); - List fields = new ArrayList(); + List fields = new ArrayList<>(); ArgumentField field = new OptionedArgumentField(argumentDefinition, useFormat); fields.add(field); if (useFormat) fields.add(new FormatterArgumentField(field)); @@ -201,7 +226,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } else { boolean useFormat = useFormatter(argumentDefinition.argumentType); - List fields = new ArrayList(); + List fields = new ArrayList<>(); ArgumentField field 
= new DefaultArgumentField(argumentDefinition, useFormat); fields.add(field); if (useFormat) fields.add(new FormatterArgumentField(field)); @@ -349,7 +374,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField { // Allows the user to specify the track name, track type, and the file. public static class NumThreadsArgumentField extends OptionedArgumentField { - public static final String NUM_THREADS_FIELD = "num_threads"; + public static final String NUM_THREADS_FIELD = getArgumentFullName(GATKArgumentCollection.class, "numberOfDataThreads"); + public static final String NCT_FIELD = getArgumentFullName(GATKArgumentCollection.class, "numberOfCPUThreadsPerDataThread"); public NumThreadsArgumentField(ArgumentDefinition argumentDefinition) { super(argumentDefinition, false); @@ -357,7 +383,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField { @Override protected String getFreezeFields() { - return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n"); + return String.format("if (%1$s.isDefined) nCoresRequest = %1$s%nif (%2$s.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * %2$s.getOrElse(1))%n", + NUM_THREADS_FIELD, NCT_FIELD); } } @@ -495,7 +522,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { ("if (%2$s != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" + " if (!%3$s)%n" + " %1$s = new File(%2$s.getPath.stripSuffix(\".bam\") + \"%4$s\")%n"), - auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix); + auxFieldName, originalFieldName, getArgumentFullName(GATKArgumentCollection.class, "disableBAMIndexing"), BAMIndex.BAMIndexSuffix); } } @@ -508,7 +535,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { ("if (%2$s != null 
&& !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" + " if (%3$s)%n" + " %1$s = new File(%2$s.getPath + \"%4$s\")%n"), - auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME, ".md5"); + auxFieldName, originalFieldName, getArgumentFullName(GATKArgumentCollection.class, "enableBAMmd5"), ".md5"); } } diff --git a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/cancer/MuTect.scala b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/cancer/MuTect.scala index 92bafd810..36031d948 100644 --- a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/cancer/MuTect.scala +++ b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/cancer/MuTect.scala @@ -281,18 +281,6 @@ class MuTect extends org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGA @Gather(enabled=false) private var vcfIndex: File = _ - /** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */ - @Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="") - var no_cmdline_in_header: Boolean = _ - - /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */ - @Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. 
only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="") - var sites_only: Boolean = _ - - /** force BCF output, regardless of the file's extension */ - @Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="") - var bcf: Boolean = _ - /** VCF file of DBSNP information */ @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") var dbsnp: Seq[File] = Nil diff --git a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/BamGatherFunction.scala b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/BamGatherFunction.scala index f852e9d9e..53885109e 100644 --- a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/BamGatherFunction.scala +++ b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/BamGatherFunction.scala @@ -28,8 +28,6 @@ package org.broadinstitute.gatk.queue.extensions.gatk import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction import org.broadinstitute.gatk.queue.extensions.picard.MergeSamFiles import org.broadinstitute.gatk.queue.function.RetryMemoryLimit -import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterArgumentTypeDescriptor -import org.broadinstitute.gatk.queue.util.ClassFieldCache import java.io.File /** @@ -50,18 +48,9 @@ class BamGatherFunction extends MergeSamFiles with GatherFunction with RetryMemo // Whatever the original function can handle, merging *should* do less. 
this.memoryLimit = originalFunction.memoryLimit - - // bam_compression and index_output_bam_on_the_fly from SAMFileWriterArgumentTypeDescriptor - // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK - - val compression = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME) - this.compressionLevel = originalGATK.getFieldValue(compression).asInstanceOf[Option[Int]] - - val disableIndex = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME) - this.createIndex = Some(!originalGATK.getFieldValue(disableIndex).asInstanceOf[Boolean]) - - val enableMD5 = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME) - this.createMD5 = Some(originalGATK.getFieldValue(enableMD5).asInstanceOf[Boolean]) + this.compressionLevel = originalGATK.bam_compression + this.createIndex = Some(!originalGATK.disable_bam_indexing) + this.createMD5 = Some(originalGATK.generate_md5) super.freezeFieldValues() } diff --git a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/VcfGatherFunction.scala b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/VcfGatherFunction.scala index fe778fcfe..68664c34b 100644 --- a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/VcfGatherFunction.scala @@ -26,9 +26,7 @@ package org.broadinstitute.gatk.queue.extensions.gatk import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction -import org.broadinstitute.gatk.queue.function.{RetryMemoryLimit, QFunction} -import org.broadinstitute.gatk.engine.io.stubs.VCFWriterArgumentTypeDescriptor -import org.broadinstitute.gatk.queue.util.ClassFieldCache 
+import org.broadinstitute.gatk.queue.function.RetryMemoryLimit /** * Merges a vcf text file. @@ -44,14 +42,8 @@ class VcfGatherFunction extends CombineVariants with GatherFunction with RetryMe this.out = this.originalOutput GATKIntervals.copyIntervalArguments(this.originalGATK, this) - // NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor - // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK - - val noHeader = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME) - this.no_cmdline_in_header = originalGATK.getFieldValue(noHeader).asInstanceOf[Boolean] - - val sitesOnly = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME) - this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean] + this.no_cmdline_in_header = originalGATK.no_cmdline_in_header + this.sites_only = originalGATK.sites_only // ensure that the gather function receives the same unsafe parameter as the scattered function this.unsafe = this.originalGATK.unsafe diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java index c090010c3..abb699301 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java @@ -44,7 +44,9 @@ import org.broadinstitute.gatk.engine.filters.FilterManager; import org.broadinstitute.gatk.engine.filters.ReadFilter; import org.broadinstitute.gatk.engine.filters.ReadGroupBlackListFilter; import org.broadinstitute.gatk.engine.io.OutputTracker; +import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub; import org.broadinstitute.gatk.engine.io.stubs.Stub; +import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub; 
import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode; import org.broadinstitute.gatk.engine.phonehome.GATKRunReport; @@ -65,6 +67,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.interval.IntervalUtils; import org.broadinstitute.gatk.utils.progressmeter.ProgressMeter; import org.broadinstitute.gatk.utils.recalibration.BQSRArgumentSet; +import org.broadinstitute.gatk.utils.sam.ReadUtils; import org.broadinstitute.gatk.utils.text.XReadLines; import org.broadinstitute.gatk.utils.threading.ThreadEfficiencyMonitor; @@ -666,11 +669,13 @@ public class GenomeAnalysisEngine { * * @param outputTracker the tracker supplying the initialization data. */ - private void initializeOutputStreams(OutputTracker outputTracker) { - for (Map.Entry input : getInputs().entrySet()) + private void initializeOutputStreams(final OutputTracker outputTracker) { + for (final Map.Entry input : getInputs().entrySet()) outputTracker.addInput(input.getKey(), input.getValue()); - for (Stub stub : getOutputs()) + for (final Stub stub : getOutputs()) { + stub.processArguments(argCollection); outputTracker.addOutput(stub); + } outputTracker.prepareWalker(walker, getArguments().strictnessLevel); } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java index ad0dc435e..be09c5879 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java @@ -25,7 +25,6 @@ package org.broadinstitute.gatk.engine.arguments; -import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.ValidationStringency; import 
org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; @@ -61,7 +60,7 @@ public class GATKArgumentCollection { * BAM file. Please see our online documentation for more details on input formatting requirements. */ @Input(fullName = "input_file", shortName = "I", doc = "Input file containing sequence data (SAM or BAM)", required = false) - public List samFiles = new ArrayList(); + public List samFiles = new ArrayList<>(); @Hidden @Argument(fullName = "showFullBamList",doc="Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list files).") @@ -120,7 +119,7 @@ public class GATKArgumentCollection { * is specified in each tool's documentation. The default filters cannot be disabled. */ @Argument(fullName = "read_filter", shortName = "rf", doc = "Filters to apply to reads before analysis", required = false) - public final List readFilters = new ArrayList(); + public final List readFilters = new ArrayList<>(); @ArgumentCollection public IntervalArgumentCollection intervalArguments = new IntervalArgumentCollection(); @@ -408,6 +407,39 @@ public class GATKArgumentCollection { required = false) public boolean disableAutoIndexCreationAndLockingWhenReadingRods = false; + @Hidden + @Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", + required = false) + public boolean disableCommandLineInVCF = false; + + @Argument(fullName = "sites_only", shortName = "sites_only", doc = "Just output sites without genotypes (i.e. 
only the first 8 columns of the VCF)", + required = false) + public boolean sitesOnlyVCF = false; + + @Hidden + @Argument(fullName = "bcf", shortName = "bcf", doc = "Force BCF output, regardless of the file's extension", + required = false) + public boolean forceBCFOutput = false; + + @Advanced + @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files (0 - 9, higher is more compressed)", + minValue = 0, maxValue = 9, required = false) + public Integer bamCompression = null; + + @Advanced + @Argument(fullName = "simplifyBAM", shortName = "simplifyBAM", + doc = "If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier", + required = false) + public boolean simplifyBAM = false; + + @Argument(fullName = "disable_bam_indexing", doc = "Turn off on-the-fly creation of indices for output BAM files.", + required = false) + public boolean disableBAMIndexing = false; + + @Argument(fullName = "generate_md5", doc = "Enable on-the-fly creation of md5s for output BAM files.", + required = false) + public boolean enableBAMmd5 = false; + // -------------------------------------------------------------------------------------------------------------- // // Multi-threading arguments diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/OutputStreamStub.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/OutputStreamStub.java index 51c3aac3a..2f64dc236 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/OutputStreamStub.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/OutputStreamStub.java @@ -25,6 +25,7 @@ package org.broadinstitute.gatk.engine.io.stubs; +import 
org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; import org.broadinstitute.gatk.engine.io.OutputTracker; import java.io.File; @@ -101,6 +102,9 @@ public class OutputStreamStub extends OutputStream implements Stub this.outputTracker = outputTracker; } + @Override + public void processArguments( final GATKArgumentCollection argumentCollection ) {} + /** * @{inheritDoc} */ diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index a3394013e..c45432471 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -30,29 +30,14 @@ import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; -import org.broadinstitute.gatk.utils.sam.ReadUtils; import java.io.OutputStream; -import java.lang.annotation.Annotation; import java.lang.reflect.Type; -import java.util.Arrays; -import java.util.List; /** * Insert a SAMFileWriterStub instead of a full-fledged concrete OutputStream implementations. 
*/ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { - public static final String DEFAULT_ARGUMENT_FULLNAME = "outputBAM"; - public static final String DEFAULT_ARGUMENT_SHORTNAME = "ob"; - - public static final String COMPRESSION_FULLNAME = "bam_compression"; - public static final String COMPRESSION_SHORTNAME = "compress"; - - public static final String SIMPLIFY_BAM_FULLNAME = "simplifyBAM"; - public static final String SIMPLIFY_BAM_SHORTNAME = SIMPLIFY_BAM_FULLNAME; - - public static final String DISABLE_INDEXING_FULLNAME = "disable_bam_indexing"; - public static final String ENABLE_MD5_FULLNAME = "generate_md5"; /** * The engine into which output stubs should be fed. @@ -79,15 +64,6 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor return SAMFileWriter.class.equals(type) || GATKSAMFileWriter.class.equals(type); } - @Override - public List createArgumentDefinitions( ArgumentSource source ) { - return Arrays.asList( createBAMArgumentDefinition(source), - createBAMCompressionArgumentDefinition(source), - disableWriteIndexArgumentDefinition(source), - enableMD5GenerationArgumentDefinition(source), - createSimplifyBAMArgumentDefinition(source)); - } - @Override public boolean createsTypeDefault(ArgumentSource source) { return !source.isRequired() && source.defaultsToStdout(); @@ -110,38 +86,15 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor @Override public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { // Extract all possible parameters that could be passed to a BAM file writer? 
- ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source); + ArgumentDefinition bamArgumentDefinition = createDefaultArgumentDefinition(source); ArgumentMatchValue writerFileName = getArgumentValue( bamArgumentDefinition, matches ); - ArgumentMatchValue compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches ); - Integer compressionLevel = compressionLevelText != null ? Integer.valueOf(compressionLevelText.asString()) : null; - - boolean indexOnTheFly = !argumentIsPresent(disableWriteIndexArgumentDefinition(source),matches); - boolean generateMD5 = argumentIsPresent(this.enableMD5GenerationArgumentDefinition(source),matches); - boolean simplifyBAM = argumentIsPresent(createSimplifyBAMArgumentDefinition(source),matches); - - // Validate the combination of parameters passed in. - - // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; - // therefore, the user must have failed to specify a type default - if(writerFileName != null && writerFileName.asFile() == null && generateMD5) - throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); - - // Create the stub and set parameters. + // Create the stub SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); if (writerFileName != null && writerFileName.asFile() != null ) { stub = new SAMFileWriterStub(engine, writerFileName.asFile()); - if ( compressionLevel != null ) { - stub.setCompressionLevel(ReadUtils.validateCompressionLevel(compressionLevel)); - } if ( indexOnTheFly ) - stub.setIndexOnTheFly(indexOnTheFly); - if ( generateMD5 ) - stub.setGenerateMD5(generateMD5); - if ( simplifyBAM ) - stub.setSimplifyBAM(simplifyBAM); - // WARNING: Side effects required by engine! 
parsingEngine.addTags(stub,getArgumentTags(matches)); engine.addOutput(stub); @@ -150,96 +103,4 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor return stub; } - /** - * Gets the definition of the argument representing the BAM file itself. - * @param source Argument source for the BAM file. Must not be null. - * @return Argument definition for the BAM file itself. Will not be null. - */ - private ArgumentDefinition createBAMArgumentDefinition(ArgumentSource source) { - Annotation annotation = getArgumentAnnotation(source); - return new ArgumentDefinition( annotation, - ArgumentIOType.getIOType(annotation), - source.field.getType(), - DEFAULT_ARGUMENT_FULLNAME, - DEFAULT_ARGUMENT_SHORTNAME, - ArgumentDefinition.getDoc(annotation), - source.isRequired(), - false, - source.isMultiValued(), - source.isHidden(), - null, - null, - null, - null); - } - - /** - * Creates the optional compression level argument for the BAM file. - * @param source Argument source for the BAM file. Must not be null. - * @return Argument definition for the BAM file itself. Will not be null. 
- */ - private ArgumentDefinition createBAMCompressionArgumentDefinition(ArgumentSource source) { - return new ArgumentDefinition( ArgumentIOType.ARGUMENT, - int.class, - COMPRESSION_FULLNAME, - COMPRESSION_SHORTNAME, - "Compression level to use for writing BAM files", - false, - false, - false, - source.isHidden(), - null, - null, - null, - null ); - } - - private ArgumentDefinition disableWriteIndexArgumentDefinition(ArgumentSource source) { - return new ArgumentDefinition( ArgumentIOType.ARGUMENT, - boolean.class, - DISABLE_INDEXING_FULLNAME, - null, - "Turn off on-the-fly creation of indices for output BAM files.", - false, - true, - false, - source.isHidden(), - null, - null, - null, - null ); - } - - private ArgumentDefinition enableMD5GenerationArgumentDefinition(ArgumentSource source) { - return new ArgumentDefinition( ArgumentIOType.ARGUMENT, - boolean.class, - ENABLE_MD5_FULLNAME, - null, - "Enable on-the-fly creation of md5s for output BAM files.", - false, - true, - false, - source.isHidden(), - null, - null, - null, - null ); - } - - - private ArgumentDefinition createSimplifyBAMArgumentDefinition(ArgumentSource source) { - return new ArgumentDefinition( ArgumentIOType.ARGUMENT, - boolean.class, - SIMPLIFY_BAM_FULLNAME, - SIMPLIFY_BAM_SHORTNAME, - "If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier", - false, - true, - false, - source.isHidden(), - null, - null, - null, - null ); - } } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileWriterStub.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileWriterStub.java index 2bcbd4660..cc814e9e6 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileWriterStub.java +++ 
b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileWriterStub.java @@ -30,6 +30,7 @@ import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.ProgressLoggerInterface; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; import org.broadinstitute.gatk.engine.io.OutputTracker; import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter; import org.broadinstitute.gatk.engine.iterators.ReadTransformer; @@ -273,6 +274,16 @@ public class SAMFileWriterStub implements Stub, GATKSAMFileWriter this.outputTracker = outputTracker; } + @Override + public void processArguments( final GATKArgumentCollection argumentCollection ) { + if (argumentCollection.bamCompression != null) + setCompressionLevel(argumentCollection.bamCompression); + setGenerateMD5(argumentCollection.enableBAMmd5); + setIndexOnTheFly(!argumentCollection.disableBAMIndexing); + setSimplifyBAM(argumentCollection.simplifyBAM); + + } + /** * Use the given header as the target for this writer. * @param header The header to write. 
@@ -284,7 +295,7 @@ public class SAMFileWriterStub implements Stub, GATKSAMFileWriter } private void initializeReadTransformers() { - this.onOutputReadTransformers = new ArrayList(engine.getReadTransformers().size()); + this.onOutputReadTransformers = new ArrayList<>(engine.getReadTransformers().size()); for ( final ReadTransformer transformer : engine.getReadTransformers() ) { if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_OUTPUT ) onOutputReadTransformers.add(transformer); diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/Stub.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/Stub.java index cdfd67ae1..8a000077b 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/Stub.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/Stub.java @@ -25,6 +25,7 @@ package org.broadinstitute.gatk.engine.io.stubs; +import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; import org.broadinstitute.gatk.engine.io.OutputTracker; import java.io.File; @@ -47,6 +48,15 @@ public interface Stub { */ public void register( OutputTracker outputTracker ); + /** + * Provides a mechanism for uniformly processing command-line arguments + * that are important for file processing. For example, this method + * might pass on the compression value specified by the user to + * a SAMFileWriter + * @param argumentCollection The arguments to be processed + */ + public void processArguments( final GATKArgumentCollection argumentCollection ); + /** * Returns the OutputStream represented by this stub or null if not available. 
*/ diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VCFWriterArgumentTypeDescriptor.java index 27fa858b1..686133922 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -30,15 +30,11 @@ import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; -import htsjdk.variant.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.io.OutputStream; import java.lang.reflect.Type; -import java.util.Arrays; import java.util.Collection; -import java.util.HashSet; -import java.util.List; /** * Injects new command-line arguments into the system providing support for the genotype writer. @@ -47,9 +43,6 @@ import java.util.List; * @version 0.1 */ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { - public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header"; - public static final String SITES_ONLY_ARG_NAME = "sites_only"; - public static final String FORCE_BCF = "bcf"; /** * The engine into which output stubs should be fed. 
@@ -88,15 +81,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { return VariantContextWriter.class.equals(type); } - @Override - public List createArgumentDefinitions( ArgumentSource source ) { - return Arrays.asList( - createDefaultArgumentDefinition(source), - createNoCommandLineHeaderArgumentDefinition(), - createSitesOnlyArgumentDefinition(), - createBCFArgumentDefinition() ); - } - /** * This command-line argument descriptor does want to override the provided default value. * @return true always. @@ -145,9 +129,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { : new VariantContextWriterStub(engine, defaultOutputStream, argumentSources); stub.setCompressed(isCompressed(writerFileName == null ? null: writerFileName.asString())); - stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches)); - stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches)); - stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches)); // WARNING: Side effects required by engine! parsingEngine.addTags(stub,getArgumentTags(matches)); @@ -156,66 +137,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { return stub; } - /** - * Creates the optional no_header argument for the VCF file. - * @return Argument definition for the VCF file itself. Will not be null. - */ - private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() { - return new ArgumentDefinition( ArgumentIOType.ARGUMENT, - boolean.class, - NO_HEADER_ARG_NAME, - NO_HEADER_ARG_NAME, - "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", - false, - true, - false, - true, - null, - null, - null, - null ); - } - - /** - * Creates the optional sites_only argument definition - * @return Argument definition for the VCF file itself. 
Will not be null. - */ - private ArgumentDefinition createSitesOnlyArgumentDefinition() { - return new ArgumentDefinition( ArgumentIOType.ARGUMENT, - boolean.class, - SITES_ONLY_ARG_NAME, - SITES_ONLY_ARG_NAME, - "Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", - false, - true, - false, - true, - null, - null, - null, - null ); - } - - /** - * Creates the optional bcf argument definition - * @return Argument definition for the VCF file itself. Will not be null. - */ - private ArgumentDefinition createBCFArgumentDefinition() { - return new ArgumentDefinition( ArgumentIOType.ARGUMENT, - boolean.class, - FORCE_BCF, - FORCE_BCF, - "force BCF output, regardless of the file's extension", - false, - true, - false, - true, - null, - null, - null, - null ); - } - /** * Returns true if the file will be compressed. * @param writerFileName Name of the file diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java index fcfddffdb..e3bf0e4da 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java @@ -28,6 +28,7 @@ package org.broadinstitute.gatk.engine.io.stubs; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.tribble.index.IndexCreator; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; import org.broadinstitute.gatk.engine.io.OutputTracker; import org.broadinstitute.gatk.utils.variant.GATKVCFUtils; import htsjdk.variant.variantcontext.VariantContext; @@ -229,6 +230,14 @@ public class VariantContextWriterStub implements Stub, Var this.outputTracker = outputTracker; } + @Override + public void 
processArguments( final GATKArgumentCollection argumentCollection ) { + setDoNotWriteGenotypes(argumentCollection.sitesOnlyVCF); + setSkipWritingCommandLineHeader(argumentCollection.disableCommandLineInVCF); + setForceBCF(argumentCollection.forceBCFOutput); + + } + public void writeHeader(VCFHeader header) { vcfHeader = header; diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/readutils/SplitSamFile.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/readutils/SplitSamFile.java index 157a44f0a..f4ee4a499 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/readutils/SplitSamFile.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/readutils/SplitSamFile.java @@ -59,16 +59,15 @@ public class SplitSamFile extends ReadWalker outputs) { for ( SAMFileWriter output : outputs.values() ) { output.close(); } } + @Override public Map reduceInit() { - HashMap headers = new HashMap(); + HashMap headers = new HashMap<>(); for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) { final String sample = readGroup.getSample(); if ( ! 
headers.containsKey(sample) ) { SAMFileHeader header = duplicateSAMFileHeader(this.getToolkit().getSAMFileHeader()); logger.debug(String.format("Creating BAM header for sample %s", sample)); - ArrayList readGroups = new ArrayList(); + ArrayList readGroups = new ArrayList<>(); header.setReadGroups(readGroups); headers.put(sample, header); } SAMFileHeader header = headers.get(sample); - List newReadGroups = new ArrayList(header.getReadGroups()); + List newReadGroups = new ArrayList<>(header.getReadGroups()); newReadGroups.add(readGroup); header.setReadGroups(newReadGroups); } - HashMap outputs = new HashMap(); + HashMap outputs = new HashMap<>(); for ( Map.Entry elt : headers.entrySet() ) { final String sample = elt.getKey(); final String filename = outputRoot + sample + ".bam"; logger.info(String.format("Creating BAM output file %s for sample %s", filename, sample)); - SAMFileWriter output = ReadUtils.createSAMFileWriterWithCompression(elt.getValue(), true, filename, BAMcompression); + + final SAMFileWriter output = ReadUtils.createSAMFileWriter(filename, getToolkit(), elt.getValue()); outputs.put(sample, output); } @@ -117,6 +119,7 @@ public class SplitSamFile extends ReadWalker reduce(SAMRecord read, Map outputs) { final String sample = read.getReadGroup().getSample(); SAMFileWriter output = outputs.get(sample); diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/sam/ReadUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/sam/ReadUtils.java index 100749847..7fc1b40f7 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/sam/ReadUtils.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/sam/ReadUtils.java @@ -30,10 +30,10 @@ import com.google.java.contract.Requires; import htsjdk.samtools.*; import org.apache.log4j.Logger; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub; 
import org.broadinstitute.gatk.utils.*; import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; -import org.broadinstitute.gatk.utils.exceptions.UserException; import java.io.File; import java.util.*; @@ -131,26 +131,24 @@ public class ReadUtils { public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED} /** - * Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular - * SAMFileWriter without compression otherwise. - * - * @param header - * @param presorted - * @param file - * @param compression - * @return a SAMFileWriter with the compression level if it is a bam. + * Creates a SAMFileWriter using all of the features currently set in the engine (command line arguments, ReadTransformers, etc) + * @param file the filename to write to + * @param engine the engine + * @return a SAMFileWriter with the correct options set */ - public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) { - validateCompressionLevel(compression); - if (file.endsWith(".bam")) - return new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(header, presorted, new File(file), compression); - return new SAMFileWriterFactory().setCreateIndex(true).makeSAMOrBAMWriter(header, presorted, new File(file)); + public static SAMFileWriter createSAMFileWriter(final String file, final GenomeAnalysisEngine engine) { + final SAMFileWriterStub output = new SAMFileWriterStub(engine, new File(file)); + output.processArguments(engine.getArguments()); + return output; } - public static int validateCompressionLevel(final int requestedCompressionLevel) { - if ( requestedCompressionLevel < 0 || requestedCompressionLevel > 9 ) - throw new 
UserException.BadArgumentValue("compress", "Compression level must be 0-9 but got " + requestedCompressionLevel); - return requestedCompressionLevel; + /** + * As {@link #createSAMFileWriter(String, org.broadinstitute.gatk.engine.GenomeAnalysisEngine)}, but also sets the header + */ + public static SAMFileWriter createSAMFileWriter(final String file, final GenomeAnalysisEngine engine, final SAMFileHeader header) { + final SAMFileWriterStub output = (SAMFileWriterStub) createSAMFileWriter(file, engine); + output.writeHeader(header); + return output; } /** diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java index 7f8b3bc5d..b8cea85e1 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java @@ -666,4 +666,59 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { return counter + sum; } } + + // -------------------------------------------------------------------------------- + // + // Test output file-specific options + // + // -------------------------------------------------------------------------------- + + //Returns the output file + private File testBAMFeatures(final String args, final String md5) { + WalkerTestSpec spec = new WalkerTestSpec("-T PrintReads -R " + b37KGReference + + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam" + + " --no_pg_tag -o %s " + args, + 1, Arrays.asList(".bam"), Arrays.asList(md5)); + return executeTest("testBAMFeatures: "+args, spec).first.get(0); + } + + @Test + public void testSAMWriterFeatures() { + testBAMFeatures("-compress 0", "bb4b55b1f80423970bb9384cbf0d8793"); + testBAMFeatures("-compress 9", "b85ee1636d62e1bb8ed65a245c307167"); + testBAMFeatures("-simplifyBAM", 
"38f9c30a27dfbc085a2ff52a1617d579"); + + //Validate MD5 + final String expectedMD5 = "6627b9ea33293a0083983feb94948c1d"; + final File md5Target = testBAMFeatures("--generate_md5", expectedMD5); + final File md5File = new File(md5Target.getAbsoluteFile() + ".md5"); + md5File.deleteOnExit(); + Assert.assertTrue(md5File.exists(), "MD5 wasn't created"); + try { + String md5 = new BufferedReader(new FileReader(md5File)).readLine(); + Assert.assertEquals(md5, expectedMD5, "Generated MD5 doesn't match expected"); + } catch (IOException e) { + Assert.fail("Can't parse MD5 file", e); + } + + //Validate that index isn't created + final String unindexedBAM = testBAMFeatures("--disable_bam_indexing", expectedMD5).getAbsolutePath(); + Assert.assertTrue(!(new File(unindexedBAM+".bai").exists()) && + !(new File(unindexedBAM.replace(".bam", ".bai")).exists()), + "BAM index was created even though it was disabled"); + } + + private void testVCFFeatures(final String args, final String md5) { + WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -R " + b37KGReference + + " -V " + privateTestDir + "CEUtrioTest.vcf" + + " --no_cmdline_in_header -o %s " + args, + 1, Arrays.asList(md5)); + executeTest("testVCFFeatures: "+args, spec); + } + + @Test + public void testVCFWriterFeatures() { + testVCFFeatures("--sites_only", "94bf1f2c0946e933515e4322323a5716"); + testVCFFeatures("--bcf", "03f2d6988f54a332da48803c78f9c4b3"); + } } \ No newline at end of file