Moved arguments controlling options in output files into the engine

* Arguments involved are --no_cmdline_in_header, --sites_only, and --bcf for VCF files, and --bam_compression, --simplifyBAM, --disable_bam_indexing, and --generate_md5 for BAM files
* PT 52740563
* Removed ReadUtils.createSAMFileWriterWithCompression(); replaced with ReadUtils.createSAMFileWriter(), which applies all appropriate engine-level arguments
* Replaced hard-coded field names in ArgumentDefinitionField (Queue extension generator) with a reflection-based lookup that will fail noisily during extension generation if there's an error
This commit is contained in:
parent
5c4a3eb89c
commit
a35f5b8685
|
|
@ -1097,7 +1097,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
} catch ( final Exception e ) {
|
||||
// Capture any exception that might be thrown, and write out the assembly failure BAM if requested
|
||||
if ( captureAssemblyFailureBAM ) {
|
||||
final SAMFileWriter writer = ReadUtils.createSAMFileWriterWithCompression(getToolkit().getSAMFileHeader(), true, "assemblyFailure.bam", 5);
|
||||
final SAMFileWriter writer = ReadUtils.createSAMFileWriter("assemblyFailure.bam", getToolkit());
|
||||
for ( final GATKSAMRecord read : activeRegion.getReads() ) {
|
||||
writer.addAlignment(read);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,12 +27,13 @@ package org.broadinstitute.gatk.queue.extensions.gatk;
|
|||
import htsjdk.samtools.BAMIndex;
|
||||
import htsjdk.samtools.SAMFileWriter;
|
||||
import htsjdk.tribble.Tribble;
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
|
@ -116,7 +117,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
}
|
||||
|
||||
public static List<? extends ArgumentField> getArgumentFields(ParsingEngine parsingEngine,Class<?> classType) {
|
||||
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
|
||||
List<ArgumentField> argumentFields = new ArrayList<>();
|
||||
for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType))
|
||||
if (!argumentSource.isDeprecated()) {
|
||||
String gatherer = null;
|
||||
|
|
@ -133,7 +134,31 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
return argumentFields;
|
||||
}
|
||||
|
||||
private static final List<String> intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals");
|
||||
public static String getArgumentFullName(final Class<?> collection, final String fieldName) {
|
||||
try {
|
||||
final Field field = collection.getField(fieldName);
|
||||
final Argument arg = field.getAnnotation(Argument.class);
|
||||
if (arg != null)
|
||||
return arg.fullName();
|
||||
final Input inputAnnotation = field.getAnnotation(Input.class);
|
||||
if (inputAnnotation != null)
|
||||
return inputAnnotation.fullName();
|
||||
final Output outputAnnotation = field.getAnnotation(Output.class);
|
||||
if (outputAnnotation != null)
|
||||
return outputAnnotation.fullName();
|
||||
} catch (NoSuchFieldException e) {
|
||||
throw new IllegalStateException(String.format("Can't find field %s in ArgumentCollection %s", fieldName, collection.getSimpleName()), e);
|
||||
}
|
||||
throw new IllegalStateException(String.format("Field %s in class %s is not annotated as an argument", fieldName, collection.getName()));
|
||||
}
|
||||
|
||||
private static final List<String> intervalFields = new ArrayList<>();
|
||||
private static final String inputFileArgument = getArgumentFullName(GATKArgumentCollection.class, "samFiles");
|
||||
|
||||
static {
|
||||
intervalFields.add(getArgumentFullName(IntervalArgumentCollection.class, "intervals"));
|
||||
intervalFields.add(getArgumentFullName(IntervalArgumentCollection.class, "excludeIntervals"));
|
||||
}
|
||||
|
||||
private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition, String gatherer) {
|
||||
if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
|
||||
|
|
@ -144,7 +169,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
} else if (NumThreadsArgumentField.NUM_THREADS_FIELD.equals(argumentDefinition.fullName)) {
|
||||
return Arrays.asList(new NumThreadsArgumentField(argumentDefinition));
|
||||
|
||||
} else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
|
||||
} else if (inputFileArgument.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
|
||||
return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam"));
|
||||
|
||||
} else if ((RodBinding.class.equals(argumentDefinition.argumentType) || RodBinding.class.equals(argumentDefinition.componentType) || RodBindingCollection.class.equals(argumentDefinition.componentType)) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
|
||||
|
|
@ -155,7 +180,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
} else if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) {
|
||||
|
||||
List<ArgumentField> fields = new ArrayList<ArgumentField>();
|
||||
List<ArgumentField> fields = new ArrayList<>();
|
||||
|
||||
String gatherClass;
|
||||
|
||||
|
|
@ -193,7 +218,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
} else if (!argumentDefinition.required && useOption(argumentDefinition.argumentType)) {
|
||||
boolean useFormat = useFormatter(argumentDefinition.argumentType);
|
||||
List<ArgumentField> fields = new ArrayList<ArgumentField>();
|
||||
List<ArgumentField> fields = new ArrayList<>();
|
||||
ArgumentField field = new OptionedArgumentField(argumentDefinition, useFormat);
|
||||
fields.add(field);
|
||||
if (useFormat) fields.add(new FormatterArgumentField(field));
|
||||
|
|
@ -201,7 +226,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
} else {
|
||||
boolean useFormat = useFormatter(argumentDefinition.argumentType);
|
||||
List<ArgumentField> fields = new ArrayList<ArgumentField>();
|
||||
List<ArgumentField> fields = new ArrayList<>();
|
||||
ArgumentField field = new DefaultArgumentField(argumentDefinition, useFormat);
|
||||
fields.add(field);
|
||||
if (useFormat) fields.add(new FormatterArgumentField(field));
|
||||
|
|
@ -349,7 +374,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
// Allows the user to specify the track name, track type, and the file.
|
||||
public static class NumThreadsArgumentField extends OptionedArgumentField {
|
||||
public static final String NUM_THREADS_FIELD = "num_threads";
|
||||
public static final String NUM_THREADS_FIELD = getArgumentFullName(GATKArgumentCollection.class, "numberOfDataThreads");
|
||||
public static final String NCT_FIELD = getArgumentFullName(GATKArgumentCollection.class, "numberOfCPUThreadsPerDataThread");
|
||||
|
||||
public NumThreadsArgumentField(ArgumentDefinition argumentDefinition) {
|
||||
super(argumentDefinition, false);
|
||||
|
|
@ -357,7 +383,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
@Override
|
||||
protected String getFreezeFields() {
|
||||
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n");
|
||||
return String.format("if (%1$s.isDefined) nCoresRequest = %1$s%nif (%2$s.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * %2$s.getOrElse(1))%n",
|
||||
NUM_THREADS_FIELD, NCT_FIELD);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -495,7 +522,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
("if (%2$s != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" +
|
||||
" if (!%3$s)%n" +
|
||||
" %1$s = new File(%2$s.getPath.stripSuffix(\".bam\") + \"%4$s\")%n"),
|
||||
auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix);
|
||||
auxFieldName, originalFieldName, getArgumentFullName(GATKArgumentCollection.class, "disableBAMIndexing"), BAMIndex.BAMIndexSuffix);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -508,7 +535,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
("if (%2$s != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(%2$s))%n" +
|
||||
" if (%3$s)%n" +
|
||||
" %1$s = new File(%2$s.getPath + \"%4$s\")%n"),
|
||||
auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME, ".md5");
|
||||
auxFieldName, originalFieldName, getArgumentFullName(GATKArgumentCollection.class, "enableBAMmd5"), ".md5");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -281,18 +281,6 @@ class MuTect extends org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGA
|
|||
@Gather(enabled=false)
|
||||
private var vcfIndex: File = _
|
||||
|
||||
/** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */
|
||||
@Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="")
|
||||
var no_cmdline_in_header: Boolean = _
|
||||
|
||||
/** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */
|
||||
@Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="")
|
||||
var sites_only: Boolean = _
|
||||
|
||||
/** force BCF output, regardless of the file's extension */
|
||||
@Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="")
|
||||
var bcf: Boolean = _
|
||||
|
||||
/** VCF file of DBSNP information */
|
||||
@Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="")
|
||||
var dbsnp: Seq[File] = Nil
|
||||
|
|
|
|||
|
|
@ -28,8 +28,6 @@ package org.broadinstitute.gatk.queue.extensions.gatk
|
|||
import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
|
||||
import org.broadinstitute.gatk.queue.extensions.picard.MergeSamFiles
|
||||
import org.broadinstitute.gatk.queue.function.RetryMemoryLimit
|
||||
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterArgumentTypeDescriptor
|
||||
import org.broadinstitute.gatk.queue.util.ClassFieldCache
|
||||
import java.io.File
|
||||
|
||||
/**
|
||||
|
|
@ -50,18 +48,9 @@ class BamGatherFunction extends MergeSamFiles with GatherFunction with RetryMemo
|
|||
|
||||
// Whatever the original function can handle, merging *should* do less.
|
||||
this.memoryLimit = originalFunction.memoryLimit
|
||||
|
||||
// bam_compression and index_output_bam_on_the_fly from SAMFileWriterArgumentTypeDescriptor
|
||||
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK
|
||||
|
||||
val compression = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME)
|
||||
this.compressionLevel = originalGATK.getFieldValue(compression).asInstanceOf[Option[Int]]
|
||||
|
||||
val disableIndex = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME)
|
||||
this.createIndex = Some(!originalGATK.getFieldValue(disableIndex).asInstanceOf[Boolean])
|
||||
|
||||
val enableMD5 = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME)
|
||||
this.createMD5 = Some(originalGATK.getFieldValue(enableMD5).asInstanceOf[Boolean])
|
||||
this.compressionLevel = originalGATK.bam_compression
|
||||
this.createIndex = Some(!originalGATK.disable_bam_indexing)
|
||||
this.createMD5 = Some(originalGATK.generate_md5)
|
||||
|
||||
super.freezeFieldValues()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,9 +26,7 @@
|
|||
package org.broadinstitute.gatk.queue.extensions.gatk
|
||||
|
||||
import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
|
||||
import org.broadinstitute.gatk.queue.function.{RetryMemoryLimit, QFunction}
|
||||
import org.broadinstitute.gatk.engine.io.stubs.VCFWriterArgumentTypeDescriptor
|
||||
import org.broadinstitute.gatk.queue.util.ClassFieldCache
|
||||
import org.broadinstitute.gatk.queue.function.RetryMemoryLimit
|
||||
|
||||
/**
|
||||
* Merges a vcf text file.
|
||||
|
|
@ -44,14 +42,8 @@ class VcfGatherFunction extends CombineVariants with GatherFunction with RetryMe
|
|||
this.out = this.originalOutput
|
||||
GATKIntervals.copyIntervalArguments(this.originalGATK, this)
|
||||
|
||||
// NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor
|
||||
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK
|
||||
|
||||
val noHeader = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME)
|
||||
this.no_cmdline_in_header = originalGATK.getFieldValue(noHeader).asInstanceOf[Boolean]
|
||||
|
||||
val sitesOnly = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME)
|
||||
this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean]
|
||||
this.no_cmdline_in_header = originalGATK.no_cmdline_in_header
|
||||
this.sites_only = originalGATK.sites_only
|
||||
|
||||
// ensure that the gather function receives the same unsafe parameter as the scattered function
|
||||
this.unsafe = this.originalGATK.unsafe
|
||||
|
|
|
|||
|
|
@ -44,7 +44,9 @@ import org.broadinstitute.gatk.engine.filters.FilterManager;
|
|||
import org.broadinstitute.gatk.engine.filters.ReadFilter;
|
||||
import org.broadinstitute.gatk.engine.filters.ReadGroupBlackListFilter;
|
||||
import org.broadinstitute.gatk.engine.io.OutputTracker;
|
||||
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
|
||||
import org.broadinstitute.gatk.engine.io.stubs.Stub;
|
||||
import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode;
|
||||
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport;
|
||||
|
|
@ -65,6 +67,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.gatk.utils.progressmeter.ProgressMeter;
|
||||
import org.broadinstitute.gatk.utils.recalibration.BQSRArgumentSet;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.gatk.utils.text.XReadLines;
|
||||
import org.broadinstitute.gatk.utils.threading.ThreadEfficiencyMonitor;
|
||||
|
||||
|
|
@ -666,11 +669,13 @@ public class GenomeAnalysisEngine {
|
|||
*
|
||||
* @param outputTracker the tracker supplying the initialization data.
|
||||
*/
|
||||
private void initializeOutputStreams(OutputTracker outputTracker) {
|
||||
for (Map.Entry<ArgumentSource, Object> input : getInputs().entrySet())
|
||||
private void initializeOutputStreams(final OutputTracker outputTracker) {
|
||||
for (final Map.Entry<ArgumentSource, Object> input : getInputs().entrySet())
|
||||
outputTracker.addInput(input.getKey(), input.getValue());
|
||||
for (Stub<?> stub : getOutputs())
|
||||
for (final Stub<?> stub : getOutputs()) {
|
||||
stub.processArguments(argCollection);
|
||||
outputTracker.addOutput(stub);
|
||||
}
|
||||
|
||||
outputTracker.prepareWalker(walker, getArguments().strictnessLevel);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.arguments;
|
||||
|
||||
import htsjdk.samtools.SAMFileReader;
|
||||
import htsjdk.samtools.ValidationStringency;
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
|
|
@ -61,7 +60,7 @@ public class GATKArgumentCollection {
|
|||
* BAM file. Please see our online documentation for more details on input formatting requirements.
|
||||
*/
|
||||
@Input(fullName = "input_file", shortName = "I", doc = "Input file containing sequence data (SAM or BAM)", required = false)
|
||||
public List<String> samFiles = new ArrayList<String>();
|
||||
public List<String> samFiles = new ArrayList<>();
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "showFullBamList",doc="Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list files).")
|
||||
|
|
@ -120,7 +119,7 @@ public class GATKArgumentCollection {
|
|||
* is specified in each tool's documentation. The default filters cannot be disabled.
|
||||
*/
|
||||
@Argument(fullName = "read_filter", shortName = "rf", doc = "Filters to apply to reads before analysis", required = false)
|
||||
public final List<String> readFilters = new ArrayList<String>();
|
||||
public final List<String> readFilters = new ArrayList<>();
|
||||
|
||||
@ArgumentCollection
|
||||
public IntervalArgumentCollection intervalArguments = new IntervalArgumentCollection();
|
||||
|
|
@ -408,6 +407,39 @@ public class GATKArgumentCollection {
|
|||
required = false)
|
||||
public boolean disableAutoIndexCreationAndLockingWhenReadingRods = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.",
|
||||
required = false)
|
||||
public boolean disableCommandLineInVCF = false;
|
||||
|
||||
@Argument(fullName = "sites_only", shortName = "sites_only", doc = "Just output sites without genotypes (i.e. only the first 8 columns of the VCF)",
|
||||
required = false)
|
||||
public boolean sitesOnlyVCF = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "bcf", shortName = "bcf", doc = "Force BCF output, regardless of the file's extension",
|
||||
required = false)
|
||||
public boolean forceBCFOutput = false;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files (0 - 9, higher is more compressed)",
|
||||
minValue = 0, maxValue = 9, required = false)
|
||||
public Integer bamCompression = null;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName = "simplifyBAM", shortName = "simplifyBAM",
|
||||
doc = "If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier",
|
||||
required = false)
|
||||
public boolean simplifyBAM = false;
|
||||
|
||||
@Argument(fullName = "disable_bam_indexing", doc = "Turn off on-the-fly creation of indices for output BAM files.",
|
||||
required = false)
|
||||
public boolean disableBAMIndexing = false;
|
||||
|
||||
@Argument(fullName = "generate_md5", doc = "Enable on-the-fly creation of md5s for output BAM files.",
|
||||
required = false)
|
||||
public boolean enableBAMmd5 = false;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Multi-threading arguments
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.io.stubs;
|
||||
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.io.OutputTracker;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -101,6 +102,9 @@ public class OutputStreamStub extends OutputStream implements Stub<OutputStream>
|
|||
this.outputTracker = outputTracker;
|
||||
}
|
||||
|
||||
@Override
// NOTE(review): intentionally empty — a raw OutputStream appears to take none of the
// engine-level writer arguments (BAM compression/indexing/md5, VCF header options);
// confirm no stream-level argument should be consumed here.
public void processArguments( final GATKArgumentCollection argumentCollection ) {}
|
||||
|
||||
/**
|
||||
* @{inheritDoc}
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -30,29 +30,14 @@ import org.broadinstitute.gatk.utils.commandline.*;
|
|||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Insert a SAMFileWriterStub instead of a full-fledged concrete OutputStream implementations.
|
||||
*/
|
||||
public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
public static final String DEFAULT_ARGUMENT_FULLNAME = "outputBAM";
|
||||
public static final String DEFAULT_ARGUMENT_SHORTNAME = "ob";
|
||||
|
||||
public static final String COMPRESSION_FULLNAME = "bam_compression";
|
||||
public static final String COMPRESSION_SHORTNAME = "compress";
|
||||
|
||||
public static final String SIMPLIFY_BAM_FULLNAME = "simplifyBAM";
|
||||
public static final String SIMPLIFY_BAM_SHORTNAME = SIMPLIFY_BAM_FULLNAME;
|
||||
|
||||
public static final String DISABLE_INDEXING_FULLNAME = "disable_bam_indexing";
|
||||
public static final String ENABLE_MD5_FULLNAME = "generate_md5";
|
||||
|
||||
/**
|
||||
* The engine into which output stubs should be fed.
|
||||
|
|
@ -79,15 +64,6 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
return SAMFileWriter.class.equals(type) || GATKSAMFileWriter.class.equals(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
|
||||
return Arrays.asList( createBAMArgumentDefinition(source),
|
||||
createBAMCompressionArgumentDefinition(source),
|
||||
disableWriteIndexArgumentDefinition(source),
|
||||
enableMD5GenerationArgumentDefinition(source),
|
||||
createSimplifyBAMArgumentDefinition(source));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean createsTypeDefault(ArgumentSource source) {
|
||||
return !source.isRequired() && source.defaultsToStdout();
|
||||
|
|
@ -110,38 +86,15 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
// Extract all possible parameters that could be passed to a BAM file writer?
|
||||
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source);
|
||||
ArgumentDefinition bamArgumentDefinition = createDefaultArgumentDefinition(source);
|
||||
ArgumentMatchValue writerFileName = getArgumentValue( bamArgumentDefinition, matches );
|
||||
|
||||
ArgumentMatchValue compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches );
|
||||
Integer compressionLevel = compressionLevelText != null ? Integer.valueOf(compressionLevelText.asString()) : null;
|
||||
|
||||
boolean indexOnTheFly = !argumentIsPresent(disableWriteIndexArgumentDefinition(source),matches);
|
||||
boolean generateMD5 = argumentIsPresent(this.enableMD5GenerationArgumentDefinition(source),matches);
|
||||
boolean simplifyBAM = argumentIsPresent(createSimplifyBAMArgumentDefinition(source),matches);
|
||||
|
||||
// Validate the combination of parameters passed in.
|
||||
|
||||
// This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object;
|
||||
// therefore, the user must have failed to specify a type default
|
||||
if(writerFileName != null && writerFileName.asFile() == null && generateMD5)
|
||||
throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside.");
|
||||
|
||||
// Create the stub and set parameters.
|
||||
// Create the stub
|
||||
SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream);
|
||||
|
||||
if (writerFileName != null && writerFileName.asFile() != null ) {
|
||||
stub = new SAMFileWriterStub(engine, writerFileName.asFile());
|
||||
|
||||
if ( compressionLevel != null ) {
|
||||
stub.setCompressionLevel(ReadUtils.validateCompressionLevel(compressionLevel));
|
||||
} if ( indexOnTheFly )
|
||||
stub.setIndexOnTheFly(indexOnTheFly);
|
||||
if ( generateMD5 )
|
||||
stub.setGenerateMD5(generateMD5);
|
||||
if ( simplifyBAM )
|
||||
stub.setSimplifyBAM(simplifyBAM);
|
||||
|
||||
// WARNING: Side effects required by engine!
|
||||
parsingEngine.addTags(stub,getArgumentTags(matches));
|
||||
engine.addOutput(stub);
|
||||
|
|
@ -150,96 +103,4 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
return stub;
|
||||
}
|
||||
|
||||
/**
 * Gets the definition of the argument representing the BAM file itself.
 * The I/O type and doc string come from the annotation on the source field,
 * while the full/short names are forced to the canonical defaults.
 * @param source Argument source for the BAM file. Must not be null.
 * @return Argument definition for the BAM file itself. Will not be null.
 */
private ArgumentDefinition createBAMArgumentDefinition(ArgumentSource source) {
    Annotation annotation = getArgumentAnnotation(source);
    return new ArgumentDefinition( annotation,
                                   ArgumentIOType.getIOType(annotation),
                                   source.field.getType(),
                                   DEFAULT_ARGUMENT_FULLNAME,
                                   DEFAULT_ARGUMENT_SHORTNAME,
                                   ArgumentDefinition.getDoc(annotation),
                                   source.isRequired(),
                                   false,   // NOTE(review): positional boolean — verify meaning against the ArgumentDefinition constructor
                                   source.isMultiValued(),
                                   source.isHidden(),
                                   null,
                                   null,
                                   null,
                                   null);
}
|
||||
|
||||
/**
 * Creates the optional compression level argument for the BAM file.
 * Declared as a plain ARGUMENT (not I/O) of primitive int type; never required.
 * @param source Argument source for the BAM file. Must not be null.
 * @return Argument definition for the compression level. Will not be null.
 */
private ArgumentDefinition createBAMCompressionArgumentDefinition(ArgumentSource source) {
    return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
                                   int.class,
                                   COMPRESSION_FULLNAME,
                                   COMPRESSION_SHORTNAME,
                                   "Compression level to use for writing BAM files",
                                   false,   // not required
                                   false,   // NOTE(review): positional booleans — verify meanings against the ArgumentDefinition constructor
                                   false,
                                   source.isHidden(),
                                   null,
                                   null,
                                   null,
                                   null );
}
|
||||
|
||||
/**
 * Creates the optional boolean flag that turns off on-the-fly BAM index creation.
 * Has no short name; hidden status mirrors the source field's.
 * @param source Argument source for the BAM file. Must not be null.
 * @return Argument definition for the disable-indexing flag. Will not be null.
 */
private ArgumentDefinition disableWriteIndexArgumentDefinition(ArgumentSource source) {
    return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
                                   boolean.class,
                                   DISABLE_INDEXING_FULLNAME,
                                   null,    // no short name
                                   "Turn off on-the-fly creation of indices for output BAM files.",
                                   false,   // not required
                                   true,    // NOTE(review): positional boolean (true here, unlike the compression definition) — verify meaning against the ArgumentDefinition constructor
                                   false,
                                   source.isHidden(),
                                   null,
                                   null,
                                   null,
                                   null );
}
|
||||
|
||||
/**
 * Creates the optional boolean flag that enables on-the-fly md5 generation for
 * output BAM files. Structurally identical to the disable-indexing definition.
 * @param source Argument source for the BAM file. Must not be null.
 * @return Argument definition for the md5-generation flag. Will not be null.
 */
private ArgumentDefinition enableMD5GenerationArgumentDefinition(ArgumentSource source) {
    return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
                                   boolean.class,
                                   ENABLE_MD5_FULLNAME,
                                   null,    // no short name
                                   "Enable on-the-fly creation of md5s for output BAM files.",
                                   false,   // not required
                                   true,    // NOTE(review): positional boolean — verify meaning against the ArgumentDefinition constructor
                                   false,
                                   source.isHidden(),
                                   null,
                                   null,
                                   null,
                                   null );
}
|
||||
|
||||
|
||||
/**
 * Creates the optional boolean flag that requests simplified BAM output
 * (key reads only, extended tags stripped except the read group identifier).
 * Full name and short name are identical for this flag.
 * @param source Argument source for the BAM file. Must not be null.
 * @return Argument definition for the simplify-BAM flag. Will not be null.
 */
private ArgumentDefinition createSimplifyBAMArgumentDefinition(ArgumentSource source) {
    return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
                                   boolean.class,
                                   SIMPLIFY_BAM_FULLNAME,
                                   SIMPLIFY_BAM_SHORTNAME,
                                   "If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier",
                                   false,   // not required
                                   true,    // NOTE(review): positional boolean — verify meaning against the ArgumentDefinition constructor
                                   false,
                                   source.isHidden(),
                                   null,
                                   null,
                                   null,
                                   null );
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import htsjdk.samtools.SAMFileWriter;
|
|||
import htsjdk.samtools.SAMRecord;
|
||||
import htsjdk.samtools.util.ProgressLoggerInterface;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.io.OutputTracker;
|
||||
import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
|
|
@ -273,6 +274,16 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
|
|||
this.outputTracker = outputTracker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processArguments( final GATKArgumentCollection argumentCollection ) {
|
||||
if (argumentCollection.bamCompression != null)
|
||||
setCompressionLevel(argumentCollection.bamCompression);
|
||||
setGenerateMD5(argumentCollection.enableBAMmd5);
|
||||
setIndexOnTheFly(!argumentCollection.disableBAMIndexing);
|
||||
setSimplifyBAM(argumentCollection.simplifyBAM);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Use the given header as the target for this writer.
|
||||
* @param header The header to write.
|
||||
|
|
@ -284,7 +295,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
|
|||
}
|
||||
|
||||
private void initializeReadTransformers() {
|
||||
this.onOutputReadTransformers = new ArrayList<ReadTransformer>(engine.getReadTransformers().size());
|
||||
this.onOutputReadTransformers = new ArrayList<>(engine.getReadTransformers().size());
|
||||
for ( final ReadTransformer transformer : engine.getReadTransformers() ) {
|
||||
if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_OUTPUT )
|
||||
onOutputReadTransformers.add(transformer);
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.io.stubs;
|
||||
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.io.OutputTracker;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -47,6 +48,15 @@ public interface Stub<StreamType> {
|
|||
*/
|
||||
public void register( OutputTracker outputTracker );
|
||||
|
||||
/**
|
||||
* Provides a mechanism for uniformly processing command-line arguments
|
||||
* that are important for file processing. For example, this method
|
||||
* might pass on the compression value specified by the user to
|
||||
* a SAMFileWriter
|
||||
* @param argumentCollection The arguments to be processed
|
||||
*/
|
||||
public void processArguments( final GATKArgumentCollection argumentCollection );
|
||||
|
||||
/**
|
||||
* Returns the OutputStream represented by this stub or null if not available.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -30,15 +30,11 @@ import org.broadinstitute.gatk.utils.commandline.*;
|
|||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Injects new command-line arguments into the system providing support for the genotype writer.
|
||||
|
|
@ -47,9 +43,6 @@ import java.util.List;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
|
||||
public static final String SITES_ONLY_ARG_NAME = "sites_only";
|
||||
public static final String FORCE_BCF = "bcf";
|
||||
|
||||
/**
|
||||
* The engine into which output stubs should be fed.
|
||||
|
|
@ -88,15 +81,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
return VariantContextWriter.class.equals(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
|
||||
return Arrays.asList(
|
||||
createDefaultArgumentDefinition(source),
|
||||
createNoCommandLineHeaderArgumentDefinition(),
|
||||
createSitesOnlyArgumentDefinition(),
|
||||
createBCFArgumentDefinition() );
|
||||
}
|
||||
|
||||
/**
|
||||
* This command-line argument descriptor does want to override the provided default value.
|
||||
* @return true always.
|
||||
|
|
@ -145,9 +129,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
|
||||
|
||||
stub.setCompressed(isCompressed(writerFileName == null ? null: writerFileName.asString()));
|
||||
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
|
||||
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
|
||||
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
|
||||
|
||||
// WARNING: Side effects required by engine!
|
||||
parsingEngine.addTags(stub,getArgumentTags(matches));
|
||||
|
|
@ -156,66 +137,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
return stub;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the optional no_header argument for the VCF file.
|
||||
* @return Argument definition for the VCF file itself. Will not be null.
|
||||
*/
|
||||
private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
|
||||
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
|
||||
boolean.class,
|
||||
NO_HEADER_ARG_NAME,
|
||||
NO_HEADER_ARG_NAME,
|
||||
"Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.",
|
||||
false,
|
||||
true,
|
||||
false,
|
||||
true,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null );
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the optional sites_only argument definition
|
||||
* @return Argument definition for the VCF file itself. Will not be null.
|
||||
*/
|
||||
private ArgumentDefinition createSitesOnlyArgumentDefinition() {
|
||||
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
|
||||
boolean.class,
|
||||
SITES_ONLY_ARG_NAME,
|
||||
SITES_ONLY_ARG_NAME,
|
||||
"Just output sites without genotypes (i.e. only the first 8 columns of the VCF)",
|
||||
false,
|
||||
true,
|
||||
false,
|
||||
true,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null );
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the optional bcf argument definition
|
||||
* @return Argument definition for the VCF file itself. Will not be null.
|
||||
*/
|
||||
private ArgumentDefinition createBCFArgumentDefinition() {
|
||||
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
|
||||
boolean.class,
|
||||
FORCE_BCF,
|
||||
FORCE_BCF,
|
||||
"force BCF output, regardless of the file's extension",
|
||||
false,
|
||||
true,
|
||||
false,
|
||||
true,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null );
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the file will be compressed.
|
||||
* @param writerFileName Name of the file
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.gatk.engine.io.stubs;
|
|||
import htsjdk.samtools.SAMSequenceDictionary;
|
||||
import htsjdk.tribble.index.IndexCreator;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.io.OutputTracker;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
|
@ -229,6 +230,14 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
this.outputTracker = outputTracker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processArguments( final GATKArgumentCollection argumentCollection ) {
|
||||
setDoNotWriteGenotypes(argumentCollection.sitesOnlyVCF);
|
||||
setSkipWritingCommandLineHeader(argumentCollection.disableCommandLineInVCF);
|
||||
setForceBCF(argumentCollection.forceBCFOutput);
|
||||
|
||||
}
|
||||
|
||||
public void writeHeader(VCFHeader header) {
|
||||
vcfHeader = header;
|
||||
|
||||
|
|
|
|||
|
|
@ -59,16 +59,15 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
|
|||
@Argument(fullName="outputRoot", doc="output BAM file", required=false)
|
||||
public String outputRoot = "";
|
||||
|
||||
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
|
||||
public Integer BAMcompression = 5;
|
||||
|
||||
private static Logger logger = Logger.getLogger(SplitSamFile.class);
|
||||
private static String VERSION = "0.0.1";
|
||||
private static final Logger logger = Logger.getLogger(SplitSamFile.class);
|
||||
private static final String VERSION = "0.0.1";
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
logger.info("SplitSamFile version: " + VERSION);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
return read;
|
||||
}
|
||||
|
|
@ -78,36 +77,39 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
|
|||
// Standard I/O routines
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Override
|
||||
public void onTraversalDone(Map<String, SAMFileWriter> outputs) {
|
||||
for ( SAMFileWriter output : outputs.values() ) {
|
||||
output.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, SAMFileWriter> reduceInit() {
|
||||
HashMap<String, SAMFileHeader> headers = new HashMap<String, SAMFileHeader>();
|
||||
HashMap<String, SAMFileHeader> headers = new HashMap<>();
|
||||
for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) {
|
||||
final String sample = readGroup.getSample();
|
||||
if ( ! headers.containsKey(sample) ) {
|
||||
SAMFileHeader header = duplicateSAMFileHeader(this.getToolkit().getSAMFileHeader());
|
||||
logger.debug(String.format("Creating BAM header for sample %s", sample));
|
||||
ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
|
||||
ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<>();
|
||||
header.setReadGroups(readGroups);
|
||||
headers.put(sample, header);
|
||||
}
|
||||
|
||||
SAMFileHeader header = headers.get(sample);
|
||||
List<SAMReadGroupRecord> newReadGroups = new ArrayList<SAMReadGroupRecord>(header.getReadGroups());
|
||||
List<SAMReadGroupRecord> newReadGroups = new ArrayList<>(header.getReadGroups());
|
||||
newReadGroups.add(readGroup);
|
||||
header.setReadGroups(newReadGroups);
|
||||
}
|
||||
|
||||
HashMap<String, SAMFileWriter> outputs = new HashMap<String, SAMFileWriter>();
|
||||
HashMap<String, SAMFileWriter> outputs = new HashMap<>();
|
||||
for ( Map.Entry<String, SAMFileHeader> elt : headers.entrySet() ) {
|
||||
final String sample = elt.getKey();
|
||||
final String filename = outputRoot + sample + ".bam";
|
||||
logger.info(String.format("Creating BAM output file %s for sample %s", filename, sample));
|
||||
SAMFileWriter output = ReadUtils.createSAMFileWriterWithCompression(elt.getValue(), true, filename, BAMcompression);
|
||||
|
||||
final SAMFileWriter output = ReadUtils.createSAMFileWriter(filename, getToolkit(), elt.getValue());
|
||||
outputs.put(sample, output);
|
||||
}
|
||||
|
||||
|
|
@ -117,6 +119,7 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
|
|||
/**
|
||||
* Write out the read
|
||||
*/
|
||||
@Override
|
||||
public Map<String, SAMFileWriter> reduce(SAMRecord read, Map<String, SAMFileWriter> outputs) {
|
||||
final String sample = read.getReadGroup().getSample();
|
||||
SAMFileWriter output = outputs.get(sample);
|
||||
|
|
|
|||
|
|
@ -30,10 +30,10 @@ import com.google.java.contract.Requires;
|
|||
import htsjdk.samtools.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -131,26 +131,24 @@ public class ReadUtils {
|
|||
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
|
||||
|
||||
/**
|
||||
* Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
|
||||
* SAMFileWriter without compression otherwise.
|
||||
*
|
||||
* @param header
|
||||
* @param presorted
|
||||
* @param file
|
||||
* @param compression
|
||||
* @return a SAMFileWriter with the compression level if it is a bam.
|
||||
* Creates a SAMFileWriter using all of the features currently set in the engine (command line arguments, ReadTransformers, etc)
|
||||
* @param file the filename to write to
|
||||
* @param engine the engine
|
||||
* @return a SAMFileWriter with the correct options set
|
||||
*/
|
||||
public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) {
|
||||
validateCompressionLevel(compression);
|
||||
if (file.endsWith(".bam"))
|
||||
return new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(header, presorted, new File(file), compression);
|
||||
return new SAMFileWriterFactory().setCreateIndex(true).makeSAMOrBAMWriter(header, presorted, new File(file));
|
||||
public static SAMFileWriter createSAMFileWriter(final String file, final GenomeAnalysisEngine engine) {
|
||||
final SAMFileWriterStub output = new SAMFileWriterStub(engine, new File(file));
|
||||
output.processArguments(engine.getArguments());
|
||||
return output;
|
||||
}
|
||||
|
||||
public static int validateCompressionLevel(final int requestedCompressionLevel) {
|
||||
if ( requestedCompressionLevel < 0 || requestedCompressionLevel > 9 )
|
||||
throw new UserException.BadArgumentValue("compress", "Compression level must be 0-9 but got " + requestedCompressionLevel);
|
||||
return requestedCompressionLevel;
|
||||
/**
|
||||
* As {@link #createSAMFileWriter(String, org.broadinstitute.gatk.engine.GenomeAnalysisEngine)}, but also sets the header
|
||||
*/
|
||||
public static SAMFileWriter createSAMFileWriter(final String file, final GenomeAnalysisEngine engine, final SAMFileHeader header) {
|
||||
final SAMFileWriterStub output = (SAMFileWriterStub) createSAMFileWriter(file, engine);
|
||||
output.writeHeader(header);
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -666,4 +666,59 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
|
|||
return counter + sum;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Test output file-specific options
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
//Returns the output file
|
||||
private File testBAMFeatures(final String args, final String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec("-T PrintReads -R " + b37KGReference +
|
||||
" -I " + privateTestDir + "NA20313.highCoverageRegion.bam"
|
||||
+ " --no_pg_tag -o %s " + args,
|
||||
1, Arrays.asList(".bam"), Arrays.asList(md5));
|
||||
return executeTest("testBAMFeatures: "+args, spec).first.get(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSAMWriterFeatures() {
|
||||
testBAMFeatures("-compress 0", "bb4b55b1f80423970bb9384cbf0d8793");
|
||||
testBAMFeatures("-compress 9", "b85ee1636d62e1bb8ed65a245c307167");
|
||||
testBAMFeatures("-simplifyBAM", "38f9c30a27dfbc085a2ff52a1617d579");
|
||||
|
||||
//Validate MD5
|
||||
final String expectedMD5 = "6627b9ea33293a0083983feb94948c1d";
|
||||
final File md5Target = testBAMFeatures("--generate_md5", expectedMD5);
|
||||
final File md5File = new File(md5Target.getAbsoluteFile() + ".md5");
|
||||
md5File.deleteOnExit();
|
||||
Assert.assertTrue(md5File.exists(), "MD5 wasn't created");
|
||||
try {
|
||||
String md5 = new BufferedReader(new FileReader(md5File)).readLine();
|
||||
Assert.assertEquals(md5, expectedMD5, "Generated MD5 doesn't match expected");
|
||||
} catch (IOException e) {
|
||||
Assert.fail("Can't parse MD5 file", e);
|
||||
}
|
||||
|
||||
//Validate that index isn't created
|
||||
final String unindexedBAM = testBAMFeatures("--disable_bam_indexing", expectedMD5).getAbsolutePath();
|
||||
Assert.assertTrue(!(new File(unindexedBAM+".bai").exists()) &&
|
||||
!(new File(unindexedBAM.replace(".bam", ".bai")).exists()),
|
||||
"BAM index was created even though it was disabled");
|
||||
}
|
||||
|
||||
private void testVCFFeatures(final String args, final String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "CEUtrioTest.vcf"
|
||||
+ " --no_cmdline_in_header -o %s " + args,
|
||||
1, Arrays.asList(md5));
|
||||
executeTest("testVCFFeatures: "+args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCFWriterFeatures() {
|
||||
testVCFFeatures("--sites_only", "94bf1f2c0946e933515e4322323a5716");
|
||||
testVCFFeatures("--bcf", "03f2d6988f54a332da48803c78f9c4b3");
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue