diff --git a/public/gatk-engine/pom.xml b/public/gatk-engine/pom.xml index e35726a06..db1e33bb5 100644 --- a/public/gatk-engine/pom.xml +++ b/public/gatk-engine/pom.xml @@ -24,6 +24,10 @@ gatk-utils ${project.version} + + com.intel.gkl + gkl + net.java.dev.jets3t jets3t diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java index c564d78d6..6a8479645 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java @@ -26,11 +26,15 @@ package org.broadinstitute.gatk.engine; import com.google.java.contract.Ensures; +import com.intel.gkl.compression.IntelDeflaterFactory; +import com.intel.gkl.compression.IntelInflaterFactory; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.reference.IndexedFastaSequenceFile; import htsjdk.samtools.reference.ReferenceSequenceFile; +import htsjdk.samtools.util.BlockCompressedOutputStream; +import htsjdk.samtools.util.BlockGunzipper; import htsjdk.variant.vcf.VCFConstants; import org.apache.log4j.Logger; import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; @@ -268,6 +272,9 @@ public class GenomeAnalysisEngine { if (args.nonDeterministicRandomSeed) Utils.resetRandomGenerator(System.currentTimeMillis()); + // Try to use the accelerated Intel zlib implementations if possible, or fall back to the JDK implementation if necessary (or requested) + initializeCompressionAndDecompression(); + // if the use specified an input BQSR recalibration table then enable on the fly recalibration if (args.BQSR_RECAL_FILE != null) { if (args.BQSR_RECAL_FILE.exists()) { @@ -386,6 +393,21 @@ public class GenomeAnalysisEngine { return Collections.unmodifiableList(filters); } + /** Installs a new IntelDeflaterFactory as the BlockCompressedOutputStream default unless the useJdkDeflater argument is set, and a new IntelInflaterFactory as the BlockGunzipper default unless the useJdkInflater argument is set, then logs at INFO which implementation is in effect for each. "Intel" is reported only when the current default factory is an Intel factory and its usingIntelDeflater() / usingIntelInflater() returns true; otherwise the JDK name is logged. When a use-JDK flag is set the current default factory is left untouched rather than reset. NOTE(review): the setDefault...Factory calls are static, so presumably a factory installed earlier in the same JVM stays active even when the flag asks for the JDK implementation - confirm. */ public 
void initializeCompressionAndDecompression() { + // Use the Intel Inflater/Deflater for accelerated BAM reading/writing, if possible: + if (! getArguments().useJdkDeflater) { + BlockCompressedOutputStream.setDefaultDeflaterFactory(new IntelDeflaterFactory()); + } + if (! getArguments().useJdkInflater) { + BlockGunzipper.setDefaultInflaterFactory(new IntelInflaterFactory()); + } + + final boolean usingIntelDeflater = (BlockCompressedOutputStream.getDefaultDeflaterFactory() instanceof IntelDeflaterFactory && ((IntelDeflaterFactory)BlockCompressedOutputStream.getDefaultDeflaterFactory()).usingIntelDeflater()); + logger.info("Deflater: " + (usingIntelDeflater ? "IntelDeflater": "JdkDeflater")); + final boolean usingIntelInflater = (BlockGunzipper.getDefaultInflaterFactory() instanceof IntelInflaterFactory && ((IntelInflaterFactory)BlockGunzipper.getDefaultInflaterFactory()).usingIntelInflater()); + logger.info("Inflater: " + (usingIntelInflater ? "IntelInflater": "JdkInflater")); + } + /** * Returns a list of active, initialized read transformers * diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java index f7d7ca0a1..b7d0d5a4b 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java @@ -402,6 +402,19 @@ public class GATKArgumentCollection { @Advanced @Argument(fullName = "unsafe", shortName = "U", doc = "Enable unsafe operations: nothing will be checked at runtime", required = false) public ValidationExclusion.TYPE unsafe; + + /** + * There are two different libraries that can be used for compression when writing BAM files: IntelDeflater (the new default in GATK version 3.8) and the JDK Deflater (the previous GATK default) which is an older 
implementation and is slower in our tests. Use this flag to disable the IntelDeflater and use the JDK Deflater in its place. + */ + @Argument(fullName = "use_jdk_deflater", shortName = "jdk_deflater", doc = "Use the JDK Deflater instead of the IntelDeflater for writing BAMs") + public boolean useJdkDeflater = false; + + /** + * There are two different libraries that can be used for decompression when reading BAM files: IntelInflater (the new default in GATK version 3.8) and the JDK Inflater (the previous GATK default) which is an older implementation and is slower in our tests. Use this flag to disable the IntelInflater and use the JDK Inflater in its place. + */ + @Argument(fullName = "use_jdk_inflater", shortName = "jdk_inflater", doc = "Use the JDK Inflater instead of the IntelInflater for reading BAMs") + public boolean useJdkInflater = false; + /** * Not recommended for general use. Disables both auto-generation of index files and index file locking * when reading VCFs and other rods and an index isn't present or is out-of-date. The file locking necessary for auto index diff --git a/public/gatk-root/pom.xml b/public/gatk-root/pom.xml index 5937b95ff..e6427dae5 100644 --- a/public/gatk-root/pom.xml +++ b/public/gatk-root/pom.xml @@ -86,6 +86,11 @@ picard ${picard.version} + + com.intel.gkl + gkl + 0.4.3 + log4j log4j