diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 91013673f..34831471a 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.io.OutputStream; @@ -48,15 +49,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header"; public static final String SITES_ONLY_ARG_NAME = "sites_only"; public static final String FORCE_BCF = "bcf"; - public static final HashSet SUPPORTED_ZIPPED_SUFFIXES = new HashSet(); - - // - // static list of zipped suffixes supported by this system. - // - static { - SUPPORTED_ZIPPED_SUFFIXES.add(".gz"); - SUPPORTED_ZIPPED_SUFFIXES.add(".gzip"); - } + public static final HashSet SUPPORTED_ZIPPED_SUFFIXES = + new HashSet<>(Arrays.asList(VariantContextWriterFactory.BLOCK_COMPRESSED_EXTENSIONS)); /** * The engine into which output stubs should be fed. diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/tools/CatVariants.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/tools/CatVariants.java index 1dc5f8516..b1fa87807 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/tools/CatVariants.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/tools/CatVariants.java @@ -213,7 +213,7 @@ public class CatVariants extends CommandLineProgram { FileOutputStream outputStream = new FileOutputStream(outputFile); EnumSet options = EnumSet.of(Options.INDEX_ON_THE_FLY); - final IndexCreator idxCreator = GATKVCFUtils.getIndexCreator(variant_index_type, variant_index_parameter, outputFile); + final IndexCreator idxCreator = GATKVCFUtils.getIndexCreator(variant_index_type, variant_index_parameter, outputFile, ref.getSequenceDictionary()); final VariantContextWriter outputWriter = VariantContextWriterFactory.create(outputFile, outputStream, ref.getSequenceDictionary(), idxCreator, options); boolean firstFile = true; diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java index 8f0b385b4..670c6c89d 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.utils.variant; +import net.sf.samtools.SAMSequenceDictionary; +import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodecHeader; @@ -33,12 +35,15 @@ import org.broad.tribble.index.IndexCreator; import org.broad.tribble.index.IndexFactory; import org.broad.tribble.index.interval.IntervalIndexCreator; import org.broad.tribble.index.linear.LinearIndexCreator; +import org.broad.tribble.index.tabix.TabixFormat; +import org.broad.tribble.index.tabix.TabixIndexCreator; import org.broad.tribble.readers.LineIterator; import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.vcf.*; @@ -59,6 +64,7 @@ public class GATKVCFUtils { */ private GATKVCFUtils() { } + public static final Logger logger = Logger.getLogger(GATKVCFUtils.class); public final static String GATK_COMMAND_LINE_KEY = "GATKCommandLine"; public final static GATKVCFIndexType DEFAULT_INDEX_TYPE = GATKVCFIndexType.DYNAMIC_SEEK; // by default, optimize for seek time. All indices prior to Nov 2013 used this type. @@ -192,6 +198,28 @@ public class GATKVCFUtils { * @return */ public static IndexCreator getIndexCreator(GATKVCFIndexType type, int parameter, File outFile) { + return getIndexCreator(type, parameter, outFile, null); + } + + /** + * Create and return an IndexCreator + * @param type + * @param parameter + * @param outFile + * @param sequenceDictionary + * @return + */ + public static IndexCreator getIndexCreator(GATKVCFIndexType type, int parameter, File outFile, SAMSequenceDictionary sequenceDictionary) { + if (VCFWriterArgumentTypeDescriptor.isCompressed(outFile.toString())) { + if (type != GATKVCFUtils.DEFAULT_INDEX_TYPE || parameter != GATKVCFUtils.DEFAULT_INDEX_PARAMETER) + logger.warn("Creating Tabix index for " + outFile + ", ignoring user-specified index type and parameter"); + + if (sequenceDictionary == null) + return new TabixIndexCreator(TabixFormat.VCF); + else + return new TabixIndexCreator(sequenceDictionary, TabixFormat.VCF); + } + IndexCreator idxCreator; switch (type) { case DYNAMIC_SEEK: idxCreator = new DynamicIndexCreator(outFile, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); break; diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/WalkerTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/WalkerTest.java index 994a2419c..c6bfebace 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/WalkerTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/WalkerTest.java @@ -29,6 +29,8 @@ import org.apache.commons.lang.StringUtils; import org.broad.tribble.Tribble; import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; +import org.broad.tribble.index.tabix.TabixFormat; +import org.broad.tribble.util.TabixUtils; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -314,8 +316,9 @@ public class WalkerTest extends BaseTest { String ext = spec.exts == null ? ".tmp" : "." + spec.exts.get(i); File fl = createTempFile(String.format("walktest.tmp_param.%d", i), ext); - // Mark corresponding *.idx for deletion on exit as well just in case an index is created for the temp file: - new File(fl.getAbsolutePath() + ".idx").deleteOnExit(); + // Mark corresponding indices for deletion on exit as well just in case an index is created for the temp file: + new File(fl.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION).deleteOnExit(); + new File(fl.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION).deleteOnExit(); tmpFiles.add(fl); } diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/VCFIntegrationTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/VCFIntegrationTest.java index f29a1106c..379a2295b 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/VCFIntegrationTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/VCFIntegrationTest.java @@ -25,13 +25,17 @@ package org.broadinstitute.sting.utils.variant; +import org.broad.tribble.Tribble; import org.broad.tribble.index.AbstractIndex; import org.broad.tribble.index.ChrIndex; import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; import org.broad.tribble.index.interval.IntervalTreeIndex; import org.broad.tribble.index.linear.LinearIndex; +import org.broad.tribble.index.tabix.TabixIndex; +import org.broad.tribble.util.TabixUtils; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.variant.vcf.VCFCodec; import org.testng.Assert; import org.testng.TestException; @@ -216,7 +220,7 @@ public class VCFIntegrationTest extends WalkerTest { spec.disableShadowBCF(); File outVCF = executeTest(name, spec).first.get(0); - File outIdx = new File(outVCF.getAbsolutePath() + ".idx"); + File outIdx = new File(outVCF.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION); final Index actualIndex = IndexFactory.loadIndex(outIdx.getAbsolutePath()); final Index expectedIndex = testSpec.getIndex(outVCF); @@ -272,4 +276,58 @@ public class VCFIntegrationTest extends WalkerTest { LinkedHashMap chrIndices = (LinkedHashMap) f.get(index); return chrIndices.get(chr); } + + // + // + // Block-Compressed Tabix Index Tests + // + // + + private class BlockCompressedIndexCreatorTest extends TestDataProvider { + private final String extension; + + private BlockCompressedIndexCreatorTest(String extension) { + super(BlockCompressedIndexCreatorTest.class); + + this.extension = extension; + } + + public String toString() { + return String.format("File extension %s", extension); + } + } + + @DataProvider(name = "BlockCompressedIndexDataProvider") + public Object[][] blockCompressedIndexCreatorData() { + for (String suffix : VCFWriterArgumentTypeDescriptor.SUPPORTED_ZIPPED_SUFFIXES) + new BlockCompressedIndexCreatorTest(".vcf" + suffix); + + return TestDataProvider.getTests(BlockCompressedIndexCreatorTest.class); + } + + @Test(dataProvider = "BlockCompressedIndexDataProvider") + public void testBlockCompressedIndexCreation(BlockCompressedIndexCreatorTest testSpec) throws NoSuchFieldException, IllegalAccessException { + + final String commandLine = " -T SelectVariants" + + " -R " + b37KGReference + + " --no_cmdline_in_header" + + " -L 20" + + " -V " + b37_NA12878_OMNI; + final String name = "testBlockCompressedIndexCreation: " + testSpec.toString(); + + File outVCF = createTempFile("testBlockCompressedIndexCreation", testSpec.extension); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("")); + spec.disableShadowBCF(); + spec.setOutputFileLocation(outVCF); + + executeTest(name, spec); + + File outTribbleIdx = new File(outVCF.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION); + Assert.assertFalse(outTribbleIdx.exists(), "testBlockCompressedIndexCreation: Want Tabix index but Tribble index exists: " + outTribbleIdx); + + File outTabixIdx = new File(outVCF.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION); + final Index actualIndex = IndexFactory.loadIndex(outTabixIdx.toString()); + Assert.assertTrue(actualIndex instanceof TabixIndex, "testBlockCompressedIndexCreation: Want Tabix index but index is not Tabix: " + outTabixIdx); + } + } diff --git a/public/repo/net/sf/picard/1.110.1763/picard-1.110.1763.jar b/public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.jar similarity index 93% rename from public/repo/net/sf/picard/1.110.1763/picard-1.110.1763.jar rename to public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.jar index cea4a9512..2b83e8e13 100644 Binary files a/public/repo/net/sf/picard/1.110.1763/picard-1.110.1763.jar and b/public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.jar differ diff --git a/public/repo/net/sf/picard/1.110.1763/picard-1.110.1763.pom b/public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.pom similarity index 90% rename from public/repo/net/sf/picard/1.110.1763/picard-1.110.1763.pom rename to public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.pom index 2438ab776..a95cdf671 100644 --- a/public/repo/net/sf/picard/1.110.1763/picard-1.110.1763.pom +++ b/public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.pom @@ -3,23 +3,23 @@ 4.0.0 net.sf picard - 1.110.1763 + 1.110.1773 picard net.sf sam - 1.110.1763 + 1.110.1773 org.broadinstitute variant - 1.110.1763 + 1.110.1773 org.broad tribble - 1.110.1763 + 1.110.1773 diff --git a/public/repo/net/sf/sam/1.110.1763/sam-1.110.1763.jar b/public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.jar similarity index 95% rename from public/repo/net/sf/sam/1.110.1763/sam-1.110.1763.jar rename to public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.jar index f2c40dd61..686252e01 100644 Binary files a/public/repo/net/sf/sam/1.110.1763/sam-1.110.1763.jar and b/public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.jar differ diff --git a/public/repo/net/sf/sam/1.110.1763/sam-1.110.1763.pom b/public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.pom similarity index 95% rename from public/repo/net/sf/sam/1.110.1763/sam-1.110.1763.pom rename to public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.pom index 5fe3c5cd8..3c951c14f 100644 --- a/public/repo/net/sf/sam/1.110.1763/sam-1.110.1763.pom +++ b/public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.pom @@ -3,7 +3,7 @@ 4.0.0 net.sf sam - 1.110.1763 + 1.110.1773 sam-jdk diff --git a/public/repo/org/broad/tribble/1.110.1763/tribble-1.110.1763.jar b/public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.jar similarity index 94% rename from public/repo/org/broad/tribble/1.110.1763/tribble-1.110.1763.jar rename to public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.jar index 3c84086e6..1cf68d647 100644 Binary files a/public/repo/org/broad/tribble/1.110.1763/tribble-1.110.1763.jar and b/public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.jar differ diff --git a/public/repo/org/broad/tribble/1.110.1763/tribble-1.110.1763.pom b/public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.pom similarity index 87% rename from public/repo/org/broad/tribble/1.110.1763/tribble-1.110.1763.pom rename to public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.pom index 4dee936f1..7d1233599 100644 --- a/public/repo/org/broad/tribble/1.110.1763/tribble-1.110.1763.pom +++ b/public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.pom @@ -3,13 +3,13 @@ 4.0.0 org.broad tribble - 1.110.1763 + 1.110.1773 tribble net.sf sam - 1.110.1763 + 1.110.1773 diff --git a/public/repo/org/broadinstitute/variant/1.110.1763/variant-1.110.1763.jar b/public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.jar similarity index 95% rename from public/repo/org/broadinstitute/variant/1.110.1763/variant-1.110.1763.jar rename to public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.jar index efeaf209c..5ece61a1b 100644 Binary files a/public/repo/org/broadinstitute/variant/1.110.1763/variant-1.110.1763.jar and b/public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.jar differ diff --git a/public/repo/org/broadinstitute/variant/1.110.1763/variant-1.110.1763.pom b/public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.pom similarity index 90% rename from public/repo/org/broadinstitute/variant/1.110.1763/variant-1.110.1763.pom rename to public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.pom index 8e42e0665..f37c23ede 100644 --- a/public/repo/org/broadinstitute/variant/1.110.1763/variant-1.110.1763.pom +++ b/public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.pom @@ -3,18 +3,18 @@ 4.0.0 org.broadinstitute variant - 1.110.1763 + 1.110.1773 variant org.broad tribble - 1.110.1763 + 1.110.1773 net.sf sam - 1.110.1763 + 1.110.1773 org.apache.commons diff --git a/public/sting-root/pom.xml b/public/sting-root/pom.xml index abda9ae01..d59b6d415 100644 --- a/public/sting-root/pom.xml +++ b/public/sting-root/pom.xml @@ -43,7 +43,7 @@ -Xmx${test.maxmemory} -XX:+UseParallelOldGC -XX:ParallelGCThreads=${java.gc.threads} -XX:GCTimeLimit=${java.gc.timeLimit} -XX:GCHeapFreeLimit=${java.gc.heapFreeLimit} - 1.110.1763 + 1.110.1773 ${picard.public.version} ${picard.public.version} ${picard.public.version}