Merge pull request #583 from broadinstitute/jt_tabix
Create Tabix indices for block-compressed VCFs
This commit is contained in:
commit
8703bd7ad4
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.*;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
|
|
@ -48,15 +49,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
|
||||
public static final String SITES_ONLY_ARG_NAME = "sites_only";
|
||||
public static final String FORCE_BCF = "bcf";
|
||||
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
|
||||
|
||||
//
|
||||
// static list of zipped suffixes supported by this system.
|
||||
//
|
||||
static {
|
||||
SUPPORTED_ZIPPED_SUFFIXES.add(".gz");
|
||||
SUPPORTED_ZIPPED_SUFFIXES.add(".gzip");
|
||||
}
|
||||
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES =
|
||||
new HashSet<>(Arrays.asList(VariantContextWriterFactory.BLOCK_COMPRESSED_EXTENSIONS));
|
||||
|
||||
/**
|
||||
* The engine into which output stubs should be fed.
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ public class CatVariants extends CommandLineProgram {
|
|||
|
||||
FileOutputStream outputStream = new FileOutputStream(outputFile);
|
||||
EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||
final IndexCreator idxCreator = GATKVCFUtils.getIndexCreator(variant_index_type, variant_index_parameter, outputFile);
|
||||
final IndexCreator idxCreator = GATKVCFUtils.getIndexCreator(variant_index_type, variant_index_parameter, outputFile, ref.getSequenceDictionary());
|
||||
final VariantContextWriter outputWriter = VariantContextWriterFactory.create(outputFile, outputStream, ref.getSequenceDictionary(), idxCreator, options);
|
||||
|
||||
boolean firstFile = true;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
|
|
@ -33,12 +35,15 @@ import org.broad.tribble.index.IndexCreator;
|
|||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broad.tribble.index.interval.IntervalIndexCreator;
|
||||
import org.broad.tribble.index.linear.LinearIndexCreator;
|
||||
import org.broad.tribble.index.tabix.TabixFormat;
|
||||
import org.broad.tribble.index.tabix.TabixIndexCreator;
|
||||
import org.broad.tribble.readers.LineIterator;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
|
@ -59,6 +64,7 @@ public class GATKVCFUtils {
|
|||
*/
|
||||
private GATKVCFUtils() { }
|
||||
|
||||
public static final Logger logger = Logger.getLogger(GATKVCFUtils.class);
|
||||
public final static String GATK_COMMAND_LINE_KEY = "GATKCommandLine";
|
||||
|
||||
public final static GATKVCFIndexType DEFAULT_INDEX_TYPE = GATKVCFIndexType.DYNAMIC_SEEK; // by default, optimize for seek time. All indices prior to Nov 2013 used this type.
|
||||
|
|
@ -192,6 +198,28 @@ public class GATKVCFUtils {
|
|||
* @return
|
||||
*/
|
||||
public static IndexCreator getIndexCreator(GATKVCFIndexType type, int parameter, File outFile) {
|
||||
// Convenience overload: no sequence dictionary is supplied, so delegate to the four-argument version with null.
return getIndexCreator(type, parameter, outFile, null);
|
||||
}
|
||||
|
||||
/**
|
||||
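* Create and return an IndexCreator. When outFile is compressed (per VCFWriterArgumentTypeDescriptor.isCompressed), a Tabix index creator is returned and type and parameter are ignored.
|
||||
* @param type the requested index type; a warning is logged if it is non-default and outFile is compressed
|
||||
* @param parameter the requested index parameter; a warning is logged if it is non-default and outFile is compressed
|
||||
* @param outFile the output file being indexed; its path is tested with isCompressed to decide whether a Tabix index is created
|
||||
* @param sequenceDictionary the sequence dictionary handed to the Tabix index creator; may be null, in which case the creator is built without one
|
||||
* @return a TabixIndexCreator for compressed output; otherwise an IndexCreator chosen according to type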
|
||||
*/
|
||||
public static IndexCreator getIndexCreator(GATKVCFIndexType type, int parameter, File outFile, SAMSequenceDictionary sequenceDictionary) {
|
||||
if (VCFWriterArgumentTypeDescriptor.isCompressed(outFile.toString())) {
|
||||
if (type != GATKVCFUtils.DEFAULT_INDEX_TYPE || parameter != GATKVCFUtils.DEFAULT_INDEX_PARAMETER)
|
||||
logger.warn("Creating Tabix index for " + outFile + ", ignoring user-specified index type and parameter");
|
||||
|
||||
if (sequenceDictionary == null)
|
||||
return new TabixIndexCreator(TabixFormat.VCF);
|
||||
else
|
||||
return new TabixIndexCreator(sequenceDictionary, TabixFormat.VCF);
|
||||
}
|
||||
|
||||
IndexCreator idxCreator;
|
||||
switch (type) {
|
||||
case DYNAMIC_SEEK: idxCreator = new DynamicIndexCreator(outFile, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); break;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ import org.apache.commons.lang.StringUtils;
|
|||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broad.tribble.index.tabix.TabixFormat;
|
||||
import org.broad.tribble.util.TabixUtils;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
|
@ -314,8 +316,9 @@ public class WalkerTest extends BaseTest {
|
|||
String ext = spec.exts == null ? ".tmp" : "." + spec.exts.get(i);
|
||||
File fl = createTempFile(String.format("walktest.tmp_param.%d", i), ext);
|
||||
|
||||
// Mark corresponding *.idx for deletion on exit as well just in case an index is created for the temp file:
|
||||
new File(fl.getAbsolutePath() + ".idx").deleteOnExit();
|
||||
// Mark corresponding indices for deletion on exit as well just in case an index is created for the temp file:
|
||||
new File(fl.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION).deleteOnExit();
|
||||
new File(fl.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION).deleteOnExit();
|
||||
|
||||
tmpFiles.add(fl);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,13 +25,17 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.index.AbstractIndex;
|
||||
import org.broad.tribble.index.ChrIndex;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broad.tribble.index.interval.IntervalTreeIndex;
|
||||
import org.broad.tribble.index.linear.LinearIndex;
|
||||
import org.broad.tribble.index.tabix.TabixIndex;
|
||||
import org.broad.tribble.util.TabixUtils;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.testng.Assert;
|
||||
import org.testng.TestException;
|
||||
|
|
@ -216,7 +220,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
spec.disableShadowBCF();
|
||||
|
||||
File outVCF = executeTest(name, spec).first.get(0);
|
||||
File outIdx = new File(outVCF.getAbsolutePath() + ".idx");
|
||||
File outIdx = new File(outVCF.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
|
||||
|
||||
final Index actualIndex = IndexFactory.loadIndex(outIdx.getAbsolutePath());
|
||||
final Index expectedIndex = testSpec.getIndex(outVCF);
|
||||
|
|
@ -272,4 +276,58 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
LinkedHashMap<String, ChrIndex> chrIndices = (LinkedHashMap<String, ChrIndex>) f.get(index);
|
||||
return chrIndices.get(chr);
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// Block-Compressed Tabix Index Tests
|
||||
//
|
||||
//
|
||||
|
||||
/**
 * Test specification for the block-compressed Tabix index tests.
 * Holds the output file extension under test.
 */
private class BlockCompressedIndexCreatorTest extends TestDataProvider {
|
||||
private final String extension;
|
||||
|
||||
private BlockCompressedIndexCreatorTest(String extension) {
|
||||
// Pass this class to the TestDataProvider constructor; blockCompressedIndexCreatorData() later
// calls TestDataProvider.getTests with the same class.
super(BlockCompressedIndexCreatorTest.class);
|
||||
|
||||
this.extension = extension;
|
||||
}
|
||||
|
||||
// Human-readable label; also appended to the test name in testBlockCompressedIndexCreation.
public String toString() {
|
||||
return String.format("File extension %s", extension);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Data provider for testBlockCompressedIndexCreation. Creates one BlockCompressedIndexCreatorTest
 * per entry of VCFWriterArgumentTypeDescriptor.SUPPORTED_ZIPPED_SUFFIXES, using ".vcf" + suffix
 * as the file extension.
 *
 * @return the tests obtained from TestDataProvider.getTests for BlockCompressedIndexCreatorTest
 */
@DataProvider(name = "BlockCompressedIndexDataProvider")
|
||||
public Object[][] blockCompressedIndexCreatorData() {
|
||||
// The new instances are not stored here; presumably the TestDataProvider constructor
// registers them so that getTests below can return them (not visible in this file section).
for (String suffix : VCFWriterArgumentTypeDescriptor.SUPPORTED_ZIPPED_SUFFIXES)
|
||||
new BlockCompressedIndexCreatorTest(".vcf" + suffix);
|
||||
|
||||
return TestDataProvider.getTests(BlockCompressedIndexCreatorTest.class);
|
||||
}
|
||||
|
||||
/**
 * Runs SelectVariants with its output written to a temp file that carries the extension from
 * testSpec, then checks that no Tribble index exists next to the output and that the index
 * loaded from the Tabix index path is a TabixIndex.
 *
 * @param testSpec supplies the output file extension to test
 */
@Test(dataProvider = "BlockCompressedIndexDataProvider")
|
||||
public void testBlockCompressedIndexCreation(BlockCompressedIndexCreatorTest testSpec) throws NoSuchFieldException, IllegalAccessException {
|
||||
|
||||
final String commandLine = " -T SelectVariants" +
|
||||
" -R " + b37KGReference +
|
||||
" --no_cmdline_in_header" +
|
||||
" -L 20" +
|
||||
" -V " + b37_NA12878_OMNI;
|
||||
final String name = "testBlockCompressedIndexCreation: " + testSpec.toString();
|
||||
|
||||
// The temp file's extension is what selects the block-compressed output path under test.
File outVCF = createTempFile("testBlockCompressedIndexCreation", testSpec.extension);
|
||||
// NOTE(review): presumably one output file with an empty expected MD5 — confirm against WalkerTestSpec.
final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
spec.setOutputFileLocation(outVCF);
|
||||
|
||||
executeTest(name, spec);
|
||||
|
||||
// A Tribble index must not have been written alongside the output.
File outTribbleIdx = new File(outVCF.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
|
||||
Assert.assertFalse(outTribbleIdx.exists(), "testBlockCompressedIndexCreation: Want Tabix index but Tribble index exists: " + outTribbleIdx);
|
||||
|
||||
// The index found at the Tabix extension must load as a TabixIndex.
File outTabixIdx = new File(outVCF.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
|
||||
final Index actualIndex = IndexFactory.loadIndex(outTabixIdx.toString());
|
||||
Assert.assertTrue(actualIndex instanceof TabixIndex, "testBlockCompressedIndexCreation: Want Tabix index but index is not Tabix: " + outTabixIdx);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -3,23 +3,23 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>picard</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
<name>picard</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.broadinstitute</groupId>
|
||||
<artifactId>variant</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.broad</groupId>
|
||||
<artifactId>tribble</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
</dependency>
|
||||
<!-- TODO: Picard is using a custom zip with just ant's BZip2 classes. See also: http://www.kohsuke.org/bzip2 -->
|
||||
<dependency>
|
||||
Binary file not shown.
|
|
@ -3,7 +3,7 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
<name>sam-jdk</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
Binary file not shown.
|
|
@ -3,13 +3,13 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.broad</groupId>
|
||||
<artifactId>tribble</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
<name>tribble</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
Binary file not shown.
|
|
@ -3,18 +3,18 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.broadinstitute</groupId>
|
||||
<artifactId>variant</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
<name>variant</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.broad</groupId>
|
||||
<artifactId>tribble</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.110.1763</version>
|
||||
<version>1.110.1773</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
|
@ -43,7 +43,7 @@
|
|||
<test.args>-Xmx${test.maxmemory} -XX:+UseParallelOldGC -XX:ParallelGCThreads=${java.gc.threads} -XX:GCTimeLimit=${java.gc.timeLimit} -XX:GCHeapFreeLimit=${java.gc.heapFreeLimit}</test.args>
|
||||
|
||||
<!-- Version numbers for picard sam-jdk. Usually kept in sync. -->
|
||||
<picard.public.version>1.110.1763</picard.public.version>
|
||||
<picard.public.version>1.110.1773</picard.public.version>
|
||||
<sam.version>${picard.public.version}</sam.version>
|
||||
<picard.version>${picard.public.version}</picard.version>
|
||||
<variant.version>${picard.public.version}</variant.version>
|
||||
|
|
|
|||
Loading…
Reference in New Issue