diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 17849749e..1dc760709 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -35,6 +35,8 @@ import org.broadinstitute.sting.gatk.samples.PedigreeValidationType; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variant.GATKVCFIndexType; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import java.io.File; import java.util.ArrayList; @@ -454,5 +456,26 @@ public class GATKArgumentCollection { @Hidden public boolean generateShadowBCF = false; // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed + + // -------------------------------------------------------------------------------------------------------------- + // + // VCF/BCF index parameters + // + // -------------------------------------------------------------------------------------------------------------- + + /** + * Specify the Tribble indexing strategy to use for VCFs. 
+ * + * LINEAR creates a LinearIndex with bins of equal width, specified by the Bin Width parameter + * INTERVAL creates an IntervalTreeIndex with bins with an equal amount of features, specified by the Features Per Bin parameter + * DYNAMIC_SEEK attempts to optimize for minimal seek time by choosing an appropriate strategy and parameter (user-supplied parameter is ignored) + * DYNAMIC_SIZE attempts to optimize for minimal index size by choosing an appropriate strategy and parameter (user-supplied parameter is ignored) + */ + + @Argument(fullName="variant_index_type",shortName = "variant_index_type",doc="which type of IndexCreator to use for VCF/BCF indices",required=false) + public GATKVCFIndexType variant_index_type = GATKVCFUtils.DEFAULT_INDEX_TYPE; + + @Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator",required=false) + public int variant_index_parameter = GATKVCFUtils.DEFAULT_INDEX_PARAMETER; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java index 35aba8114..231f46f10 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java @@ -62,7 +62,7 @@ public class StorageFactory { * @param Type of the stream to create. * @return Storage object with a facade of type T. 
*/ - public static Storage createStorage( Stub stub, File file ) { + public static Storage createStorage( Stub stub, File file ) { Storage storage; if(stub instanceof OutputStreamStub) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java index e1dc36f5c..de203e59f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java @@ -133,14 +133,21 @@ public class VariantContextWriterStorage implements Storage options = stub.getWriterOptions(indexOnTheFly); - VariantContextWriter writer = VariantContextWriterFactory.create(file, this.stream, stub.getMasterSequenceDictionary(), options); + VariantContextWriter writer = VariantContextWriterFactory.create(file, this.stream, stub.getMasterSequenceDictionary(), stub.getIndexCreator(), options); // if the stub says to test BCF, create a secondary writer to BCF and an 2 way out writer to send to both // TODO -- remove me when argument generateShadowBCF is removed if ( stub.alsoWriteBCFForTest() && ! 
VariantContextWriterFactory.isBCFOutput(file, options)) { final File bcfFile = BCF2Utils.shadowBCF(file); if ( bcfFile != null ) { - VariantContextWriter bcfWriter = VariantContextWriterFactory.create(bcfFile, stub.getMasterSequenceDictionary(), options); + FileOutputStream bcfStream; + try { + bcfStream = new FileOutputStream(bcfFile); + } catch (FileNotFoundException e) { + throw new RuntimeException(bcfFile + ": Unable to create BCF writer", e); + } + + VariantContextWriter bcfWriter = VariantContextWriterFactory.create(bcfFile, bcfStream, stub.getMasterSequenceDictionary(), stub.getIndexCreator(), options); writer = new TestWriter(writer, bcfWriter); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java index 3e3d6de41..9ad388adf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.io.stubs; import net.sf.samtools.SAMSequenceDictionary; +import org.broad.tribble.index.IndexCreator; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; @@ -70,6 +71,17 @@ public class VariantContextWriterStub implements Stub, Var */ private final PrintStream genotypeStream; + /** + * A hack: push the argument sources into the VCF header so that the VCF header + * can rebuild the command-line arguments. 
+ */ + private final Collection argumentSources; + + /** + * Which IndexCreator to use + */ + private final IndexCreator indexCreator; + /** * The cached VCF header (initialized to null) */ @@ -80,12 +92,6 @@ public class VariantContextWriterStub implements Stub, Var */ private boolean isCompressed = false; - /** - * A hack: push the argument sources into the VCF header so that the VCF header - * can rebuild the command-line arguments. - */ - private final Collection argumentSources; - /** * Should the header be written out? A hidden argument. */ @@ -118,6 +124,7 @@ public class VariantContextWriterStub implements Stub, Var this.engine = engine; this.genotypeFile = genotypeFile; this.genotypeStream = null; + this.indexCreator = GATKVCFUtils.getIndexCreator(engine.getArguments().variant_index_type, engine.getArguments().variant_index_parameter, genotypeFile); this.argumentSources = argumentSources; } @@ -132,6 +139,7 @@ public class VariantContextWriterStub implements Stub, Var this.engine = engine; this.genotypeFile = null; this.genotypeStream = new PrintStream(genotypeStream); + this.indexCreator = null; this.argumentSources = argumentSources; } @@ -175,6 +183,10 @@ public class VariantContextWriterStub implements Stub, Var this.forceBCF = forceBCF; } + public IndexCreator getIndexCreator() { + return indexCreator; + } + /** * Gets the master sequence dictionary from the engine associated with this stub * @link GenomeAnalysisEngine.getMasterSequenceDictionary diff --git a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java index b59786d15..8e5078f1f 100644 --- a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java +++ b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java @@ -31,12 +31,15 @@ import org.apache.log4j.BasicConfigurator; import org.apache.log4j.Level; import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.FeatureReader; +import 
org.broad.tribble.index.IndexCreator; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.HelpConstants; +import org.broadinstitute.sting.utils.variant.GATKVCFIndexType; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.variant.bcf2.BCF2Codec; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.vcf.VCFCodec; @@ -123,6 +126,12 @@ public class CatVariants extends CommandLineProgram { @Argument(fullName = "assumeSorted", shortName = "assumeSorted", doc = "assumeSorted should be true if he input files are already sorted (based on the position of the variants", required = false) private Boolean assumeSorted = false; + @Argument(fullName = "variant_index_type", doc = "which type of IndexCreator to use for VCF/BCF indices", required = false) + private GATKVCFIndexType variant_index_type = GATKVCFUtils.DEFAULT_INDEX_TYPE; + + @Argument(fullName = "variant_index_parameter", doc = "the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator", required = false) + private Integer variant_index_parameter = GATKVCFUtils.DEFAULT_INDEX_PARAMETER; + /* * print usage information */ @@ -204,7 +213,8 @@ public class CatVariants extends CommandLineProgram { FileOutputStream outputStream = new FileOutputStream(outputFile); EnumSet options = EnumSet.of(Options.INDEX_ON_THE_FLY); - final VariantContextWriter outputWriter = VariantContextWriterFactory.create(outputFile, outputStream, ref.getSequenceDictionary(), options); + final IndexCreator idxCreator = GATKVCFUtils.getIndexCreator(variant_index_type, variant_index_parameter, outputFile); + final VariantContextWriter outputWriter = 
VariantContextWriterFactory.create(outputFile, outputStream, ref.getSequenceDictionary(), idxCreator, options); boolean firstFile = true; int count =0; diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFIndexType.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFIndexType.java new file mode 100644 index 000000000..3f00d9fe5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFIndexType.java @@ -0,0 +1,39 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.utils.variant; + +import org.broadinstitute.sting.commandline.EnumerationArgumentDefault; + +/** + * Choose the Tribble indexing strategy + */ +public enum GATKVCFIndexType { + @EnumerationArgumentDefault + DYNAMIC_SEEK, // use DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) + DYNAMIC_SIZE, // use DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SIZE) + LINEAR, // use LinearIndexCreator() + INTERVAL // use IntervalIndexCreator() +} diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java index d1ba990fd..5a160566e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java @@ -28,6 +28,11 @@ package org.broadinstitute.sting.utils.variant; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodecHeader; +import org.broad.tribble.index.DynamicIndexCreator; +import org.broad.tribble.index.IndexCreator; +import org.broad.tribble.index.IndexFactory; +import org.broad.tribble.index.interval.IntervalIndexCreator; +import org.broad.tribble.index.linear.LinearIndexCreator; import org.broad.tribble.readers.LineIterator; import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.commandline.RodBinding; @@ -43,6 +48,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.util.*; + /** * A set of GATK-specific static utility methods for common operations on VCF files/records. */ @@ -55,6 +61,9 @@ public class GATKVCFUtils { public final static String GATK_COMMAND_LINE_KEY = "GATKCommandLine"; + public final static GATKVCFIndexType DEFAULT_INDEX_TYPE = GATKVCFIndexType.DYNAMIC_SEEK; // by default, optimize for seek time. All indices prior to Nov 2013 used this type. 
+ public final static Integer DEFAULT_INDEX_PARAMETER = -1; // the default DYNAMIC_SEEK does not use a parameter + /** * Gets the appropriately formatted header for a VCF file describing this GATK run * @@ -175,6 +184,27 @@ public class GATKVCFUtils { return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary()); } + /** + * Create and return an IndexCreator of the requested type, initialized for the given output file + * @param type which kind of IndexCreator to instantiate; an unhandled value raises IllegalArgumentException + * @param parameter value passed through to IndexCreator.initialize together with outFile (the default DYNAMIC_SEEK does not use it) + * @param outFile file passed to IndexCreator.initialize + * @return the newly created IndexCreator, after initialize(outFile, parameter) has been called on it + */ + public static IndexCreator getIndexCreator(GATKVCFIndexType type, int parameter, File outFile) { + IndexCreator idxCreator; + switch (type) { + case DYNAMIC_SEEK: idxCreator = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); break; + case DYNAMIC_SIZE: idxCreator = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SIZE); break; + case LINEAR: idxCreator = new LinearIndexCreator(); break; + case INTERVAL: idxCreator = new IntervalIndexCreator(); break; + default: throw new IllegalArgumentException("Unknown IndexCreator type: " + type); + } + + idxCreator.initialize(outFile, parameter); + return idxCreator; + } + /** * Utility class to read all of the VC records from a file * diff --git a/public/java/test/org/broadinstitute/sting/ExampleToCopyUnitTest.java b/public/java/test/org/broadinstitute/sting/ExampleToCopyUnitTest.java index 8a8faee8b..06bab8fc0 100644 --- a/public/java/test/org/broadinstitute/sting/ExampleToCopyUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/ExampleToCopyUnitTest.java @@ -104,6 +104,42 @@ public class ExampleToCopyUnitTest extends BaseTest { Assert.assertTrue(size >= 0); } + /** + * DataProvider example using a class-based data structure + */ + private class MyDataProviderClass extends TestDataProvider { + private int start; + private int size; + + private MyDataProviderClass(int start, int size) { + super(MyDataProviderClass.class); + this.start = start; + this.size = size; + } + } + + @DataProvider(name = 
"MyClassBasedDataProvider") + public Object[][] makeMyDataProviderClass() { + // this functionality can be adapted to provide input data for whatever you might want in your data + for ( final int start : Arrays.asList(1, 10, 100) ) { + for ( final int size : Arrays.asList(1, 10, 100, 1000) ) { + new MyDataProviderClass(start, size); + } + } + + return TestDataProvider.getTests(MyDataProviderClass.class); + } + + /** + * Example testng test using MyClassBasedDataProvider + */ + @Test(dataProvider = "MyClassBasedDataProvider") + public void testMyDataProviderClass(MyDataProviderClass testSpec) { + // adapt this code to do whatever testing you want given the arguments start and size + Assert.assertTrue(testSpec.start >= 0); + Assert.assertTrue(testSpec.size >= 0); + } + /** * A unit test that creates an artificial read for testing some code that uses reads */ diff --git a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVCFUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVCFUtilsUnitTest.java index 051d0bcec..57020424c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVCFUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVCFUtilsUnitTest.java @@ -25,6 +25,10 @@ package org.broadinstitute.sting.utils.variant; +import org.broad.tribble.index.DynamicIndexCreator; +import org.broad.tribble.index.IndexCreator; +import org.broad.tribble.index.interval.IntervalIndexCreator; +import org.broad.tribble.index.linear.LinearIndexCreator; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -35,8 +39,10 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; 
+import java.io.File; import java.util.Arrays; import java.util.Collections; import java.util.Set; @@ -83,4 +89,41 @@ public class GATKVCFUtilsUnitTest extends BaseTest { Assert.assertTrue(lines2.contains(line1)); Assert.assertTrue(lines2.contains(line2)); } + + private class IndexCreatorTest extends TestDataProvider { + private final GATKVCFIndexType type; + private final int parameter; + private final Class expectedClass; + private final int expectedDefaultBinSize; + private final int expectedBinSize; + + private IndexCreatorTest(GATKVCFIndexType type, int parameter, Class expectedClass, int expectedDefaultBinSize, int expectedBinSize) { + super(IndexCreatorTest.class); + + this.type = type; + this.parameter = parameter; + this.expectedClass = expectedClass; + this.expectedDefaultBinSize = expectedDefaultBinSize; + this.expectedBinSize = expectedBinSize; + } + } + + @DataProvider(name = "indexCreator") + public Object[][] indexCreatorData() { + new IndexCreatorTest(GATKVCFIndexType.DYNAMIC_SEEK, 0, DynamicIndexCreator.class, -1, -1); + new IndexCreatorTest(GATKVCFIndexType.DYNAMIC_SIZE, 0, DynamicIndexCreator.class, -1, -1); + new IndexCreatorTest(GATKVCFIndexType.LINEAR, 100, LinearIndexCreator.class, LinearIndexCreator.DEFAULT_BIN_WIDTH, 100); + new IndexCreatorTest(GATKVCFIndexType.INTERVAL, 200, IntervalIndexCreator.class, IntervalIndexCreator.DEFAULT_FEATURE_COUNT, 200); + + return IndexCreatorTest.getTests(IndexCreatorTest.class); + } + + @Test(dataProvider = "indexCreator") + public void testGetIndexCreator(IndexCreatorTest spec) { + File dummy = new File(""); + IndexCreator ic = GATKVCFUtils.getIndexCreator(spec.type, spec.parameter, dummy); + Assert.assertEquals(ic.getClass(), spec.expectedClass, "Wrong IndexCreator type"); + Assert.assertEquals(ic.defaultBinSize(), spec.expectedDefaultBinSize, "Wrong default bin size"); + Assert.assertEquals(ic.getBinSize(), spec.expectedBinSize, "Wrong bin size"); + } } \ No newline at end of file diff --git 
a/public/java/test/org/broadinstitute/sting/utils/variant/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variant/VCFIntegrationTest.java index f4cef7730..f29a1106c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variant/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variant/VCFIntegrationTest.java @@ -25,11 +25,24 @@ package org.broadinstitute.sting.utils.variant; +import org.broad.tribble.index.AbstractIndex; +import org.broad.tribble.index.ChrIndex; +import org.broad.tribble.index.Index; +import org.broad.tribble.index.IndexFactory; +import org.broad.tribble.index.interval.IntervalTreeIndex; +import org.broad.tribble.index.linear.LinearIndex; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.variant.vcf.VCFCodec; +import org.testng.Assert; +import org.testng.TestException; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; +import java.lang.reflect.Field; import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; public class VCFIntegrationTest extends WalkerTest { @@ -141,4 +154,122 @@ public class VCFIntegrationTest extends WalkerTest { spec1.disableShadowBCF(); executeTest("Test reading VCF without header lines with additional args " + moreArgs, spec1); } + + // + // + // IndexCreator tests + // + // + + private class VCFIndexCreatorTest extends TestDataProvider { + private final GATKVCFIndexType type; + private final int parameter; + + private VCFIndexCreatorTest(GATKVCFIndexType type, int parameter) { + super(VCFIndexCreatorTest.class); + + this.type = type; + this.parameter = parameter; + } + + public String toString() { + return String.format("Index Type %s, Index Parameter %s", type, parameter); + } + + public Index getIndex(final File vcfFile) { + switch (type) { + case DYNAMIC_SEEK : return IndexFactory.createDynamicIndex(vcfFile, new VCFCodec(), 
IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); + case DYNAMIC_SIZE : return IndexFactory.createDynamicIndex(vcfFile, new VCFCodec(), IndexFactory.IndexBalanceApproach.FOR_SIZE); + case LINEAR : return IndexFactory.createLinearIndex(vcfFile, new VCFCodec(), parameter); + case INTERVAL : return IndexFactory.createIntervalIndex(vcfFile, new VCFCodec(), parameter); + default : throw new TestException("Invalid index type"); + } + } + } + + @DataProvider(name = "IndexDataProvider") + public Object[][] indexCreatorData() { + new VCFIndexCreatorTest(GATKVCFIndexType.DYNAMIC_SEEK, 0); + new VCFIndexCreatorTest(GATKVCFIndexType.DYNAMIC_SIZE, 0); + new VCFIndexCreatorTest(GATKVCFIndexType.LINEAR, 100); + new VCFIndexCreatorTest(GATKVCFIndexType.LINEAR, 10000); + new VCFIndexCreatorTest(GATKVCFIndexType.INTERVAL, 20); + new VCFIndexCreatorTest(GATKVCFIndexType.INTERVAL, 2000); + + return TestDataProvider.getTests(VCFIndexCreatorTest.class); + } + + @Test(dataProvider = "IndexDataProvider") + public void testVCFIndexCreation(VCFIndexCreatorTest testSpec) throws NoSuchFieldException, IllegalAccessException { + + final String commandLine = " -T SelectVariants" + + " -R " + b37KGReference + + " --no_cmdline_in_header" + + " -L 20" + + " -V " + b37_NA12878_OMNI + + " --variant_index_type " + testSpec.type + + " --variant_index_parameter " + testSpec.parameter + + " -o %s "; + final String name = "testVCFIndexCreation: " + testSpec.toString(); + + final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("")); + spec.disableShadowBCF(); + + File outVCF = executeTest(name, spec).first.get(0); + File outIdx = new File(outVCF.getAbsolutePath() + ".idx"); + + final Index actualIndex = IndexFactory.loadIndex(outIdx.getAbsolutePath()); + final Index expectedIndex = testSpec.getIndex(outVCF); + + if (testSpec.type == GATKVCFIndexType.LINEAR) + Assert.assertTrue(actualIndex instanceof LinearIndex, "Index is not a LinearIndex"); + else if (testSpec.type == GATKVCFIndexType.INTERVAL) + 
Assert.assertTrue(actualIndex instanceof IntervalTreeIndex, "Index is not a IntervalTreeIndex"); + // dynamic indices ultimately resolve to one of LinearIndex or IntervalTreeIndex + + Assert.assertTrue(equivalentAbstractIndices((AbstractIndex)actualIndex, (AbstractIndex)expectedIndex), "Indices are not equivalent"); + + if (actualIndex instanceof LinearIndex && expectedIndex instanceof LinearIndex) { + Assert.assertTrue(equivalentLinearIndices((LinearIndex)actualIndex, (LinearIndex)expectedIndex, "20"), "Linear indices are not equivalent"); + } + else if (actualIndex instanceof IntervalTreeIndex && expectedIndex instanceof IntervalTreeIndex) { + Assert.assertTrue(equivalentIntervalIndices((IntervalTreeIndex)actualIndex, (IntervalTreeIndex)expectedIndex, "20"), "Interval indices are not equivalent"); + } + else { + Assert.fail("Indices are not of the same type"); + } + } + + private static boolean equivalentAbstractIndices(AbstractIndex thisIndex, AbstractIndex otherIndex){ + return thisIndex.getVersion() == otherIndex.getVersion() && + thisIndex.getIndexedFile().equals(otherIndex.getIndexedFile()) && + thisIndex.getIndexedFileSize() == otherIndex.getIndexedFileSize() && + thisIndex.getIndexedFileMD5().equals(otherIndex.getIndexedFileMD5()) && + thisIndex.getFlags() == otherIndex.getFlags(); + } + + private static boolean equivalentLinearIndices(LinearIndex thisIndex, LinearIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException { + org.broad.tribble.index.linear.LinearIndex.ChrIndex thisChr = (org.broad.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(thisIndex, chr); + org.broad.tribble.index.linear.LinearIndex.ChrIndex otherChr = (org.broad.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(otherIndex, chr); + + return thisChr.getName().equals(otherChr.getName()) && + //thisChr.getTotalSize() == otherChr.getTotalSize() && TODO: why does this differ? 
+ thisChr.getNFeatures() == otherChr.getNFeatures() && + thisChr.getNBlocks() == otherChr.getNBlocks(); + } + + private static boolean equivalentIntervalIndices(IntervalTreeIndex thisIndex, IntervalTreeIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException { + org.broad.tribble.index.interval.IntervalTreeIndex.ChrIndex thisChr = (org.broad.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(thisIndex, chr); + org.broad.tribble.index.interval.IntervalTreeIndex.ChrIndex otherChr = (org.broad.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(otherIndex, chr); + + // TODO: compare trees? + return thisChr.getName().equals(otherChr.getName()); + } + + private static ChrIndex getChrIndex(AbstractIndex index, String chr) throws NoSuchFieldException, IllegalAccessException { + Field f = AbstractIndex.class.getDeclaredField("chrIndices"); + f.setAccessible(true); + LinkedHashMap chrIndices = (LinkedHashMap) f.get(index); + return chrIndices.get(chr); + } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala index 30fd4c81f..940d98860 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala @@ -48,6 +48,9 @@ class CatVariantsGatherer extends CatVariants with GatherFunction with RetryMemo this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) } this.outputFile = this.originalOutput this.assumeSorted = true + this.variant_index_type = originalGATK.variant_index_type + this.variant_index_parameter = originalGATK.variant_index_parameter + super.freezeFieldValues() } diff --git a/settings/repository/net.sf/picard-1.96.1534.jar b/settings/repository/net.sf/picard-1.102.1595.jar similarity index 88% 
rename from settings/repository/net.sf/picard-1.96.1534.jar rename to settings/repository/net.sf/picard-1.102.1595.jar index 8be3e01a3..314b1db6a 100644 Binary files a/settings/repository/net.sf/picard-1.96.1534.jar and b/settings/repository/net.sf/picard-1.102.1595.jar differ diff --git a/settings/repository/net.sf/picard-1.102.1595.xml b/settings/repository/net.sf/picard-1.102.1595.xml new file mode 100644 index 000000000..e4370e4cd --- /dev/null +++ b/settings/repository/net.sf/picard-1.102.1595.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/picard-1.96.1534.xml b/settings/repository/net.sf/picard-1.96.1534.xml deleted file mode 100644 index 651193f22..000000000 --- a/settings/repository/net.sf/picard-1.96.1534.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.96.1534.jar b/settings/repository/net.sf/sam-1.102.1595.jar similarity index 89% rename from settings/repository/net.sf/sam-1.96.1534.jar rename to settings/repository/net.sf/sam-1.102.1595.jar index 474579754..23a464627 100644 Binary files a/settings/repository/net.sf/sam-1.96.1534.jar and b/settings/repository/net.sf/sam-1.102.1595.jar differ diff --git a/settings/repository/net.sf/sam-1.102.1595.xml b/settings/repository/net.sf/sam-1.102.1595.xml new file mode 100644 index 000000000..44fc06d58 --- /dev/null +++ b/settings/repository/net.sf/sam-1.102.1595.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.96.1534.xml b/settings/repository/net.sf/sam-1.96.1534.xml deleted file mode 100644 index f72556cfb..000000000 --- a/settings/repository/net.sf/sam-1.96.1534.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/org.broad/tribble-1.96.1534.jar b/settings/repository/org.broad/tribble-1.102.1595.jar similarity index 89% rename from settings/repository/org.broad/tribble-1.96.1534.jar rename to settings/repository/org.broad/tribble-1.102.1595.jar index c414565bf..4afeb3366 100644 Binary files 
a/settings/repository/org.broad/tribble-1.96.1534.jar and b/settings/repository/org.broad/tribble-1.102.1595.jar differ diff --git a/settings/repository/org.broad/tribble-1.96.1534.xml b/settings/repository/org.broad/tribble-1.102.1595.xml similarity index 75% rename from settings/repository/org.broad/tribble-1.96.1534.xml rename to settings/repository/org.broad/tribble-1.102.1595.xml index 7bbb7d27b..3500c3b08 100644 --- a/settings/repository/org.broad/tribble-1.96.1534.xml +++ b/settings/repository/org.broad/tribble-1.102.1595.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/org.broadinstitute/variant-1.96.1534.jar b/settings/repository/org.broadinstitute/variant-1.102.1595.jar similarity index 75% rename from settings/repository/org.broadinstitute/variant-1.96.1534.jar rename to settings/repository/org.broadinstitute/variant-1.102.1595.jar index 1e34967fa..18118bfae 100644 Binary files a/settings/repository/org.broadinstitute/variant-1.96.1534.jar and b/settings/repository/org.broadinstitute/variant-1.102.1595.jar differ diff --git a/settings/repository/org.broadinstitute/variant-1.96.1534.xml b/settings/repository/org.broadinstitute/variant-1.102.1595.xml similarity index 70% rename from settings/repository/org.broadinstitute/variant-1.96.1534.xml rename to settings/repository/org.broadinstitute/variant-1.102.1595.xml index 0784e4891..aa348fc84 100644 --- a/settings/repository/org.broadinstitute/variant-1.96.1534.xml +++ b/settings/repository/org.broadinstitute/variant-1.102.1595.xml @@ -1,3 +1,3 @@ - +