From aef0a83c528afe602196a8442537ec59916edf63 Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Wed, 18 Mar 2015 09:33:24 -0400 Subject: [PATCH] Automatically choose indexing strategy by file extension --- .../haplotypecaller/HaplotypeCaller.java | 23 ++- .../HaplotypeCallerGVCFIntegrationTest.java | 37 +++-- .../HaplotypeCallerIntegrationTest.java | 2 +- .../gatk/engine/GATKVCFUtils.java | 143 +++++++++++++++++- .../arguments/GATKArgumentCollection.java | 4 + .../io/stubs/VariantContextWriterStub.java | 4 +- .../gatk/tools/CatVariants.java | 3 +- .../tools/CatVariantsIntegrationTest.java | 87 ++++++++++- .../variantutils/VCFIntegrationTest.java | 137 +++++++++-------- 9 files changed, 346 insertions(+), 94 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 47c8cd78b..fe7d7f4b3 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -57,6 +57,7 @@ import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.*; import org.broadinstitute.gatk.engine.CommandLineGATK; +import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection; import org.broadinstitute.gatk.engine.io.DirectOutputTracker; @@ -71,6 +72,7 @@ import org.broadinstitute.gatk.engine.filters.BadMateFilter; import org.broadinstitute.gatk.utils.genotyper.*; import org.broadinstitute.gatk.utils.sam.GATKSAMFileWriter; import org.broadinstitute.gatk.engine.iterators.ReadTransformer; +import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub; import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.engine.walkers.*; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; @@ -709,10 +711,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In ReferenceConfidenceModel referenceConfidenceModel = null; - // as determined experimentally Nov-Dec 2013 - public final static GATKVCFIndexType OPTIMAL_GVCF_INDEX_TYPE = GATKVCFIndexType.LINEAR; - public final static int OPTIMAL_GVCF_INDEX_PARAMETER = 128000; - //--------------------------------------------------------------------------------------------------------------- // // initialize @@ -893,10 +891,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently. Use the sample_name argument to run on a single sample out of a multi-sample BAM file."); headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines()); if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) { - // a kluge to enforce the use of this indexing strategy - if (getToolkit().getArguments().variant_index_type != OPTIMAL_GVCF_INDEX_TYPE || - getToolkit().getArguments().variant_index_parameter != OPTIMAL_GVCF_INDEX_PARAMETER) { - throw new UserException.GVCFIndexException(OPTIMAL_GVCF_INDEX_TYPE, OPTIMAL_GVCF_INDEX_PARAMETER); + // A kluge to enforce the use of this indexing strategy - must set the gVCF indexing values if not a using a gVCF output file . + // An output gVCF file automatically sets the indexing values because it has the .g.vcf extension. + if (!GATKVCFUtils.usingGVCFIndexingArguments(getToolkit().getArguments().variant_index_type, getToolkit().getArguments().variant_index_parameter) && !isGVCF()) { + throw new UserException.GVCFIndexException(GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); } try { @@ -1404,4 +1402,13 @@ public class HaplotypeCaller extends ActiveRegionWalker, In activeRegion.removeAll( readsToRemove ); } + + /** + * Is writing to an output GVCF file? + * + * @return true if the VCF output file has a .g.vcf extension + */ + private boolean isGVCF() { + return ((VariantContextWriterStub) vcfWriter).getOutputFile().getName().endsWith("." + GATKVCFUtils.GVCF_EXT); + } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 57fd97195..783c173aa 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -51,6 +51,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller; +import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType; @@ -151,7 +152,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { @Test(dataProvider = "MyDataProvider") public void testHCWithGVCF(String bam, ReferenceConfidenceMode mode, String intervals, String md5) { final String commandLine = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final String name = "testHCWithGVCF bam=" + bam + " intervals= " + intervals + " gvcf= " + mode; final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5)); executeTest(name, spec); @@ -163,7 +164,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { @Test(dataProvider = "MyDataProviderHaploid", enabled=true) public void testHCWithGVCFHaploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) { final String commandLine = String.format("-T HaplotypeCaller -ploidy 1 --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final String name = "testHCWithGVCFHaploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode; final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5)); executeTest(name, spec); @@ -175,7 +176,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { @Test(dataProvider = "MyDataProviderTetraploid", enabled=true) public void testHCWithGVCFTetraploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) { final String commandLine = String.format("-T HaplotypeCaller -ploidy 4 --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final String name = "testHCWithGVCFTetraploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode; final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5)); executeTest(name, spec); @@ -187,7 +188,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { @Test(dataProvider = "MyDataProviderManyploid", enabled=true) public void testHCWithGVCFManyploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) { final String commandLine = String.format("-T HaplotypeCaller -ploidy 33 --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final String name = "testHCWithGVCFManyploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode; final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5)); executeTest(name, spec); @@ -196,7 +197,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { @Test public void testERCRegionWithNoCalledHaplotypes() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("")); spec.disableShadowBCF(); executeTest("testERCRegionWithNoCalledHaplotypes", spec); @@ -211,10 +212,22 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { executeTest("testMissingGVCFIndexingStrategyException", spec); } + /** + * Test HaplotypeCaller to ensure it does not throw an exception when a .g.vcf output file is specified and the indexing arguments are omitted + */ + @Test() + public void testGVCFIndexNoThrow() { + final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF", + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000000-17000100"); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(GATKVCFUtils.GVCF_EXT), Arrays.asList("")); + spec.disableShadowBCF(); + executeTest("testGVCFIndexNoThrow", spec); + } + @Test() public void testWrongParameterGVCFIndexException() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER + 1); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER + 1); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 1, UserException.GVCFIndexException.class); spec.disableShadowBCF(); executeTest("testMissingGVCFIndexingStrategyException", spec); @@ -224,11 +237,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testWrongTypeGVCFIndexException() { // ensure non-optimal, if optimal changes GATKVCFIndexType type = GATKVCFIndexType.DYNAMIC_SEEK; - if (HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE == GATKVCFIndexType.DYNAMIC_SEEK) + if (GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE == GATKVCFIndexType.DYNAMIC_SEEK) type = GATKVCFIndexType.DYNAMIC_SIZE; final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", type, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", type, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 1, UserException.GVCFIndexException.class); spec.disableShadowBCF(); executeTest("testMissingGVCFIndexingStrategyException", spec); @@ -240,7 +253,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { @Test() public void testWrongGVCFNonVariantRecordOrderBugFix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae90a1b5c40f19df38e8ac58c3844ed5")); spec.disableShadowBCF(); executeTest("testMissingGVCFIndexingStrategyException", spec); @@ -257,7 +270,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { @Test public void testNoCallGVCFMissingPLsBugFix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("bf6325fbc06c80893f3170668cfbed06")); spec.disableShadowBCF(); executeTest("testNoCallGVCFMissingPLsBugFix", spec); @@ -269,7 +282,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { @Test(enabled=true) public void testGeneralPloidyArrayIndexBug1Fix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 1 -maxAltAlleles 2 -isr INTERSECTION -L 1:23696115-23696189", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, GENERAL_PLOIDY_BUGFIX1_BAM, GENERAL_PLOIDY_BUGFIX1_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, GENERAL_PLOIDY_BUGFIX1_BAM, GENERAL_PLOIDY_BUGFIX1_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("")); spec.disableShadowBCF(); executeTest(" testGeneralPloidyArrayIndexBug1Fix", spec); @@ -281,7 +294,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { @Test(enabled=true) public void testGeneralPloidyArrayIndexBug2Fix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 2 -maxAltAlleles 2 -A DepthPerSampleHC -A StrandBiasBySample -L 1:38052860-38052937", - HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, GENERAL_PLOIDY_BUGFIX2_BAM, GENERAL_PLOIDY_BUGFIX2_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, GENERAL_PLOIDY_BUGFIX2_BAM, GENERAL_PLOIDY_BUGFIX2_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("")); spec.disableShadowBCF(); executeTest(" testGeneralPloidyArrayIndexBug2Fix", spec); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 889e2d771..58a67ea05 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -362,7 +362,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -dontTrimActiveRegions -ERC GVCF " + "-likelihoodEngine GraphBased -variant_index_type %s -variant_index_parameter %d", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReferenceWithDecoy, privateTestDir + "graphbased_no_such_edge_bug.bam", privateTestDir + "graphbased_no_such_edge_bug.intervals.bed", - HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("")); spec.disableShadowBCF(); executeTest("testGraphBasedNoSuchEdgeBugFix", spec); diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GATKVCFUtils.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GATKVCFUtils.java index 883ed7fd8..d6925c890 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GATKVCFUtils.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GATKVCFUtils.java @@ -26,13 +26,11 @@ package org.broadinstitute.gatk.engine; import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.tribble.index.*; +import htsjdk.tribble.index.interval.IntervalTreeIndex; +import htsjdk.tribble.index.linear.LinearIndex; import org.apache.log4j.Logger; import htsjdk.tribble.Feature; -import htsjdk.tribble.FeatureCodec; -import htsjdk.tribble.FeatureCodecHeader; -import htsjdk.tribble.index.DynamicIndexCreator; -import htsjdk.tribble.index.IndexCreator; -import htsjdk.tribble.index.IndexFactory; import htsjdk.tribble.index.interval.IntervalIndexCreator; import htsjdk.tribble.index.linear.LinearIndexCreator; import htsjdk.tribble.index.tabix.TabixFormat; @@ -47,9 +45,8 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.*; import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; +import java.io.*; +import java.lang.reflect.Field; import java.util.*; @@ -68,6 +65,16 @@ public class GATKVCFUtils { public final static GATKVCFIndexType DEFAULT_INDEX_TYPE = GATKVCFIndexType.DYNAMIC_SEEK; // by default, optimize for seek time. All indices prior to Nov 2013 used this type. public final static Integer DEFAULT_INDEX_PARAMETER = -1; // the default DYNAMIC_SEEK does not use a parameter + // as determined experimentally Nov-Dec 2013 + public final static GATKVCFIndexType DEFAULT_GVCF_INDEX_TYPE = GATKVCFIndexType.LINEAR; + public final static Integer DEFAULT_GVCF_INDEX_PARAMETER = 128000; + + // GVCF file extension + public final static String GVCF_EXT = "g.vcf"; + + // Message for using the deprecated --variant_index_type or --variant_index_parameter arguments. + public final static String DEPRECATED_INDEX_ARGS_MSG = "Naming your output file using the .g.vcf extension will automatically set the appropriate values " + + " for --variant_index_type and --variant_index_parameter"; /** * Gets the appropriately formatted header for a VCF file describing this GATK run @@ -260,4 +267,124 @@ public class GATKVCFUtils { codec.close(vcfSource); } } + + /** + * Check if the two indices are equivalent + * + * @param thisIndex index + * @param otherIndex index + * @return true if indices are equivalent, false otherwise. + */ + public static boolean equivalentAbstractIndices(AbstractIndex thisIndex, AbstractIndex otherIndex){ + return thisIndex.getVersion() == otherIndex.getVersion() && + thisIndex.getIndexedFile().equals(otherIndex.getIndexedFile()) && + thisIndex.getIndexedFileSize() == otherIndex.getIndexedFileSize() && + thisIndex.getIndexedFileMD5().equals(otherIndex.getIndexedFileMD5()) && + thisIndex.getFlags() == otherIndex.getFlags(); + } + + /** + * Check if the two indices are equivalent for a chromosome + * + * @param thisIndex index + * @param otherIndex index + * @param chr chromosome + * @return true if indices are equivalent, false otherwise. + * @throws NoSuchFieldException if index does not exist for a chromosome + * @throws IllegalAccessException if index does not exist for a chromosome + */ + public static boolean equivalentLinearIndices(LinearIndex thisIndex, LinearIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException { + htsjdk.tribble.index.linear.LinearIndex.ChrIndex thisChr = (htsjdk.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(thisIndex, chr); + htsjdk.tribble.index.linear.LinearIndex.ChrIndex otherChr = (htsjdk.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(otherIndex, chr); + + return thisChr.getName().equals(otherChr.getName()) && + //thisChr.getTotalSize() == otherChr.getTotalSize() && TODO: why does this differ? + thisChr.getNFeatures() == otherChr.getNFeatures() && + thisChr.getNBlocks() == otherChr.getNBlocks(); + } + + /** + * Check if the two interval indices are equivalent for a chromosome + * + * @param thisIndex interval index + * @param otherIndex interval index + * @param chr chromosome + * @return true if indices are equivalent, false otherwise. + * @throws NoSuchFieldException if index does not exist for a chromosome + * @throws IllegalAccessException if index does not exist for a chromosome + */ + public static boolean equivalentIntervalIndices(IntervalTreeIndex thisIndex, IntervalTreeIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException { + htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex thisChr = (htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(thisIndex, chr); + htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex otherChr = (htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(otherIndex, chr); + + // TODO: compare trees? + return thisChr.getName().equals(otherChr.getName()); + } + + /** + * Get index for a chromosome + * + * @param index index + * @param chr chromosome + * @return index for the chromosome + * @throws NoSuchFieldException if index does not exist for a chromosome + * @throws IllegalAccessException if index does not exist for a chromosome + */ + public static ChrIndex getChrIndex(AbstractIndex index, String chr) throws NoSuchFieldException, IllegalAccessException { + Field f = AbstractIndex.class.getDeclaredField("chrIndices"); + f.setAccessible(true); + LinkedHashMap chrIndices = (LinkedHashMap) f.get(index); + return chrIndices.get(chr); + } + + /** + * Make an IndexCreator + * + * @param variantIndexType variant indexing strategy + * @param variantIndexParameter variant indexing parameter + * @param outputFile output variant file + * @param sequenceDictionary collection of SAM sequence records + * @return IndexCreator + */ + public static IndexCreator makeIndexCreator(final GATKVCFIndexType variantIndexType, final int variantIndexParameter, final File outputFile, final SAMSequenceDictionary sequenceDictionary) { + /* + * If using the index arguments, log a warning. + * If the genotype file has the GCVF extension (.g.vcf), use the default GCVF indexing. + * Otherwise, use the default index type and parameter. + */ + GATKVCFIndexType indexType = DEFAULT_INDEX_TYPE; + int indexParameter = DEFAULT_INDEX_PARAMETER; + if (usingNonDefaultIndexingArguments(variantIndexType, variantIndexParameter)) { + indexType = variantIndexType; + indexParameter = variantIndexParameter; + logger.warn(DEPRECATED_INDEX_ARGS_MSG); + } else if (outputFile.getName().endsWith("." + GVCF_EXT)) { + indexType = DEFAULT_GVCF_INDEX_TYPE; + indexParameter = DEFAULT_GVCF_INDEX_PARAMETER; + } + + return getIndexCreator(indexType, indexParameter, outputFile, sequenceDictionary); + } + + /** + * Check if not using the default indexing arguments' values + * + * @param variantIndexType variant indexing strategy + * @param variantIndexParameter variant indexing parameter + * @return true if the index type or parameter are not the default values, false otherwise + */ + public static boolean usingNonDefaultIndexingArguments(final GATKVCFIndexType variantIndexType, final int variantIndexParameter) { + return variantIndexType != GATKVCFUtils.DEFAULT_INDEX_TYPE || variantIndexParameter != GATKVCFUtils.DEFAULT_INDEX_PARAMETER; + } + + /** + * Check if using the GCVF indexing arguments' values + * + * @param variantIndexType variant indexing strategy + * @param variantIndexParameter variant indexing parameter + * @return true if the index type and parameter are the default GVCF values, false otherwise + */ + public static boolean usingGVCFIndexingArguments(final GATKVCFIndexType variantIndexType, final int variantIndexParameter) { + return variantIndexType == GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE && variantIndexParameter == GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER; + } } \ No newline at end of file diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java index a796a22de..ddcf373e1 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java @@ -621,12 +621,16 @@ public class GATKArgumentCollection { * INTERVAL creates an IntervalTreeIndex with bins with an equal amount of features, specified by the Features Per Bin parameter * DYNAMIC_SEEK attempts to optimize for minimal seek time by choosing an appropriate strategy and parameter (user-supplied parameter is ignored) * DYNAMIC_SIZE attempts to optimize for minimal index size by choosing an appropriate strategy and parameter (user-supplied parameter is ignored) + * + * This argument is deprecated, using the output file ".g.vcf" extension will automatically set the appropriate value */ @Argument(fullName="variant_index_type",shortName = "variant_index_type",doc="Type of IndexCreator to use for VCF/BCF indices",required=false) @Advanced public GATKVCFIndexType variant_index_type = GATKVCFUtils.DEFAULT_INDEX_TYPE; /** * This is either the bin width or the number of features per bin, depending on the indexing strategy + * + * This argument is deprecated, using the output file ".g.vcf" extension will automatically set the appropriate value */ @Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator",required=false) @Advanced diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java index 9597aec90..75832ec02 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java @@ -130,7 +130,9 @@ public class VariantContextWriterStub implements Stub, Var this.engine = engine; this.genotypeFile = genotypeFile; this.genotypeStream = null; - this.indexCreator = GATKVCFUtils.getIndexCreator(engine.getArguments().variant_index_type, engine.getArguments().variant_index_parameter, genotypeFile); + + this.indexCreator = GATKVCFUtils.makeIndexCreator(engine.getArguments().variant_index_type, engine.getArguments().variant_index_parameter, + genotypeFile, null); this.argumentSources = argumentSources; } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/CatVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/CatVariants.java index 99ce0af6d..ff9dd9efc 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/CatVariants.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/CatVariants.java @@ -266,12 +266,11 @@ public class CatVariants extends CommandLineProgram { reader.close(); priorityQueue.add(new Pair<>(firstPosition,file)); } - } FileOutputStream outputStream = new FileOutputStream(outputFile); EnumSet options = EnumSet.of(Options.INDEX_ON_THE_FLY); - final IndexCreator idxCreator = GATKVCFUtils.getIndexCreator(variant_index_type, variant_index_parameter, outputFile, ref.getSequenceDictionary()); + IndexCreator idxCreator = GATKVCFUtils.makeIndexCreator(variant_index_type, variant_index_parameter, outputFile, ref.getSequenceDictionary()); final VariantContextWriter outputWriter = VariantContextWriterFactory.create(outputFile, outputStream, ref.getSequenceDictionary(), idxCreator, options); boolean firstFile = true; diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java index f1b8d6ef3..930184a13 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java @@ -24,16 +24,21 @@ */ package org.broadinstitute.gatk.tools; - -import org.apache.commons.lang.StringUtils; +import htsjdk.tribble.index.Index; +import htsjdk.tribble.index.IndexFactory; +import htsjdk.variant.vcf.VCFCodec; import htsjdk.tribble.AbstractFeatureReader; +import org.apache.commons.lang.StringUtils; +import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.MD5DB; import org.broadinstitute.gatk.utils.MD5Mismatch; import org.broadinstitute.gatk.utils.runtime.ProcessController; import org.broadinstitute.gatk.utils.runtime.ProcessSettings; import org.broadinstitute.gatk.utils.runtime.RuntimeUtils; +import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType; import org.testng.Assert; +import org.testng.TestException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -142,4 +147,82 @@ public class CatVariantsIntegrationTest { ProcessSettings ps = new ProcessSettings(cmdLine.split("\\s+")); pc.execAndCheck(ps); } + + // + // + // IndexCreator tests + // + // + + private class VCFIndexCreatorTest extends BaseTest.TestDataProvider { + private final GATKVCFIndexType type; + private final int parameter; + + private VCFIndexCreatorTest(GATKVCFIndexType type, int parameter) { + super(VCFIndexCreatorTest.class); + + this.type = type; + this.parameter = parameter; + } + + public String toString() { + return String.format("Index Type %s, Index Parameter %s", type, parameter); + } + + public Index getIndex(final File vcfFile) { + switch (type) { + case DYNAMIC_SEEK : return IndexFactory.createDynamicIndex(vcfFile, new VCFCodec(), IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); + case DYNAMIC_SIZE : return IndexFactory.createDynamicIndex(vcfFile, new VCFCodec(), IndexFactory.IndexBalanceApproach.FOR_SIZE); + case LINEAR : return IndexFactory.createLinearIndex(vcfFile, new VCFCodec(), parameter); + case INTERVAL : return IndexFactory.createIntervalIndex(vcfFile, new VCFCodec(), parameter); + default : throw new TestException("Invalid index type"); + } + } + } + + @DataProvider(name = "IndexDataProvider") + public Object[][] indexCreatorData() { + new VCFIndexCreatorTest(GATKVCFIndexType.DYNAMIC_SEEK, 0); + new VCFIndexCreatorTest(GATKVCFIndexType.DYNAMIC_SIZE, 0); + new VCFIndexCreatorTest(GATKVCFIndexType.LINEAR, 100); + new VCFIndexCreatorTest(GATKVCFIndexType.LINEAR, 10000); + new VCFIndexCreatorTest(GATKVCFIndexType.INTERVAL, 20); + new VCFIndexCreatorTest(GATKVCFIndexType.INTERVAL, 2000); + + return BaseTest.TestDataProvider.getTests(VCFIndexCreatorTest.class); + } + + @Test(dataProvider = "IndexDataProvider") + public void testCatVariantsVCFIndexCreation(VCFIndexCreatorTest testSpec) throws IOException{ + + String cmdLine = String.format("java -cp %s %s -R %s -V %s -V %s --variant_index_type %s --variant_index_parameter %s -out %s", + StringUtils.join(RuntimeUtils.getAbsoluteClassPaths(), File.pathSeparatorChar), + CatVariants.class.getCanonicalName(), + BaseTest.b37KGReference, + CatVariantsVcf1, + CatVariantsVcf2, + testSpec.type, + testSpec.parameter, + BaseTest.createTempFile("CatVariantsVCFIndexCreationTest", ".vcf")); + + ProcessController pc = ProcessController.getThreadLocal(); + ProcessSettings ps = new ProcessSettings(cmdLine.split("\\s+")); + pc.execAndCheck(ps); + } + + @Test() + public void testCatVariantsGVCFIndexCreation() throws IOException{ + + String cmdLine = String.format("java -cp %s %s -R %s -V %s -V %s -out %s", + StringUtils.join(RuntimeUtils.getAbsoluteClassPaths(), File.pathSeparatorChar), + CatVariants.class.getCanonicalName(), + BaseTest.b37KGReference, + CatVariantsVcf1, + CatVariantsVcf2, + BaseTest.createTempFile("CatVariantsGVCFIndexCreationTest", "." + GATKVCFUtils.GVCF_EXT)); + + ProcessController pc = ProcessController.getThreadLocal(); + ProcessSettings ps = new ProcessSettings(cmdLine.split("\\s+")); + pc.execAndCheck(ps); + } } \ No newline at end of file diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VCFIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VCFIntegrationTest.java index 2928bba1e..bbbcbdd1b 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VCFIntegrationTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VCFIntegrationTest.java @@ -28,16 +28,17 @@ package org.broadinstitute.gatk.tools.walkers.variantutils; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.Tribble; import htsjdk.tribble.index.AbstractIndex; -import htsjdk.tribble.index.ChrIndex; import htsjdk.tribble.index.Index; import htsjdk.tribble.index.IndexFactory; import htsjdk.tribble.index.interval.IntervalTreeIndex; import htsjdk.tribble.index.linear.LinearIndex; import htsjdk.tribble.index.tabix.TabixIndex; import htsjdk.tribble.util.TabixUtils; -import org.broadinstitute.gatk.utils.BaseTest; -import org.broadinstitute.gatk.engine.walkers.WalkerTest; import htsjdk.variant.vcf.VCFCodec; +import org.apache.commons.io.FileUtils; +import org.broadinstitute.gatk.engine.GATKVCFUtils; +import org.broadinstitute.gatk.engine.walkers.WalkerTest; +import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType; import org.testng.Assert; import org.testng.TestException; @@ -45,9 +46,8 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; -import java.lang.reflect.Field; +import java.io.IOException; import java.util.Arrays; -import java.util.LinkedHashMap; import java.util.List; public class VCFIntegrationTest extends WalkerTest { @@ -205,77 +205,67 @@ public class VCFIntegrationTest extends WalkerTest { } @Test(dataProvider = "IndexDataProvider") - public void testVCFIndexCreation(VCFIndexCreatorTest testSpec) throws NoSuchFieldException, IllegalAccessException { + public void testVCFIndexCreation(VCFIndexCreatorTest testSpec) throws NoSuchFieldException, IllegalAccessException, IOException { + final String logFileName = new String("testVCFIndexCreation.log"); + final String chromosome = "20"; final String commandLine = " -T SelectVariants" + " -R " + b37KGReference + " --no_cmdline_in_header" + - " -L 20" + + " -L " + chromosome + " -V " + b37_NA12878_OMNI + " --variant_index_type " + testSpec.type + " --variant_index_parameter " + testSpec.parameter + - " -o %s "; + " -log " + logFileName + + " -o %s"; + final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("")); + spec.disableShadowBCF(); final String name = "testVCFIndexCreation: " + testSpec.toString(); + // execute that test and check if the actual and expected indices are the same + executeTestAndCheckIndices(name, chromosome, testSpec, spec); + + // check the log for the warning message + File file = new File(logFileName); + Assert.assertTrue(FileUtils.readFileToString(file).contains(GATKVCFUtils.DEPRECATED_INDEX_ARGS_MSG)); + } + + @Test + public void testVCFIndexCreationNoArgs() throws NoSuchFieldException, IllegalAccessException { + + final String chromosome = "20"; + final String commandLine = " -T SelectVariants" + + " -R " + b37KGReference + + " --no_cmdline_in_header" + + " -L " + chromosome + + " -V " + b37_NA12878_OMNI + + " -o %s"; + final String name = "testVCFIndexCreationNoArgs"; + VCFIndexCreatorTest testSpec = new VCFIndexCreatorTest(GATKVCFUtils.DEFAULT_INDEX_TYPE, GATKVCFUtils.DEFAULT_INDEX_PARAMETER); final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("")); spec.disableShadowBCF(); - File outVCF = executeTest(name, spec).first.get(0); - File outIdx = new File(outVCF.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION); - - final Index actualIndex = IndexFactory.loadIndex(outIdx.getAbsolutePath()); - final Index expectedIndex = testSpec.getIndex(outVCF); - - if (testSpec.type.equals("LINEAR")) - Assert.assertTrue(actualIndex instanceof LinearIndex, "Index is not a LinearIndex"); - else if (testSpec.type.equals("INTERVAL")) - Assert.assertTrue(actualIndex instanceof IntervalTreeIndex, "Index is not a IntervalTreeIndex"); - // dynamic indices ultimately resolve to one of LinearIndex or IntervalTreeIndex - - Assert.assertTrue(equivalentAbstractIndices((AbstractIndex)actualIndex, (AbstractIndex)expectedIndex), "Indices are not equivalent"); - - if (actualIndex instanceof LinearIndex && expectedIndex instanceof LinearIndex) { - Assert.assertTrue(equivalentLinearIndices((LinearIndex)actualIndex, (LinearIndex)expectedIndex, "20"), "Linear indices are not equivalent"); - } - else if (actualIndex instanceof IntervalTreeIndex && expectedIndex instanceof IntervalTreeIndex) { - Assert.assertTrue(equivalentIntervalIndices((IntervalTreeIndex)actualIndex, (IntervalTreeIndex)expectedIndex, "20"), "Interval indices are not equivalent"); - } - else { - Assert.fail("Indices are not of the same type"); - } + // execute that test and check if the actual and expected indices are the same + executeTestAndCheckIndices(name, chromosome, testSpec, spec); } - private static boolean equivalentAbstractIndices(AbstractIndex thisIndex, AbstractIndex otherIndex){ - return thisIndex.getVersion() == otherIndex.getVersion() && - thisIndex.getIndexedFile().equals(otherIndex.getIndexedFile()) && - thisIndex.getIndexedFileSize() == otherIndex.getIndexedFileSize() && - thisIndex.getIndexedFileMD5().equals(otherIndex.getIndexedFileMD5()) && - thisIndex.getFlags() == otherIndex.getFlags(); - } + @Test + public void testGVCFIndexCreation() throws NoSuchFieldException, IllegalAccessException { - private static boolean equivalentLinearIndices(LinearIndex thisIndex, LinearIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException { - htsjdk.tribble.index.linear.LinearIndex.ChrIndex thisChr = (htsjdk.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(thisIndex, chr); - htsjdk.tribble.index.linear.LinearIndex.ChrIndex otherChr = (htsjdk.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(otherIndex, chr); + final String chromosome = "20"; + final String commandLine = " -T SelectVariants" + + " -R " + b37KGReference + + " --no_cmdline_in_header" + + " -L " + chromosome + + " -V " + b37_NA12878_OMNI + + " -o %s"; + final String name = "testGVCFIndexCreation"; + VCFIndexCreatorTest testSpec = new VCFIndexCreatorTest(GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine, Arrays.asList(GATKVCFUtils.GVCF_EXT), Arrays.asList("")); + spec.disableShadowBCF(); - return thisChr.getName().equals(otherChr.getName()) && - //thisChr.getTotalSize() == otherChr.getTotalSize() && TODO: why does this differ? - thisChr.getNFeatures() == otherChr.getNFeatures() && - thisChr.getNBlocks() == otherChr.getNBlocks(); - } - - private static boolean equivalentIntervalIndices(IntervalTreeIndex thisIndex, IntervalTreeIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException { - htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex thisChr = (htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(thisIndex, chr); - htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex otherChr = (htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(otherIndex, chr); - - // TODO: compare trees? - return thisChr.getName().equals(otherChr.getName()); - } - - private static ChrIndex getChrIndex(AbstractIndex index, String chr) throws NoSuchFieldException, IllegalAccessException { - Field f = AbstractIndex.class.getDeclaredField("chrIndices"); - f.setAccessible(true); - LinkedHashMap chrIndices = (LinkedHashMap) f.get(index); - return chrIndices.get(chr); + // execute that test and check if the actual and expected indices are the same + executeTestAndCheckIndices(name, chromosome, testSpec, spec); } // @@ -375,4 +365,31 @@ public class VCFIntegrationTest extends WalkerTest { executeTest(name, spec); } + private void executeTestAndCheckIndices(final String name, final String chr, final VCFIndexCreatorTest testSpec, final WalkerTestSpec walkerTestSpec) + throws NoSuchFieldException, IllegalAccessException { + + File outVCF = executeTest(name, walkerTestSpec).first.get(0); + File outIdx = new File(outVCF.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION); + + final Index actualIndex = IndexFactory.loadIndex(outIdx.getAbsolutePath()); + final Index expectedIndex = testSpec.getIndex(outVCF); + + if (testSpec.type.equals("LINEAR")) + Assert.assertTrue(actualIndex instanceof LinearIndex, "Index is not a LinearIndex"); + else if (testSpec.type.equals("INTERVAL")) + Assert.assertTrue(actualIndex instanceof IntervalTreeIndex, "Index is not a IntervalTreeIndex"); + // dynamic indices ultimately resolve to one of LinearIndex or IntervalTreeIndex + + Assert.assertTrue(GATKVCFUtils.equivalentAbstractIndices((AbstractIndex) actualIndex, (AbstractIndex) expectedIndex), "Indices are not equivalent"); + + if (actualIndex instanceof LinearIndex && expectedIndex instanceof LinearIndex) { + Assert.assertTrue(GATKVCFUtils.equivalentLinearIndices((LinearIndex) actualIndex, (LinearIndex) expectedIndex, chr), "Linear indices are not equivalent"); + } + else if (actualIndex instanceof IntervalTreeIndex && expectedIndex instanceof IntervalTreeIndex) { + Assert.assertTrue(GATKVCFUtils.equivalentIntervalIndices((IntervalTreeIndex) actualIndex, (IntervalTreeIndex) expectedIndex, chr), "Interval indices are not equivalent"); + } + else { + Assert.fail("Indices are not of the same type"); + } + } }