Merge pull request #890 from broadinstitute/rhl_writers_use_index_args
Automatically choose indexing strategy by file extension
This commit is contained in:
commit
1f81c033f5
|
|
@ -57,6 +57,7 @@ import htsjdk.variant.variantcontext.*;
|
|||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
import htsjdk.variant.vcf.*;
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.io.DirectOutputTracker;
|
||||
|
|
@ -71,6 +72,7 @@ import org.broadinstitute.gatk.engine.filters.BadMateFilter;
|
|||
import org.broadinstitute.gatk.utils.genotyper.*;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMFileWriter;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.walkers.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
||||
|
|
@ -709,10 +711,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
|
||||
ReferenceConfidenceModel referenceConfidenceModel = null;
|
||||
|
||||
// as determined experimentally Nov-Dec 2013
|
||||
public final static GATKVCFIndexType OPTIMAL_GVCF_INDEX_TYPE = GATKVCFIndexType.LINEAR;
|
||||
public final static int OPTIMAL_GVCF_INDEX_PARAMETER = 128000;
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// initialize
|
||||
|
|
@ -893,10 +891,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently. Use the sample_name argument to run on a single sample out of a multi-sample BAM file.");
|
||||
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
|
||||
if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
|
||||
// a kluge to enforce the use of this indexing strategy
|
||||
if (getToolkit().getArguments().variant_index_type != OPTIMAL_GVCF_INDEX_TYPE ||
|
||||
getToolkit().getArguments().variant_index_parameter != OPTIMAL_GVCF_INDEX_PARAMETER) {
|
||||
throw new UserException.GVCFIndexException(OPTIMAL_GVCF_INDEX_TYPE, OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
// A kluge to enforce the use of this indexing strategy - must set the gVCF indexing values if not a using a gVCF output file .
|
||||
// An output gVCF file automatically sets the indexing values because it has the .g.vcf extension.
|
||||
if (!GATKVCFUtils.usingGVCFIndexingArguments(getToolkit().getArguments().variant_index_type, getToolkit().getArguments().variant_index_parameter) && !isGVCF()) {
|
||||
throw new UserException.GVCFIndexException(GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
@ -1404,4 +1402,13 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
activeRegion.removeAll( readsToRemove );
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Is writing to an output GVCF file?
|
||||
*
|
||||
* @return true if the VCF output file has a .g.vcf extension
|
||||
*/
|
||||
private boolean isGVCF() {
|
||||
return ((VariantContextWriterStub) vcfWriter).getOutputFile().getName().endsWith("." + GATKVCFUtils.GVCF_EXT);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
|
||||
|
|
@ -151,7 +152,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
@Test(dataProvider = "MyDataProvider")
|
||||
public void testHCWithGVCF(String bam, ReferenceConfidenceMode mode, String intervals, String md5) {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final String name = "testHCWithGVCF bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5));
|
||||
executeTest(name, spec);
|
||||
|
|
@ -163,7 +164,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
@Test(dataProvider = "MyDataProviderHaploid", enabled=true)
|
||||
public void testHCWithGVCFHaploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -ploidy 1 --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final String name = "testHCWithGVCFHaploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5));
|
||||
executeTest(name, spec);
|
||||
|
|
@ -175,7 +176,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
@Test(dataProvider = "MyDataProviderTetraploid", enabled=true)
|
||||
public void testHCWithGVCFTetraploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -ploidy 4 --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final String name = "testHCWithGVCFTetraploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5));
|
||||
executeTest(name, spec);
|
||||
|
|
@ -187,7 +188,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
@Test(dataProvider = "MyDataProviderManyploid", enabled=true)
|
||||
public void testHCWithGVCFManyploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -ploidy 33 --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, bam, intervals, mode, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final String name = "testHCWithGVCFManyploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5));
|
||||
executeTest(name, spec);
|
||||
|
|
@ -196,7 +197,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testERCRegionWithNoCalledHaplotypes() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testERCRegionWithNoCalledHaplotypes", spec);
|
||||
|
|
@ -211,10 +212,22 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
executeTest("testMissingGVCFIndexingStrategyException", spec);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HaplotypeCaller to ensure it does not throw an exception when a .g.vcf output file is specified and the indexing arguments are omitted
|
||||
*/
|
||||
@Test()
|
||||
public void testGVCFIndexNoThrow() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000000-17000100");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(GATKVCFUtils.GVCF_EXT), Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testGVCFIndexNoThrow", spec);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testWrongParameterGVCFIndexException() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER + 1);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER + 1);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 1, UserException.GVCFIndexException.class);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMissingGVCFIndexingStrategyException", spec);
|
||||
|
|
@ -224,11 +237,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
public void testWrongTypeGVCFIndexException() {
|
||||
// ensure non-optimal, if optimal changes
|
||||
GATKVCFIndexType type = GATKVCFIndexType.DYNAMIC_SEEK;
|
||||
if (HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE == GATKVCFIndexType.DYNAMIC_SEEK)
|
||||
if (GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE == GATKVCFIndexType.DYNAMIC_SEEK)
|
||||
type = GATKVCFIndexType.DYNAMIC_SIZE;
|
||||
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", type, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", type, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 1, UserException.GVCFIndexException.class);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMissingGVCFIndexingStrategyException", spec);
|
||||
|
|
@ -240,7 +253,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
@Test()
|
||||
public void testWrongGVCFNonVariantRecordOrderBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae90a1b5c40f19df38e8ac58c3844ed5"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMissingGVCFIndexingStrategyException", spec);
|
||||
|
|
@ -257,7 +270,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testNoCallGVCFMissingPLsBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("bf6325fbc06c80893f3170668cfbed06"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
|
||||
|
|
@ -269,7 +282,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
@Test(enabled=true)
|
||||
public void testGeneralPloidyArrayIndexBug1Fix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 1 -maxAltAlleles 2 -isr INTERSECTION -L 1:23696115-23696189",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, GENERAL_PLOIDY_BUGFIX1_BAM, GENERAL_PLOIDY_BUGFIX1_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, GENERAL_PLOIDY_BUGFIX1_BAM, GENERAL_PLOIDY_BUGFIX1_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
executeTest(" testGeneralPloidyArrayIndexBug1Fix", spec);
|
||||
|
|
@ -281,7 +294,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
@Test(enabled=true)
|
||||
public void testGeneralPloidyArrayIndexBug2Fix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 2 -maxAltAlleles 2 -A DepthPerSampleHC -A StrandBiasBySample -L 1:38052860-38052937",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, GENERAL_PLOIDY_BUGFIX2_BAM, GENERAL_PLOIDY_BUGFIX2_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, GENERAL_PLOIDY_BUGFIX2_BAM, GENERAL_PLOIDY_BUGFIX2_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
executeTest(" testGeneralPloidyArrayIndexBug2Fix", spec);
|
||||
|
|
|
|||
|
|
@ -362,7 +362,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -dontTrimActiveRegions -ERC GVCF " +
|
||||
"-likelihoodEngine GraphBased -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReferenceWithDecoy, privateTestDir + "graphbased_no_such_edge_bug.bam", privateTestDir + "graphbased_no_such_edge_bug.intervals.bed",
|
||||
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testGraphBasedNoSuchEdgeBugFix", spec);
|
||||
|
|
|
|||
|
|
@ -26,13 +26,11 @@
|
|||
package org.broadinstitute.gatk.engine;
|
||||
|
||||
import htsjdk.samtools.SAMSequenceDictionary;
|
||||
import htsjdk.tribble.index.*;
|
||||
import htsjdk.tribble.index.interval.IntervalTreeIndex;
|
||||
import htsjdk.tribble.index.linear.LinearIndex;
|
||||
import org.apache.log4j.Logger;
|
||||
import htsjdk.tribble.Feature;
|
||||
import htsjdk.tribble.FeatureCodec;
|
||||
import htsjdk.tribble.FeatureCodecHeader;
|
||||
import htsjdk.tribble.index.DynamicIndexCreator;
|
||||
import htsjdk.tribble.index.IndexCreator;
|
||||
import htsjdk.tribble.index.IndexFactory;
|
||||
import htsjdk.tribble.index.interval.IntervalIndexCreator;
|
||||
import htsjdk.tribble.index.linear.LinearIndexCreator;
|
||||
import htsjdk.tribble.index.tabix.TabixFormat;
|
||||
|
|
@ -47,9 +45,8 @@ import htsjdk.variant.variantcontext.VariantContext;
|
|||
import htsjdk.variant.vcf.*;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.*;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
|
|
@ -68,6 +65,16 @@ public class GATKVCFUtils {
|
|||
|
||||
public final static GATKVCFIndexType DEFAULT_INDEX_TYPE = GATKVCFIndexType.DYNAMIC_SEEK; // by default, optimize for seek time. All indices prior to Nov 2013 used this type.
|
||||
public final static Integer DEFAULT_INDEX_PARAMETER = -1; // the default DYNAMIC_SEEK does not use a parameter
|
||||
// as determined experimentally Nov-Dec 2013
|
||||
public final static GATKVCFIndexType DEFAULT_GVCF_INDEX_TYPE = GATKVCFIndexType.LINEAR;
|
||||
public final static Integer DEFAULT_GVCF_INDEX_PARAMETER = 128000;
|
||||
|
||||
// GVCF file extension
|
||||
public final static String GVCF_EXT = "g.vcf";
|
||||
|
||||
// Message for using the deprecated --variant_index_type or --variant_index_parameter arguments.
|
||||
public final static String DEPRECATED_INDEX_ARGS_MSG = "Naming your output file using the .g.vcf extension will automatically set the appropriate values " +
|
||||
" for --variant_index_type and --variant_index_parameter";
|
||||
|
||||
/**
|
||||
* Gets the appropriately formatted header for a VCF file describing this GATK run
|
||||
|
|
@ -260,4 +267,124 @@ public class GATKVCFUtils {
|
|||
codec.close(vcfSource);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the two indices are equivalent
|
||||
*
|
||||
* @param thisIndex index
|
||||
* @param otherIndex index
|
||||
* @return true if indices are equivalent, false otherwise.
|
||||
*/
|
||||
public static boolean equivalentAbstractIndices(AbstractIndex thisIndex, AbstractIndex otherIndex){
|
||||
return thisIndex.getVersion() == otherIndex.getVersion() &&
|
||||
thisIndex.getIndexedFile().equals(otherIndex.getIndexedFile()) &&
|
||||
thisIndex.getIndexedFileSize() == otherIndex.getIndexedFileSize() &&
|
||||
thisIndex.getIndexedFileMD5().equals(otherIndex.getIndexedFileMD5()) &&
|
||||
thisIndex.getFlags() == otherIndex.getFlags();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the two indices are equivalent for a chromosome
|
||||
*
|
||||
* @param thisIndex index
|
||||
* @param otherIndex index
|
||||
* @param chr chromosome
|
||||
* @return true if indices are equivalent, false otherwise.
|
||||
* @throws NoSuchFieldException if index does not exist for a chromosome
|
||||
* @throws IllegalAccessException if index does not exist for a chromosome
|
||||
*/
|
||||
public static boolean equivalentLinearIndices(LinearIndex thisIndex, LinearIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException {
|
||||
htsjdk.tribble.index.linear.LinearIndex.ChrIndex thisChr = (htsjdk.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(thisIndex, chr);
|
||||
htsjdk.tribble.index.linear.LinearIndex.ChrIndex otherChr = (htsjdk.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(otherIndex, chr);
|
||||
|
||||
return thisChr.getName().equals(otherChr.getName()) &&
|
||||
//thisChr.getTotalSize() == otherChr.getTotalSize() && TODO: why does this differ?
|
||||
thisChr.getNFeatures() == otherChr.getNFeatures() &&
|
||||
thisChr.getNBlocks() == otherChr.getNBlocks();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the two interval indices are equivalent for a chromosome
|
||||
*
|
||||
* @param thisIndex interval index
|
||||
* @param otherIndex interval index
|
||||
* @param chr chromosome
|
||||
* @return true if indices are equivalent, false otherwise.
|
||||
* @throws NoSuchFieldException if index does not exist for a chromosome
|
||||
* @throws IllegalAccessException if index does not exist for a chromosome
|
||||
*/
|
||||
public static boolean equivalentIntervalIndices(IntervalTreeIndex thisIndex, IntervalTreeIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException {
|
||||
htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex thisChr = (htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(thisIndex, chr);
|
||||
htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex otherChr = (htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(otherIndex, chr);
|
||||
|
||||
// TODO: compare trees?
|
||||
return thisChr.getName().equals(otherChr.getName());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get index for a chromosome
|
||||
*
|
||||
* @param index index
|
||||
* @param chr chromosome
|
||||
* @return index for the chromosome
|
||||
* @throws NoSuchFieldException if index does not exist for a chromosome
|
||||
* @throws IllegalAccessException if index does not exist for a chromosome
|
||||
*/
|
||||
public static ChrIndex getChrIndex(AbstractIndex index, String chr) throws NoSuchFieldException, IllegalAccessException {
|
||||
Field f = AbstractIndex.class.getDeclaredField("chrIndices");
|
||||
f.setAccessible(true);
|
||||
LinkedHashMap<String, ChrIndex> chrIndices = (LinkedHashMap<String, ChrIndex>) f.get(index);
|
||||
return chrIndices.get(chr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Make an IndexCreator
|
||||
*
|
||||
* @param variantIndexType variant indexing strategy
|
||||
* @param variantIndexParameter variant indexing parameter
|
||||
* @param outputFile output variant file
|
||||
* @param sequenceDictionary collection of SAM sequence records
|
||||
* @return IndexCreator
|
||||
*/
|
||||
public static IndexCreator makeIndexCreator(final GATKVCFIndexType variantIndexType, final int variantIndexParameter, final File outputFile, final SAMSequenceDictionary sequenceDictionary) {
|
||||
/*
|
||||
* If using the index arguments, log a warning.
|
||||
* If the genotype file has the GCVF extension (.g.vcf), use the default GCVF indexing.
|
||||
* Otherwise, use the default index type and parameter.
|
||||
*/
|
||||
GATKVCFIndexType indexType = DEFAULT_INDEX_TYPE;
|
||||
int indexParameter = DEFAULT_INDEX_PARAMETER;
|
||||
if (usingNonDefaultIndexingArguments(variantIndexType, variantIndexParameter)) {
|
||||
indexType = variantIndexType;
|
||||
indexParameter = variantIndexParameter;
|
||||
logger.warn(DEPRECATED_INDEX_ARGS_MSG);
|
||||
} else if (outputFile.getName().endsWith("." + GVCF_EXT)) {
|
||||
indexType = DEFAULT_GVCF_INDEX_TYPE;
|
||||
indexParameter = DEFAULT_GVCF_INDEX_PARAMETER;
|
||||
}
|
||||
|
||||
return getIndexCreator(indexType, indexParameter, outputFile, sequenceDictionary);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if not using the default indexing arguments' values
|
||||
*
|
||||
* @param variantIndexType variant indexing strategy
|
||||
* @param variantIndexParameter variant indexing parameter
|
||||
* @return true if the index type or parameter are not the default values, false otherwise
|
||||
*/
|
||||
public static boolean usingNonDefaultIndexingArguments(final GATKVCFIndexType variantIndexType, final int variantIndexParameter) {
|
||||
return variantIndexType != GATKVCFUtils.DEFAULT_INDEX_TYPE || variantIndexParameter != GATKVCFUtils.DEFAULT_INDEX_PARAMETER;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if using the GCVF indexing arguments' values
|
||||
*
|
||||
* @param variantIndexType variant indexing strategy
|
||||
* @param variantIndexParameter variant indexing parameter
|
||||
* @return true if the index type and parameter are the default GVCF values, false otherwise
|
||||
*/
|
||||
public static boolean usingGVCFIndexingArguments(final GATKVCFIndexType variantIndexType, final int variantIndexParameter) {
|
||||
return variantIndexType == GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE && variantIndexParameter == GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER;
|
||||
}
|
||||
}
|
||||
|
|
@ -621,12 +621,16 @@ public class GATKArgumentCollection {
|
|||
* INTERVAL creates an IntervalTreeIndex with bins with an equal amount of features, specified by the Features Per Bin parameter
|
||||
* DYNAMIC_SEEK attempts to optimize for minimal seek time by choosing an appropriate strategy and parameter (user-supplied parameter is ignored)
|
||||
* DYNAMIC_SIZE attempts to optimize for minimal index size by choosing an appropriate strategy and parameter (user-supplied parameter is ignored)
|
||||
*
|
||||
* This argument is deprecated, using the output file ".g.vcf" extension will automatically set the appropriate value
|
||||
*/
|
||||
@Argument(fullName="variant_index_type",shortName = "variant_index_type",doc="Type of IndexCreator to use for VCF/BCF indices",required=false)
|
||||
@Advanced
|
||||
public GATKVCFIndexType variant_index_type = GATKVCFUtils.DEFAULT_INDEX_TYPE;
|
||||
/**
|
||||
* This is either the bin width or the number of features per bin, depending on the indexing strategy
|
||||
*
|
||||
* This argument is deprecated, using the output file ".g.vcf" extension will automatically set the appropriate value
|
||||
*/
|
||||
@Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator",required=false)
|
||||
@Advanced
|
||||
|
|
|
|||
|
|
@ -130,7 +130,9 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
this.engine = engine;
|
||||
this.genotypeFile = genotypeFile;
|
||||
this.genotypeStream = null;
|
||||
this.indexCreator = GATKVCFUtils.getIndexCreator(engine.getArguments().variant_index_type, engine.getArguments().variant_index_parameter, genotypeFile);
|
||||
|
||||
this.indexCreator = GATKVCFUtils.makeIndexCreator(engine.getArguments().variant_index_type, engine.getArguments().variant_index_parameter,
|
||||
genotypeFile, null);
|
||||
this.argumentSources = argumentSources;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -266,12 +266,11 @@ public class CatVariants extends CommandLineProgram {
|
|||
reader.close();
|
||||
priorityQueue.add(new Pair<>(firstPosition,file));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
FileOutputStream outputStream = new FileOutputStream(outputFile);
|
||||
EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||
final IndexCreator idxCreator = GATKVCFUtils.getIndexCreator(variant_index_type, variant_index_parameter, outputFile, ref.getSequenceDictionary());
|
||||
IndexCreator idxCreator = GATKVCFUtils.makeIndexCreator(variant_index_type, variant_index_parameter, outputFile, ref.getSequenceDictionary());
|
||||
final VariantContextWriter outputWriter = VariantContextWriterFactory.create(outputFile, outputStream, ref.getSequenceDictionary(), idxCreator, options);
|
||||
|
||||
boolean firstFile = true;
|
||||
|
|
|
|||
|
|
@ -24,16 +24,21 @@
|
|||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import htsjdk.tribble.index.Index;
|
||||
import htsjdk.tribble.index.IndexFactory;
|
||||
import htsjdk.variant.vcf.VCFCodec;
|
||||
import htsjdk.tribble.AbstractFeatureReader;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.MD5DB;
|
||||
import org.broadinstitute.gatk.utils.MD5Mismatch;
|
||||
import org.broadinstitute.gatk.utils.runtime.ProcessController;
|
||||
import org.broadinstitute.gatk.utils.runtime.ProcessSettings;
|
||||
import org.broadinstitute.gatk.utils.runtime.RuntimeUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
|
||||
import org.testng.Assert;
|
||||
import org.testng.TestException;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -142,4 +147,82 @@ public class CatVariantsIntegrationTest {
|
|||
ProcessSettings ps = new ProcessSettings(cmdLine.split("\\s+"));
|
||||
pc.execAndCheck(ps);
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// IndexCreator tests
|
||||
//
|
||||
//
|
||||
|
||||
private class VCFIndexCreatorTest extends BaseTest.TestDataProvider {
|
||||
private final GATKVCFIndexType type;
|
||||
private final int parameter;
|
||||
|
||||
private VCFIndexCreatorTest(GATKVCFIndexType type, int parameter) {
|
||||
super(VCFIndexCreatorTest.class);
|
||||
|
||||
this.type = type;
|
||||
this.parameter = parameter;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("Index Type %s, Index Parameter %s", type, parameter);
|
||||
}
|
||||
|
||||
public Index getIndex(final File vcfFile) {
|
||||
switch (type) {
|
||||
case DYNAMIC_SEEK : return IndexFactory.createDynamicIndex(vcfFile, new VCFCodec(), IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||
case DYNAMIC_SIZE : return IndexFactory.createDynamicIndex(vcfFile, new VCFCodec(), IndexFactory.IndexBalanceApproach.FOR_SIZE);
|
||||
case LINEAR : return IndexFactory.createLinearIndex(vcfFile, new VCFCodec(), parameter);
|
||||
case INTERVAL : return IndexFactory.createIntervalIndex(vcfFile, new VCFCodec(), parameter);
|
||||
default : throw new TestException("Invalid index type");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "IndexDataProvider")
|
||||
public Object[][] indexCreatorData() {
|
||||
new VCFIndexCreatorTest(GATKVCFIndexType.DYNAMIC_SEEK, 0);
|
||||
new VCFIndexCreatorTest(GATKVCFIndexType.DYNAMIC_SIZE, 0);
|
||||
new VCFIndexCreatorTest(GATKVCFIndexType.LINEAR, 100);
|
||||
new VCFIndexCreatorTest(GATKVCFIndexType.LINEAR, 10000);
|
||||
new VCFIndexCreatorTest(GATKVCFIndexType.INTERVAL, 20);
|
||||
new VCFIndexCreatorTest(GATKVCFIndexType.INTERVAL, 2000);
|
||||
|
||||
return BaseTest.TestDataProvider.getTests(VCFIndexCreatorTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "IndexDataProvider")
|
||||
public void testCatVariantsVCFIndexCreation(VCFIndexCreatorTest testSpec) throws IOException{
|
||||
|
||||
String cmdLine = String.format("java -cp %s %s -R %s -V %s -V %s --variant_index_type %s --variant_index_parameter %s -out %s",
|
||||
StringUtils.join(RuntimeUtils.getAbsoluteClassPaths(), File.pathSeparatorChar),
|
||||
CatVariants.class.getCanonicalName(),
|
||||
BaseTest.b37KGReference,
|
||||
CatVariantsVcf1,
|
||||
CatVariantsVcf2,
|
||||
testSpec.type,
|
||||
testSpec.parameter,
|
||||
BaseTest.createTempFile("CatVariantsVCFIndexCreationTest", ".vcf"));
|
||||
|
||||
ProcessController pc = ProcessController.getThreadLocal();
|
||||
ProcessSettings ps = new ProcessSettings(cmdLine.split("\\s+"));
|
||||
pc.execAndCheck(ps);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testCatVariantsGVCFIndexCreation() throws IOException{
|
||||
|
||||
String cmdLine = String.format("java -cp %s %s -R %s -V %s -V %s -out %s",
|
||||
StringUtils.join(RuntimeUtils.getAbsoluteClassPaths(), File.pathSeparatorChar),
|
||||
CatVariants.class.getCanonicalName(),
|
||||
BaseTest.b37KGReference,
|
||||
CatVariantsVcf1,
|
||||
CatVariantsVcf2,
|
||||
BaseTest.createTempFile("CatVariantsGVCFIndexCreationTest", "." + GATKVCFUtils.GVCF_EXT));
|
||||
|
||||
ProcessController pc = ProcessController.getThreadLocal();
|
||||
ProcessSettings ps = new ProcessSettings(cmdLine.split("\\s+"));
|
||||
pc.execAndCheck(ps);
|
||||
}
|
||||
}
|
||||
|
|
@ -28,16 +28,17 @@ package org.broadinstitute.gatk.tools.walkers.variantutils;
|
|||
import htsjdk.tribble.AbstractFeatureReader;
|
||||
import htsjdk.tribble.Tribble;
|
||||
import htsjdk.tribble.index.AbstractIndex;
|
||||
import htsjdk.tribble.index.ChrIndex;
|
||||
import htsjdk.tribble.index.Index;
|
||||
import htsjdk.tribble.index.IndexFactory;
|
||||
import htsjdk.tribble.index.interval.IntervalTreeIndex;
|
||||
import htsjdk.tribble.index.linear.LinearIndex;
|
||||
import htsjdk.tribble.index.tabix.TabixIndex;
|
||||
import htsjdk.tribble.util.TabixUtils;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||
import htsjdk.variant.vcf.VCFCodec;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
|
||||
import org.testng.Assert;
|
||||
import org.testng.TestException;
|
||||
|
|
@ -45,9 +46,8 @@ import org.testng.annotations.DataProvider;
|
|||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.Field;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
|
||||
public class VCFIntegrationTest extends WalkerTest {
|
||||
|
|
@ -205,77 +205,67 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
@Test(dataProvider = "IndexDataProvider")
|
||||
public void testVCFIndexCreation(VCFIndexCreatorTest testSpec) throws NoSuchFieldException, IllegalAccessException {
|
||||
public void testVCFIndexCreation(VCFIndexCreatorTest testSpec) throws NoSuchFieldException, IllegalAccessException, IOException {
|
||||
|
||||
final String logFileName = new String("testVCFIndexCreation.log");
|
||||
final String chromosome = "20";
|
||||
final String commandLine = " -T SelectVariants" +
|
||||
" -R " + b37KGReference +
|
||||
" --no_cmdline_in_header" +
|
||||
" -L 20" +
|
||||
" -L " + chromosome +
|
||||
" -V " + b37_NA12878_OMNI +
|
||||
" --variant_index_type " + testSpec.type +
|
||||
" --variant_index_parameter " + testSpec.parameter +
|
||||
" -o %s ";
|
||||
" -log " + logFileName +
|
||||
" -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
final String name = "testVCFIndexCreation: " + testSpec.toString();
|
||||
|
||||
// execute that test and check if the actual and expected indices are the same
|
||||
executeTestAndCheckIndices(name, chromosome, testSpec, spec);
|
||||
|
||||
// check the log for the warning message
|
||||
File file = new File(logFileName);
|
||||
Assert.assertTrue(FileUtils.readFileToString(file).contains(GATKVCFUtils.DEPRECATED_INDEX_ARGS_MSG));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCFIndexCreationNoArgs() throws NoSuchFieldException, IllegalAccessException {
|
||||
|
||||
final String chromosome = "20";
|
||||
final String commandLine = " -T SelectVariants" +
|
||||
" -R " + b37KGReference +
|
||||
" --no_cmdline_in_header" +
|
||||
" -L " + chromosome +
|
||||
" -V " + b37_NA12878_OMNI +
|
||||
" -o %s";
|
||||
final String name = "testVCFIndexCreationNoArgs";
|
||||
VCFIndexCreatorTest testSpec = new VCFIndexCreatorTest(GATKVCFUtils.DEFAULT_INDEX_TYPE, GATKVCFUtils.DEFAULT_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
|
||||
File outVCF = executeTest(name, spec).first.get(0);
|
||||
File outIdx = new File(outVCF.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
|
||||
|
||||
final Index actualIndex = IndexFactory.loadIndex(outIdx.getAbsolutePath());
|
||||
final Index expectedIndex = testSpec.getIndex(outVCF);
|
||||
|
||||
if (testSpec.type.equals("LINEAR"))
|
||||
Assert.assertTrue(actualIndex instanceof LinearIndex, "Index is not a LinearIndex");
|
||||
else if (testSpec.type.equals("INTERVAL"))
|
||||
Assert.assertTrue(actualIndex instanceof IntervalTreeIndex, "Index is not a IntervalTreeIndex");
|
||||
// dynamic indices ultimately resolve to one of LinearIndex or IntervalTreeIndex
|
||||
|
||||
Assert.assertTrue(equivalentAbstractIndices((AbstractIndex)actualIndex, (AbstractIndex)expectedIndex), "Indices are not equivalent");
|
||||
|
||||
if (actualIndex instanceof LinearIndex && expectedIndex instanceof LinearIndex) {
|
||||
Assert.assertTrue(equivalentLinearIndices((LinearIndex)actualIndex, (LinearIndex)expectedIndex, "20"), "Linear indices are not equivalent");
|
||||
}
|
||||
else if (actualIndex instanceof IntervalTreeIndex && expectedIndex instanceof IntervalTreeIndex) {
|
||||
Assert.assertTrue(equivalentIntervalIndices((IntervalTreeIndex)actualIndex, (IntervalTreeIndex)expectedIndex, "20"), "Interval indices are not equivalent");
|
||||
}
|
||||
else {
|
||||
Assert.fail("Indices are not of the same type");
|
||||
}
|
||||
// execute that test and check if the actual and expected indices are the same
|
||||
executeTestAndCheckIndices(name, chromosome, testSpec, spec);
|
||||
}
|
||||
|
||||
private static boolean equivalentAbstractIndices(AbstractIndex thisIndex, AbstractIndex otherIndex){
|
||||
return thisIndex.getVersion() == otherIndex.getVersion() &&
|
||||
thisIndex.getIndexedFile().equals(otherIndex.getIndexedFile()) &&
|
||||
thisIndex.getIndexedFileSize() == otherIndex.getIndexedFileSize() &&
|
||||
thisIndex.getIndexedFileMD5().equals(otherIndex.getIndexedFileMD5()) &&
|
||||
thisIndex.getFlags() == otherIndex.getFlags();
|
||||
}
|
||||
@Test
|
||||
public void testGVCFIndexCreation() throws NoSuchFieldException, IllegalAccessException {
|
||||
|
||||
private static boolean equivalentLinearIndices(LinearIndex thisIndex, LinearIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException {
|
||||
htsjdk.tribble.index.linear.LinearIndex.ChrIndex thisChr = (htsjdk.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(thisIndex, chr);
|
||||
htsjdk.tribble.index.linear.LinearIndex.ChrIndex otherChr = (htsjdk.tribble.index.linear.LinearIndex.ChrIndex)getChrIndex(otherIndex, chr);
|
||||
final String chromosome = "20";
|
||||
final String commandLine = " -T SelectVariants" +
|
||||
" -R " + b37KGReference +
|
||||
" --no_cmdline_in_header" +
|
||||
" -L " + chromosome +
|
||||
" -V " + b37_NA12878_OMNI +
|
||||
" -o %s";
|
||||
final String name = "testGVCFIndexCreation";
|
||||
VCFIndexCreatorTest testSpec = new VCFIndexCreatorTest(GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine, Arrays.asList(GATKVCFUtils.GVCF_EXT), Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
|
||||
return thisChr.getName().equals(otherChr.getName()) &&
|
||||
//thisChr.getTotalSize() == otherChr.getTotalSize() && TODO: why does this differ?
|
||||
thisChr.getNFeatures() == otherChr.getNFeatures() &&
|
||||
thisChr.getNBlocks() == otherChr.getNBlocks();
|
||||
}
|
||||
|
||||
private static boolean equivalentIntervalIndices(IntervalTreeIndex thisIndex, IntervalTreeIndex otherIndex, String chr) throws NoSuchFieldException, IllegalAccessException {
|
||||
htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex thisChr = (htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(thisIndex, chr);
|
||||
htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex otherChr = (htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex)getChrIndex(otherIndex, chr);
|
||||
|
||||
// TODO: compare trees?
|
||||
return thisChr.getName().equals(otherChr.getName());
|
||||
}
|
||||
|
||||
private static ChrIndex getChrIndex(AbstractIndex index, String chr) throws NoSuchFieldException, IllegalAccessException {
|
||||
Field f = AbstractIndex.class.getDeclaredField("chrIndices");
|
||||
f.setAccessible(true);
|
||||
LinkedHashMap<String, ChrIndex> chrIndices = (LinkedHashMap<String, ChrIndex>) f.get(index);
|
||||
return chrIndices.get(chr);
|
||||
// execute that test and check if the actual and expected indices are the same
|
||||
executeTestAndCheckIndices(name, chromosome, testSpec, spec);
|
||||
}
|
||||
|
||||
//
|
||||
|
|
@ -375,4 +365,31 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
executeTest(name, spec);
|
||||
}
|
||||
|
||||
private void executeTestAndCheckIndices(final String name, final String chr, final VCFIndexCreatorTest testSpec, final WalkerTestSpec walkerTestSpec)
|
||||
throws NoSuchFieldException, IllegalAccessException {
|
||||
|
||||
File outVCF = executeTest(name, walkerTestSpec).first.get(0);
|
||||
File outIdx = new File(outVCF.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
|
||||
|
||||
final Index actualIndex = IndexFactory.loadIndex(outIdx.getAbsolutePath());
|
||||
final Index expectedIndex = testSpec.getIndex(outVCF);
|
||||
|
||||
if (testSpec.type.equals("LINEAR"))
|
||||
Assert.assertTrue(actualIndex instanceof LinearIndex, "Index is not a LinearIndex");
|
||||
else if (testSpec.type.equals("INTERVAL"))
|
||||
Assert.assertTrue(actualIndex instanceof IntervalTreeIndex, "Index is not a IntervalTreeIndex");
|
||||
// dynamic indices ultimately resolve to one of LinearIndex or IntervalTreeIndex
|
||||
|
||||
Assert.assertTrue(GATKVCFUtils.equivalentAbstractIndices((AbstractIndex) actualIndex, (AbstractIndex) expectedIndex), "Indices are not equivalent");
|
||||
|
||||
if (actualIndex instanceof LinearIndex && expectedIndex instanceof LinearIndex) {
|
||||
Assert.assertTrue(GATKVCFUtils.equivalentLinearIndices((LinearIndex) actualIndex, (LinearIndex) expectedIndex, chr), "Linear indices are not equivalent");
|
||||
}
|
||||
else if (actualIndex instanceof IntervalTreeIndex && expectedIndex instanceof IntervalTreeIndex) {
|
||||
Assert.assertTrue(GATKVCFUtils.equivalentIntervalIndices((IntervalTreeIndex) actualIndex, (IntervalTreeIndex) expectedIndex, chr), "Interval indices are not equivalent");
|
||||
}
|
||||
else {
|
||||
Assert.fail("Indices are not of the same type");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue