Merge pull request #1528 from broadinstitute/rhl_validate_vcf.gz

Output files whose names end in .vcf.gz are written gzipped; file names that merely contain the string ".bcf" (without ending in ".bcf" or ".bcf.gz") now result in VCF output rather than BCF.
This commit is contained in:
Ron Levine 2016-12-06 06:46:15 -05:00 committed by GitHub
commit 24336e85b6
4 changed files with 32 additions and 6 deletions

View File

@ -63,12 +63,14 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
@ -236,15 +238,19 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
}
/**
* Test HaplotypeCaller to ensure it does not throw an exception when a .g.vcf.gz output file is specified and the indexing arguments are omitted
* Test HaplotypeCaller to ensure it does not throw an exception when a .g.vcf.gz output file is specified and the indexing arguments are omitted.
* Verify that the output file is using the GZIP file format.
*/
@Test()
public void testGVCFGzIndexNoThrow() {
public void testGVCFGzIndexNoThrow() throws IOException {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF",
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000000-17000100");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(GATKVCFUtils.GVCF_GZ_EXT), Arrays.asList(""));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine, Arrays.asList(""));
final File outputFile = createTempFile("testGVCFGzIndexNoThrow", "." + GATKVCFUtils.GVCF_GZ_EXT);
spec.setOutputFileLocation(outputFile);
spec.disableShadowBCF();
executeTest("testGVCFIndexNoThrow", spec);
executeTest("testGVCFGzIndexNoThrow", spec);
final GZIPInputStream gzipOutputFileStream = new GZIPInputStream(new FileInputStream(outputFile));
}
@Test()

View File

@ -53,6 +53,8 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.FeatureReader;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.PositionalBufferedStream;
import htsjdk.variant.variantcontext.VariantContext;
@ -527,5 +529,21 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testAlleleBalance() throws IOException{
HCTest(CEUTRIO_BAM, " -L 20:10001000-10010000 -A AlleleBalance -A AlleleBalanceBySample", "a210161843f4cb80143ff56e4e5c250f");
}
/**
 * Test that an output file whose name contains the string ".bcf" but ends in ".vcf" is written as VCF.
 *
 * @throws IOException if closing the reader opened on the output file fails
 */
@Test()
public void testBCFInFileNameGivesVCF() throws IOException {
    // NOTE(review): this value is the MD5 of empty input -- confirm that is the intended expectation.
    final String md5 = "d41d8cd98f00b204e9800998ecf8427e";
    final String commandLine = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --maxReadsInRegionPerSample 1000" +
            " --minReadsPerAlignmentStart 5 --maxProbPropagationDistance 50 --activeProbabilityThreshold 0.002 --pcr_indel_model NONE" +
            " -pairHMMSub %s %s -R %s -I %s -L %s -minPruning 3 --no_cmdline_in_header",
            HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, NA12878_BAM, "20:10000000-10100000");
    final WalkerTestSpec spec = new WalkerTestSpec(commandLine, Arrays.asList(md5));

    // The temp file name contains ".bcf" but ends with ".vcf".
    final File outputFile = createTempFile("testBCFInFileNameGivesVCF", ".bcftoolsFile.vcf");
    spec.setOutputFileLocation(outputFile);
    spec.disableShadowBCF();
    executeTest("testBCFInFileNameGivesVCF", spec);

    // Will throw an exception if the file is not a VCF.
    // The reader is opened in try-with-resources so the underlying file handle is always released.
    try (final FeatureReader<VariantContext> reader =
                 AbstractFeatureReader.getFeatureReader(outputFile.getAbsolutePath(), new VCFCodec(), false)) {
        // Nothing to read: successfully opening the file with the VCF codec is the assertion.
    }
}
}

View File

@ -149,7 +149,8 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
// if the stub says to test BCF, create a secondary writer to BCF and a 2-way output writer to send to both
// TODO -- remove me when argument generateShadowBCF is removed
if ( stub.alsoWriteBCFForTest() &&
! (options.contains(Options.FORCE_BCF) || file != null && file.getName().contains(".bcf")) ) {
! ( options.contains(Options.FORCE_BCF) ||
file != null && (file.getName().endsWith(".bcf") || file.getName().endsWith(".bcf.gz")) ) ) {
final File bcfFile = BCF2Utils.shadowBCF(file);
if ( bcfFile != null ) {
final VariantContextWriter bcfWriter = new VariantContextWriterBuilder()

View File

@ -216,7 +216,8 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
if ( writeFullFormatField ) options.add(Options.WRITE_FULL_FORMAT_FIELD);
final File file = getOutputFile();
if ( forceBCF || (file != null && options.contains(Options.FORCE_BCF) || file != null && file.getName().contains(".bcf")) )
if ( forceBCF || (file != null && options.contains(Options.FORCE_BCF) ||
file != null && (file.getName().endsWith(".bcf") || file.getName().endsWith(".bcf.gz"))) )
options.add(Options.FORCE_BCF);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);