Merge pull request #610 from broadinstitute/jt_block_compressed_vcfs
Enable reading of other extensions for block-compressed VCFs
This commit is contained in:
commit
b840cf6b3f
|
|
@ -131,7 +131,6 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
throw new UserException.CouldNotCreateOutputFile(file, "Unable to open target output stream", ex);
|
||||
}
|
||||
|
||||
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
|
||||
EnumSet<Options> options = stub.getWriterOptions(indexOnTheFly);
|
||||
VariantContextWriter writer = VariantContextWriterFactory.create(file, this.stream, stub.getMasterSequenceDictionary(), stub.getIndexCreator(), options);
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.io.stubs;
|
||||
|
||||
import org.broad.tribble.AbstractFeatureReader;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
|
|
@ -49,8 +50,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
|
||||
public static final String SITES_ONLY_ARG_NAME = "sites_only";
|
||||
public static final String FORCE_BCF = "bcf";
|
||||
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES =
|
||||
new HashSet<>(Arrays.asList(VariantContextWriterFactory.BLOCK_COMPRESSED_EXTENSIONS));
|
||||
|
||||
/**
|
||||
* The engine into which output stubs should be fed.
|
||||
|
|
@ -223,19 +222,6 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
* @return true if the file will be compressed.
|
||||
*/
|
||||
public static boolean isCompressed(String writerFileName) {
|
||||
return writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName));
|
||||
return writerFileName != null && AbstractFeatureReader.hasBlockCompressedExtension(writerFileName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a lower-cased version of the suffix of the provided file.
|
||||
* @param fileName the file name. Must not be null.
|
||||
* @return lower-cased version of the file suffix. Will not be null.
|
||||
*/
|
||||
private static String getFileSuffix(String fileName) {
|
||||
int indexOfLastDot = fileName.lastIndexOf(".");
|
||||
if ( indexOfLastDot == -1 )
|
||||
return "";
|
||||
return fileName.substring(indexOfLastDot).toLowerCase();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broad.tribble.util.LittleEndianOutputStream;
|
|||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -141,7 +142,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
|
||||
// return a feature reader track
|
||||
Pair<AbstractFeatureReader, SAMSequenceDictionary> pair;
|
||||
if (inputFile.getAbsolutePath().endsWith(".gz"))
|
||||
if (VCFWriterArgumentTypeDescriptor.isCompressed(inputFile.toString()))
|
||||
pair = createTabixIndexedFeatureSource(descriptor, name, inputFile);
|
||||
else
|
||||
pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType());
|
||||
|
|
@ -178,9 +179,8 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
// we might not know the index type, try loading with the default reader constructor
|
||||
logger.debug("Attempting to load " + inputFile + " as a tabix indexed file without validating it");
|
||||
try {
|
||||
final File indexFile = null;//new File(inputFile.getAbsoluteFile() + TabixUtils.STANDARD_INDEX_EXTENSION);
|
||||
final SAMSequenceDictionary dict = null; //TabixUtils.getSequenceDictionary(indexFile);
|
||||
return new Pair<>(AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name, inputFile)), dict);
|
||||
// getFeatureReader will detect that it's Tabix
|
||||
return new Pair<>(AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name, inputFile)), null);
|
||||
} catch (TribbleException e) {
|
||||
throw new UserException(e.getMessage(), e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,6 @@ import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFact
|
|||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* Concatenates VCF files of non-overlapped genome intervals, all with the same set of samples
|
||||
|
|
@ -73,13 +72,12 @@ import java.util.*;
|
|||
* <h3>Input</h3>
|
||||
* <p>
|
||||
* One or more variant sets to combine. They should be of non-overlapping genome intervals and with the same samples (in the same order).
|
||||
* The input files should be 'name.vcf' or 'name.VCF' or 'name.bcf' or 'name.BCF'.
|
||||
* If the files are ordered according to the appearance of intervals in the ref genome, then one can use the -assumeSorted flag.
|
||||
* </p>
|
||||
*
|
||||
* <h3>Output</h3>
|
||||
* <p>
|
||||
* A combined VCF. The output file should be 'name.vcf' or 'name.VCF'.
|
||||
* A combined VCF or BCF. The output file should have the same extension as the input(s).
|
||||
* </p>
|
||||
*
|
||||
* <h3>Important note</h3>
|
||||
|
|
@ -113,17 +111,17 @@ public class CatVariants extends CommandLineProgram {
|
|||
* The VCF or BCF files to merge together
|
||||
*
|
||||
* CatVariants can take any number of -V arguments on the command line. Each -V argument
|
||||
* will be included in the final merged output VCF. The order of arguments does not matter, but it runs more
|
||||
* will be included in the final merged output VCF/BCF. The order of arguments does not matter, but it runs more
|
||||
* efficiently if they are sorted based on the intervals and the assumeSorted argument is used.
|
||||
*
|
||||
*/
|
||||
@Input(fullName="variant", shortName="V", doc="Input VCF file/s named <name>.vcf or <name>.bcf", required = true)
|
||||
@Input(fullName="variant", shortName="V", doc="Input VCF file/s", required = true)
|
||||
private List<File> variant = null;
|
||||
|
||||
@Output(fullName = "outputFile", shortName = "out", doc = "output file name <name>.vcf or <name>.bcf", required = true)
|
||||
@Output(fullName = "outputFile", shortName = "out", doc = "output file", required = true)
|
||||
private File outputFile = null;
|
||||
|
||||
@Argument(fullName = "assumeSorted", shortName = "assumeSorted", doc = "assumeSorted should be true if he input files are already sorted (based on the position of the variants", required = false)
|
||||
@Argument(fullName = "assumeSorted", shortName = "assumeSorted", doc = "assumeSorted should be true if the input files are already sorted (based on the position of the variants)", required = false)
|
||||
private Boolean assumeSorted = false;
|
||||
|
||||
@Argument(fullName = "variant_index_type", doc = "which type of IndexCreator to use for VCF/BCF indices", required = false)
|
||||
|
|
@ -137,19 +135,69 @@ public class CatVariants extends CommandLineProgram {
|
|||
*/
|
||||
private static void printUsage() {
|
||||
System.err.println("Usage: java -cp target/GenomeAnalysisTK.jar org.broadinstitute.sting.tools.CatVariants --reference <reference> --variant <input VCF or BCF file; can specify --variant multiple times> --outputFile <outputFile> [--assumeSorted]");
|
||||
System.err.println(" The input file(s) can be of type: VCF (must end in .vcf or .VCF) or");
|
||||
System.err.println(" BCF2 (must end in .bcf or .BCF).");
|
||||
System.err.println(" Output file must be of type vcf or bcf (must end in .vcf or .bcf).");
|
||||
System.err.println(" The output file must be of the same type as all input files.");
|
||||
System.err.println(" If the input files are already sorted, then indicate that with --assumeSorted to improve performance.");
|
||||
}
|
||||
|
||||
private enum FileType {
|
||||
VCF,
|
||||
BCF,
|
||||
BLOCK_COMPRESSED_VCF,
|
||||
INVALID
|
||||
}
|
||||
|
||||
private FileType fileExtensionCheck(File inFile, File outFile) {
|
||||
final String inFileName = inFile.toString().toLowerCase();
|
||||
final String outFileName = outFile.toString().toLowerCase();
|
||||
|
||||
FileType inFileType = FileType.INVALID;
|
||||
|
||||
if (inFileName.endsWith(".vcf")) {
|
||||
inFileType = FileType.VCF;
|
||||
if (outFileName.endsWith(".vcf"))
|
||||
return inFileType;
|
||||
}
|
||||
|
||||
if (inFileName.endsWith(".bcf")) {
|
||||
inFileType = FileType.BCF;
|
||||
if (outFileName.endsWith(".bcf"))
|
||||
return inFileType;
|
||||
}
|
||||
|
||||
for (String extension : AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS) {
|
||||
if (inFileName.endsWith(".vcf" + extension)) {
|
||||
inFileType = FileType.BLOCK_COMPRESSED_VCF;
|
||||
if (outFileName.endsWith(".vcf" + extension))
|
||||
return inFileType;
|
||||
}
|
||||
}
|
||||
|
||||
if (inFileType == FileType.INVALID)
|
||||
System.err.println(String.format("File extension for input file %s is not valid for CatVariants", inFile));
|
||||
else
|
||||
System.err.println(String.format("File extension mismatch between input %s and output %s", inFile, outFile));
|
||||
|
||||
printUsage();
|
||||
return FileType.INVALID;
|
||||
}
|
||||
|
||||
private FeatureReader<VariantContext> getFeatureReader(final FileType fileType, final File file) {
|
||||
FeatureReader<VariantContext> reader = null;
|
||||
switch(fileType) {
|
||||
case VCF:
|
||||
case BLOCK_COMPRESSED_VCF:
|
||||
// getFeatureReader will handle both block-compressed and plain text VCFs
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
break;
|
||||
case BCF:
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new BCF2Codec(), false);
|
||||
break;
|
||||
}
|
||||
return reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int execute() throws Exception {
|
||||
//if(help){
|
||||
// printUsage();
|
||||
// return 1;
|
||||
//}
|
||||
|
||||
BasicConfigurator.configure();
|
||||
logger.setLevel(Level.INFO);
|
||||
|
||||
|
|
@ -162,37 +210,27 @@ public class CatVariants extends CommandLineProgram {
|
|||
|
||||
Comparator<Pair<Integer,File>> positionComparator = new PositionComparator();
|
||||
|
||||
|
||||
//PriorityQueue<Pair<Integer,FeatureReader<VariantContext>>> queue =
|
||||
// new PriorityQueue<Pair<Integer,FeatureReader<VariantContext>>>(2000, comparator);
|
||||
Queue<Pair<Integer,File>> priorityQueue;
|
||||
if(assumeSorted)
|
||||
priorityQueue = new LinkedList<Pair<Integer,File>>();
|
||||
if (assumeSorted)
|
||||
priorityQueue = new LinkedList<>();
|
||||
else
|
||||
priorityQueue = new PriorityQueue<Pair<Integer,File>>(10000, positionComparator);
|
||||
priorityQueue = new PriorityQueue<>(10000, positionComparator);
|
||||
|
||||
Iterator<File> files = variant.iterator();
|
||||
File file;
|
||||
while (files.hasNext()) {
|
||||
file = files.next();
|
||||
if (!(file.getName().endsWith(".vcf") || file.getName().endsWith(".VCF") || file.getName().endsWith(".bcf") || file.getName().endsWith(".BCF"))){
|
||||
System.err.println("File " + file.getAbsolutePath() + " should be <name>.vcf or <name>.bcf");
|
||||
printUsage();
|
||||
FileType fileType = FileType.INVALID;
|
||||
for (File file : variant) {
|
||||
// if it returns a valid type, it will be the same for all files
|
||||
fileType = fileExtensionCheck(file, outputFile);
|
||||
if (fileType == FileType.INVALID)
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (assumeSorted){
|
||||
priorityQueue.add(new Pair<Integer, File>(0,file));
|
||||
priorityQueue.add(new Pair<>(0,file));
|
||||
}
|
||||
else{
|
||||
if (!file.exists()) {
|
||||
throw new UserException(String.format("File %s doesn't exist",file.getAbsolutePath()));
|
||||
}
|
||||
FeatureReader<VariantContext> reader;
|
||||
boolean useVCF = (file.getName().endsWith(".vcf") || file.getName().endsWith(".VCF"));
|
||||
if(useVCF)
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
else
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new BCF2Codec(), false);
|
||||
FeatureReader<VariantContext> reader = getFeatureReader(fileType, file);
|
||||
Iterator<VariantContext> it = reader.iterator();
|
||||
if(!it.hasNext()){
|
||||
System.err.println(String.format("File %s is empty. This file will be ignored",file.getAbsolutePath()));
|
||||
|
|
@ -201,37 +239,25 @@ public class CatVariants extends CommandLineProgram {
|
|||
VariantContext vc = it.next();
|
||||
int firstPosition = vc.getStart();
|
||||
reader.close();
|
||||
//queue.add(new Pair<Integer, FeatureReader<VariantContext>>(firstPosition,reader));
|
||||
priorityQueue.add(new Pair<Integer, File>(firstPosition,file));
|
||||
priorityQueue.add(new Pair<>(firstPosition,file));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!(outputFile.getName().endsWith(".vcf") || outputFile.getName().endsWith(".VCF"))){
|
||||
throw new UserException(String.format("Output file %s should be <name>.vcf", outputFile));
|
||||
}
|
||||
|
||||
FileOutputStream outputStream = new FileOutputStream(outputFile);
|
||||
EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||
final IndexCreator idxCreator = GATKVCFUtils.getIndexCreator(variant_index_type, variant_index_parameter, outputFile, ref.getSequenceDictionary());
|
||||
final VariantContextWriter outputWriter = VariantContextWriterFactory.create(outputFile, outputStream, ref.getSequenceDictionary(), idxCreator, options);
|
||||
|
||||
boolean firstFile = true;
|
||||
int count =0;
|
||||
//while(!queue.isEmpty()){
|
||||
int count = 0;
|
||||
while(!priorityQueue.isEmpty() ){
|
||||
count++;
|
||||
//FeatureReader<VariantContext> reader = queue.remove().getSecond();
|
||||
file = priorityQueue.remove().getSecond();
|
||||
File file = priorityQueue.remove().getSecond();
|
||||
if (!file.exists()) {
|
||||
throw new UserException(String.format("File %s doesn't exist",file.getAbsolutePath()));
|
||||
}
|
||||
FeatureReader<VariantContext> reader;
|
||||
boolean useVCF = (file.getName().endsWith(".vcf") || file.getName().endsWith(".VCF"));
|
||||
if(useVCF)
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
else
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new BCF2Codec(), false);
|
||||
FeatureReader<VariantContext> reader = getFeatureReader(fileType, file);
|
||||
|
||||
if(count%10 ==0)
|
||||
System.out.print(count);
|
||||
|
|
@ -255,13 +281,11 @@ public class CatVariants extends CommandLineProgram {
|
|||
}
|
||||
System.out.println();
|
||||
|
||||
outputStream.close();
|
||||
outputWriter.close();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
public static void main(String[] args){
|
||||
try {
|
||||
CatVariants instance = new CatVariants();
|
||||
|
|
@ -286,5 +310,4 @@ public class CatVariants extends CommandLineProgram {
|
|||
return startPositionP1 < startPositionP2 ? -1 : 1 ;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import org.apache.commons.io.FileUtils;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
|
|
@ -267,6 +268,25 @@ public class ProcessController {
|
|||
return new ProcessOutput(exitCode, stdout, stderr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes a command line program with the settings and waits for it to return,
|
||||
* processing the output on a background thread.
|
||||
*
|
||||
* Throws an IOException if the ProcessOutput exit code is nonzero
|
||||
*
|
||||
* @param settings Settings to be run.
|
||||
*/
|
||||
public ProcessOutput execAndCheck(ProcessSettings settings) throws IOException {
|
||||
ProcessOutput po = exec(settings);
|
||||
if (po.getExitValue() != 0) {
|
||||
String message = String.format("Process exited with %d\nCommand Line: %s",
|
||||
po.getExitValue(),
|
||||
Utils.join(" ", settings.getCommand()));
|
||||
throw new IOException(message);
|
||||
}
|
||||
return po;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The set of still running processes.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.MD5DB;
|
||||
import org.broadinstitute.sting.MD5Mismatch;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.runtime.*;
|
||||
|
||||
public class LoggingIntegrationTest {
|
||||
|
|
@ -100,14 +99,14 @@ public class LoggingIntegrationTest {
|
|||
// output argument
|
||||
|
||||
ProcessSettings ps = new ProcessSettings(cfg.getCmdLine(false).split("\\s+"));
|
||||
execAndCheck(pc, ps);
|
||||
pc.execAndCheck(ps);
|
||||
String output_argument_md5 = md5db.calculateFileMD5(cfg.argumentOutputFile);
|
||||
|
||||
// pipe to stdout
|
||||
|
||||
ps = new ProcessSettings(cfg.getCmdLine(true).split("\\s+"));
|
||||
ps.setStdoutSettings(new OutputStreamSettings(cfg.pipedOutputFile));
|
||||
execAndCheck(pc, ps);
|
||||
pc.execAndCheck(ps);
|
||||
|
||||
MD5DB.MD5Match result = md5db.testFileMD5("LoggingIntegrationTest", "LoggingIntegrationTest", cfg.pipedOutputFile, output_argument_md5, false);
|
||||
if(result.failed) {
|
||||
|
|
@ -115,21 +114,4 @@ public class LoggingIntegrationTest {
|
|||
Assert.fail(failure.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a process, and throw an IOException if the exit code is nonzero
|
||||
*
|
||||
* @param pc
|
||||
* @param ps
|
||||
* @throws IOException
|
||||
*/
|
||||
private void execAndCheck(ProcessController pc, ProcessSettings ps) throws IOException {
|
||||
ProcessOutput po = pc.exec(ps);
|
||||
if (po.getExitValue() != 0) {
|
||||
String message = String.format("Process exited with %d\nCommand Line: %s",
|
||||
po.getExitValue(),
|
||||
Utils.join(" ", ps.getCommand()));
|
||||
throw new IOException(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.tools;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.broad.tribble.AbstractFeatureReader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.MD5DB;
|
||||
import org.broadinstitute.sting.MD5Mismatch;
|
||||
import org.broadinstitute.sting.utils.runtime.ProcessController;
|
||||
import org.broadinstitute.sting.utils.runtime.ProcessSettings;
|
||||
import org.broadinstitute.sting.utils.runtime.RuntimeUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
public class CatVariantsIntegrationTest {
|
||||
private final MD5DB md5db = new MD5DB();
|
||||
private final File CatVariantsDir = new File(BaseTest.privateTestDir, "CatVariants");
|
||||
|
||||
private class CatVariantsTestProvider extends BaseTest.TestDataProvider {
|
||||
private final File file1;
|
||||
private final File file2;
|
||||
public final File outputFile;
|
||||
public final String md5;
|
||||
|
||||
private CatVariantsTestProvider(final String file1, final String file2, final File outputFile, final String md5) {
|
||||
super(CatVariantsTestProvider.class);
|
||||
|
||||
this.file1 = new File(CatVariantsDir, file1);
|
||||
this.file2 = new File(CatVariantsDir, file2);
|
||||
this.outputFile = outputFile;
|
||||
this.md5 = md5;
|
||||
}
|
||||
|
||||
public final String getCmdLine() {
|
||||
return String.format("java -cp %s %s -R %s -V %s -V %s -out %s",
|
||||
StringUtils.join(RuntimeUtils.getAbsoluteClassPaths(), File.pathSeparatorChar),
|
||||
CatVariants.class.getCanonicalName(), BaseTest.b37KGReference, file1, file2, outputFile);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "CatVariantsTestProvider " + outputFile;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "ExtensionsTest")
|
||||
public Object[][] makeExtensionsTestProvider() {
|
||||
new CatVariantsTestProvider("CatVariantsTest1.vcf", "CatVariantsTest2.vcf", BaseTest.createTempFile("CatVariantsTest", ".vcf"), "d0d81eb7fd3905256c4ac7c0fc480094");
|
||||
new CatVariantsTestProvider("CatVariantsTest1.bcf", "CatVariantsTest2.bcf", BaseTest.createTempFile("CatVariantsTest", ".bcf"), "6a57fcbbf3cae490896d13a288670d83");
|
||||
|
||||
for (String extension : AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS)
|
||||
new CatVariantsTestProvider("CatVariantsTest1.vcf" + extension, "CatVariantsTest2.vcf" + extension, BaseTest.createTempFile("CatVariantsTest", ".vcf" + extension), "33f728ac5c70ce2994f3619a27f47088");
|
||||
|
||||
return CatVariantsTestProvider.getTests(CatVariantsTestProvider.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ExtensionsTest")
|
||||
public void testExtensions(final CatVariantsTestProvider cfg) throws IOException {
|
||||
|
||||
ProcessController pc = ProcessController.getThreadLocal();
|
||||
ProcessSettings ps = new ProcessSettings(cfg.getCmdLine().split("\\s+"));
|
||||
pc.execAndCheck(ps);
|
||||
|
||||
MD5DB.MD5Match result = md5db.testFileMD5("testExtensions", "CatVariantsTestProvider", cfg.outputFile, cfg.md5, false);
|
||||
if(result.failed) {
|
||||
final MD5Mismatch failure = new MD5Mismatch(result.actualMD5, result.expectedMD5, result.diffEngineOutput);
|
||||
Assert.fail(failure.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IOException.class)
|
||||
public void testMismatchedExtensions1() throws IOException {
|
||||
|
||||
String cmdLine = String.format("java -cp %s %s -R %s -V %s -V %s -out %s",
|
||||
StringUtils.join(RuntimeUtils.getAbsoluteClassPaths(), File.pathSeparatorChar),
|
||||
CatVariants.class.getCanonicalName(),
|
||||
BaseTest.b37KGReference,
|
||||
new File(CatVariantsDir, "CatVariantsTest1.vcf"),
|
||||
new File(CatVariantsDir, "CatVariantsTest2.vcf"),
|
||||
BaseTest.createTempFile("CatVariantsTest", ".bcf"));
|
||||
|
||||
ProcessController pc = ProcessController.getThreadLocal();
|
||||
ProcessSettings ps = new ProcessSettings(cmdLine.split("\\s+"));
|
||||
pc.execAndCheck(ps);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IOException.class)
|
||||
public void testMismatchedExtensions2() throws IOException {
|
||||
|
||||
String cmdLine = String.format("java -cp %s %s -R %s -V %s -V %s -out %s",
|
||||
StringUtils.join(RuntimeUtils.getAbsoluteClassPaths(), File.pathSeparatorChar),
|
||||
CatVariants.class.getCanonicalName(),
|
||||
BaseTest.b37KGReference,
|
||||
new File(CatVariantsDir, "CatVariantsTest1.vcf"),
|
||||
new File(CatVariantsDir, "CatVariantsTest2.bcf"),
|
||||
BaseTest.createTempFile("CatVariantsTest", ".vcf"));
|
||||
|
||||
ProcessController pc = ProcessController.getThreadLocal();
|
||||
ProcessSettings ps = new ProcessSettings(cmdLine.split("\\s+"));
|
||||
pc.execAndCheck(ps);
|
||||
}
|
||||
}
|
||||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import org.broad.tribble.AbstractFeatureReader;
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.index.AbstractIndex;
|
||||
import org.broad.tribble.index.ChrIndex;
|
||||
|
|
@ -34,8 +35,8 @@ import org.broad.tribble.index.interval.IntervalTreeIndex;
|
|||
import org.broad.tribble.index.linear.LinearIndex;
|
||||
import org.broad.tribble.index.tabix.TabixIndex;
|
||||
import org.broad.tribble.util.TabixUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.testng.Assert;
|
||||
import org.testng.TestException;
|
||||
|
|
@ -45,7 +46,6 @@ import org.testng.annotations.Test;
|
|||
import java.io.File;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -299,8 +299,8 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
|
||||
@DataProvider(name = "BlockCompressedIndexDataProvider")
|
||||
public Object[][] blockCompressedIndexCreatorData() {
|
||||
for (String suffix : VCFWriterArgumentTypeDescriptor.SUPPORTED_ZIPPED_SUFFIXES)
|
||||
new BlockCompressedIndexCreatorTest(".vcf" + suffix);
|
||||
for (final String extension : AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS)
|
||||
new BlockCompressedIndexCreatorTest(".vcf" + extension);
|
||||
|
||||
return TestDataProvider.getTests(BlockCompressedIndexCreatorTest.class);
|
||||
}
|
||||
|
|
@ -330,4 +330,48 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
Assert.assertTrue(actualIndex instanceof TabixIndex, "testBlockCompressedIndexCreation: Want Tabix index but index is not Tabix: " + outTabixIdx);
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// Block-Compressed Input Tests
|
||||
//
|
||||
//
|
||||
|
||||
private class BlockCompressedInputTest extends TestDataProvider {
|
||||
private final String extension;
|
||||
|
||||
private BlockCompressedInputTest(String extension) {
|
||||
super(BlockCompressedInputTest.class);
|
||||
|
||||
this.extension = extension;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("File extension %s", extension);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "BlockCompressedInputDataProvider")
|
||||
public Object[][] blockCompressedInputData() {
|
||||
for (final String extension : AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS)
|
||||
new BlockCompressedInputTest(".vcf" + extension);
|
||||
|
||||
return TestDataProvider.getTests(BlockCompressedInputTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BlockCompressedInputDataProvider")
|
||||
public void testBlockCompressedInput(BlockCompressedInputTest testSpec) {
|
||||
|
||||
File inputFile = new File(BaseTest.privateTestDir, "block_compressed_input_test" + testSpec.extension);
|
||||
final String commandLine = " -T SelectVariants" +
|
||||
" -R " + b37KGReference +
|
||||
" --no_cmdline_in_header" +
|
||||
" -V " + inputFile +
|
||||
" -o %s ";
|
||||
final String name = "testBlockCompressedInput: " + testSpec.toString();
|
||||
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("3b60668bd973e43783d0406de80d2ed2"));
|
||||
|
||||
executeTest(name, spec);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -3,23 +3,23 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>picard</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
<name>picard</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.broadinstitute</groupId>
|
||||
<artifactId>variant</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.broad</groupId>
|
||||
<artifactId>tribble</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
</dependency>
|
||||
<!-- TODO: Picard is using a custom zip with just ant's BZip2 classes. See also: http://www.kohsuke.org/bzip2 -->
|
||||
<dependency>
|
||||
Binary file not shown.
|
|
@ -3,7 +3,7 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
<name>sam-jdk</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
Binary file not shown.
|
|
@ -3,13 +3,13 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.broad</groupId>
|
||||
<artifactId>tribble</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
<name>tribble</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
Binary file not shown.
|
|
@ -3,18 +3,18 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.broadinstitute</groupId>
|
||||
<artifactId>variant</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
<name>variant</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.broad</groupId>
|
||||
<artifactId>tribble</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.111.1902</version>
|
||||
<version>1.111.1920</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
|
@ -43,7 +43,7 @@
|
|||
<test.args>-Xmx${test.maxmemory} -XX:+UseParallelOldGC -XX:ParallelGCThreads=${java.gc.threads} -XX:GCTimeLimit=${java.gc.timeLimit} -XX:GCHeapFreeLimit=${java.gc.heapFreeLimit}</test.args>
|
||||
|
||||
<!-- Version numbers for picard sam-jdk. Usually kept in sync. -->
|
||||
<picard.public.version>1.111.1902</picard.public.version>
|
||||
<picard.public.version>1.111.1920</picard.public.version>
|
||||
<sam.version>${picard.public.version}</sam.version>
|
||||
<picard.version>${picard.public.version}</picard.version>
|
||||
<variant.version>${picard.public.version}</variant.version>
|
||||
|
|
|
|||
Loading…
Reference in New Issue