Okay, finally done with VCF compression. Now:
1. Uses blocked gzip compression. 2. No more -bzip option available (since we can't compress to stdout). 3. The only file extensions that are compressed are .gz and .gzip. 4. No more need for CompressedVCFWriter.java. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4099 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
83624e3af3
commit
43f1fb2380
1
ivy.xml
1
ivy.xml
|
|
@ -29,7 +29,6 @@
|
|||
|
||||
<!-- Commons Dependencies -->
|
||||
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
|
||||
<dependency org="org.apache.commons" name="commons-compress" rev="1.0"/>
|
||||
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
||||
<dependency org="commons-io" name="commons-io" rev="1.3.2"/>
|
||||
|
||||
|
|
|
|||
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.io;
|
||||
|
||||
import org.broad.tribble.vcf.StandardVCFWriter;
|
||||
|
||||
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A writer that will allow bzipped VCF files written on-the-fly.
|
||||
*
|
||||
* @author ebanks
|
||||
* @version 0.1
|
||||
*/
|
||||
public class CompressedVCFWriter extends StandardVCFWriter {
|
||||
|
||||
public CompressedVCFWriter(OutputStream output) throws IOException {
|
||||
super(new BZip2CompressorOutputStream(output));
|
||||
}
|
||||
}
|
||||
|
|
@ -8,11 +8,12 @@ import org.broad.tribble.vcf.VCFWriter;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
|
||||
import org.broadinstitute.sting.gatk.io.CompressedVCFWriter;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Set;
|
||||
|
||||
import net.sf.samtools.util.BlockCompressedOutputStream;
|
||||
|
||||
/**
|
||||
* Provides temporary and permanent storage for genotypes in VCF format.
|
||||
*
|
||||
|
|
@ -21,7 +22,7 @@ import java.util.Set;
|
|||
*/
|
||||
public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||
protected final File file;
|
||||
protected final PrintStream stream;
|
||||
protected final OutputStream stream;
|
||||
protected final VCFWriter writer;
|
||||
|
||||
/**
|
||||
|
|
@ -30,31 +31,27 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
* @param stub Stub to use when constructing the output file.
|
||||
*/
|
||||
public VCFWriterStorage( VCFWriterStub stub ) {
|
||||
if(stub.getFile() != null) {
|
||||
this.file = stub.getFile();
|
||||
|
||||
if ( stub.getFile() != null ) {
|
||||
file = stub.getFile();
|
||||
try {
|
||||
this.stream = new PrintStream(file);
|
||||
if ( stub.isCompressed() )
|
||||
stream = new BlockCompressedOutputStream(file);
|
||||
else
|
||||
stream = new PrintStream(file);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new StingException("Unable to open target output stream",ex);
|
||||
throw new StingException("Unable to open target output stream", ex);
|
||||
}
|
||||
}
|
||||
else if(stub.getOutputStream() != null) {
|
||||
else if ( stub.getOutputStream() != null ) {
|
||||
this.file = null;
|
||||
this.stream = stub.getOutputStream();
|
||||
}
|
||||
else
|
||||
throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||
|
||||
if ( stub.isCompressed() ) {
|
||||
try {
|
||||
writer = new CompressedVCFWriter(stream);
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Unable to create a compressed output stream: " + e.getMessage());
|
||||
}
|
||||
} else {
|
||||
writer = new StandardVCFWriter(stream);
|
||||
}
|
||||
writer = new StandardVCFWriter(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -43,16 +43,13 @@ import java.util.HashSet;
|
|||
*/
|
||||
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
|
||||
private static final String COMPRESSION_FULLNAME = "bzip_compression";
|
||||
private static final String COMPRESSION_SHORTNAME = "bzip";
|
||||
private static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
|
||||
//
|
||||
// static list of zipped suffixes supported by this system.
|
||||
//
|
||||
static {
|
||||
SUPPORTED_ZIPPED_SUFFIXES.add(".gz");
|
||||
SUPPORTED_ZIPPED_SUFFIXES.add(".bz");
|
||||
SUPPORTED_ZIPPED_SUFFIXES.add(".bz2");
|
||||
SUPPORTED_ZIPPED_SUFFIXES.add(".gzip");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -87,8 +84,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
@Override
|
||||
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
|
||||
return Arrays.asList( createDefaultArgumentDefinition(source),
|
||||
createVCFCompressionArgumentDefinition(source) );
|
||||
return Arrays.asList( createDefaultArgumentDefinition(source) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -121,7 +117,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
File writerFile = writerFileName != null ? new File(writerFileName) : null;
|
||||
|
||||
// Should we compress the output stream?
|
||||
boolean compress = argumentIsPresent(createVCFCompressionArgumentDefinition(source), matches) || (writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName)));
|
||||
boolean compress = writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName));
|
||||
|
||||
// Create a stub for the given object.
|
||||
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress) : new VCFWriterStub(engine, System.out, compress);
|
||||
|
|
@ -142,25 +138,4 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
return "";
|
||||
return fileName.substring(indexOfLastDot).toLowerCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the optional compression argument for the VCF file.
|
||||
* @param source Argument source for the VCF file. Must not be null.
|
||||
* @return Argument definition for the VCF file itself. Will not be null.
|
||||
*/
|
||||
private ArgumentDefinition createVCFCompressionArgumentDefinition(ArgumentSource source) {
|
||||
return new ArgumentDefinition( ArgumentIOType.getIOType(getArgumentAnnotation(source)),
|
||||
boolean.class,
|
||||
COMPRESSION_FULLNAME,
|
||||
COMPRESSION_SHORTNAME,
|
||||
"Should we bzip the output VCF?",
|
||||
false,
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue