Okay, finally done with VCF compression. Now:

1. Uses blocked gzip compression.
2. No more -bzip option available (since we can't compress to stdout).
3. The only file extensions that trigger compression are .gz and .gzip.
4. No more need for CompressedVCFWriter.java



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4099 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-08-24 16:36:54 +00:00
parent 83624e3af3
commit 43f1fb2380
4 changed files with 16 additions and 90 deletions

View File

@ -29,7 +29,6 @@
<!-- Commons Dependencies -->
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
<dependency org="org.apache.commons" name="commons-compress" rev="1.0"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
<dependency org="commons-io" name="commons-io" rev="1.3.2"/>

View File

@ -1,45 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.io;
import org.broad.tribble.vcf.StandardVCFWriter;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import java.io.OutputStream;
import java.io.IOException;
/**
 * VCF writer whose output is bzip2-compressed as it is written.
 *
 * The target stream is wrapped in a {@link BZip2CompressorOutputStream}
 * and that wrapped stream is what gets handed to {@link StandardVCFWriter}.
 *
 * @author ebanks
 * @version 0.1
 */
public class CompressedVCFWriter extends StandardVCFWriter {

    /**
     * Creates a writer that sends bzip2-compressed VCF output to the given stream.
     *
     * @param output the stream that receives the compressed bytes
     * @throws IOException propagated from constructing the bzip2 compressor stream
     */
    public CompressedVCFWriter(OutputStream output) throws IOException {
        super(toBzipStream(output));
    }

    /**
     * Wraps a raw stream in a bzip2 compressor.
     *
     * @param rawTarget the stream to wrap
     * @return a compressor stream writing into {@code rawTarget}
     * @throws IOException propagated from the compressor's constructor
     */
    private static OutputStream toBzipStream(OutputStream rawTarget) throws IOException {
        return new BZip2CompressorOutputStream(rawTarget);
    }
}

View File

@ -8,11 +8,12 @@ import org.broad.tribble.vcf.VCFWriter;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
import org.broadinstitute.sting.gatk.io.CompressedVCFWriter;
import java.io.*;
import java.util.Set;
import net.sf.samtools.util.BlockCompressedOutputStream;
/**
* Provides temporary and permanent storage for genotypes in VCF format.
*
@ -21,7 +22,7 @@ import java.util.Set;
*/
public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
protected final File file;
protected final PrintStream stream;
protected final OutputStream stream;
protected final VCFWriter writer;
/**
@ -30,31 +31,27 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
* @param stub Stub to use when constructing the output file.
*/
public VCFWriterStorage( VCFWriterStub stub ) {
if(stub.getFile() != null) {
this.file = stub.getFile();
if ( stub.getFile() != null ) {
file = stub.getFile();
try {
this.stream = new PrintStream(file);
if ( stub.isCompressed() )
stream = new BlockCompressedOutputStream(file);
else
stream = new PrintStream(file);
}
catch(IOException ex) {
throw new StingException("Unable to open target output stream",ex);
throw new StingException("Unable to open target output stream", ex);
}
}
else if(stub.getOutputStream() != null) {
else if ( stub.getOutputStream() != null ) {
this.file = null;
this.stream = stub.getOutputStream();
}
else
throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
if ( stub.isCompressed() ) {
try {
writer = new CompressedVCFWriter(stream);
} catch (IOException e) {
throw new StingException("Unable to create a compressed output stream: " + e.getMessage());
}
} else {
writer = new StandardVCFWriter(stream);
}
writer = new StandardVCFWriter(stream);
}
/**

View File

@ -43,16 +43,13 @@ import java.util.HashSet;
*/
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
private static final String COMPRESSION_FULLNAME = "bzip_compression";
private static final String COMPRESSION_SHORTNAME = "bzip";
private static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
//
// static list of zipped suffixes supported by this system.
//
static {
SUPPORTED_ZIPPED_SUFFIXES.add(".gz");
SUPPORTED_ZIPPED_SUFFIXES.add(".bz");
SUPPORTED_ZIPPED_SUFFIXES.add(".bz2");
SUPPORTED_ZIPPED_SUFFIXES.add(".gzip");
}
/**
@ -87,8 +84,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createDefaultArgumentDefinition(source),
createVCFCompressionArgumentDefinition(source) );
return Arrays.asList( createDefaultArgumentDefinition(source) );
}
/**
@ -121,7 +117,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
File writerFile = writerFileName != null ? new File(writerFileName) : null;
// Should we compress the output stream?
boolean compress = argumentIsPresent(createVCFCompressionArgumentDefinition(source), matches) || (writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName)));
boolean compress = writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName));
// Create a stub for the given object.
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress) : new VCFWriterStub(engine, System.out, compress);
@ -142,25 +138,4 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return "";
return fileName.substring(indexOfLastDot).toLowerCase();
}
/**
* Builds the definition of the optional boolean command-line flag that asks
* for the output VCF to be bzipped. The flag is registered under
* COMPRESSION_FULLNAME / COMPRESSION_SHORTNAME and its I/O type is derived
* from the annotation found on the given source.
* @param source Argument source for the VCF file. Must not be null.
* @return A newly constructed argument definition for the compression flag
*         (not for the VCF file itself). Will not be null.
*/
private ArgumentDefinition createVCFCompressionArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.getIOType(getArgumentAnnotation(source)),
boolean.class,
COMPRESSION_FULLNAME,
COMPRESSION_SHORTNAME,
"Should we bzip the output VCF?",
// NOTE(review): the remaining flags and nulls are positional and follow the
// ArgumentDefinition constructor's parameter order, which is not visible
// here -- confirm their meaning against that class before changing them.
false,
true,
false,
false,
null,
null,
null,
null );
}
}
}