Okay, finally done with VCF compression. Now:
1. Uses blocked gzip compression. 2. No more -bzip option available (since we can't compress to stdout). 3. The only file extensions that are compressed are .gz and .gzip. 4. No more need for CompressedVCFWriter.java. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4099 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
83624e3af3
commit
43f1fb2380
1
ivy.xml
1
ivy.xml
|
|
@ -29,7 +29,6 @@
|
||||||
|
|
||||||
<!-- Commons Dependencies -->
|
<!-- Commons Dependencies -->
|
||||||
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
|
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
|
||||||
<dependency org="org.apache.commons" name="commons-compress" rev="1.0"/>
|
|
||||||
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
||||||
<dependency org="commons-io" name="commons-io" rev="1.3.2"/>
|
<dependency org="commons-io" name="commons-io" rev="1.3.2"/>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.io;
|
|
||||||
|
|
||||||
import org.broad.tribble.vcf.StandardVCFWriter;
|
|
||||||
|
|
||||||
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A writer that will allow bzipped VCF files written on-the-fly.
|
|
||||||
*
|
|
||||||
* @author ebanks
|
|
||||||
* @version 0.1
|
|
||||||
*/
|
|
||||||
public class CompressedVCFWriter extends StandardVCFWriter {
|
|
||||||
|
|
||||||
public CompressedVCFWriter(OutputStream output) throws IOException {
|
|
||||||
super(new BZip2CompressorOutputStream(output));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -8,11 +8,12 @@ import org.broad.tribble.vcf.VCFWriter;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
|
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
|
||||||
import org.broadinstitute.sting.gatk.io.CompressedVCFWriter;
|
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import net.sf.samtools.util.BlockCompressedOutputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides temporary and permanent storage for genotypes in VCF format.
|
* Provides temporary and permanent storage for genotypes in VCF format.
|
||||||
*
|
*
|
||||||
|
|
@ -21,7 +22,7 @@ import java.util.Set;
|
||||||
*/
|
*/
|
||||||
public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||||
protected final File file;
|
protected final File file;
|
||||||
protected final PrintStream stream;
|
protected final OutputStream stream;
|
||||||
protected final VCFWriter writer;
|
protected final VCFWriter writer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -30,31 +31,27 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||||
* @param stub Stub to use when constructing the output file.
|
* @param stub Stub to use when constructing the output file.
|
||||||
*/
|
*/
|
||||||
public VCFWriterStorage( VCFWriterStub stub ) {
|
public VCFWriterStorage( VCFWriterStub stub ) {
|
||||||
if(stub.getFile() != null) {
|
|
||||||
this.file = stub.getFile();
|
if ( stub.getFile() != null ) {
|
||||||
|
file = stub.getFile();
|
||||||
try {
|
try {
|
||||||
this.stream = new PrintStream(file);
|
if ( stub.isCompressed() )
|
||||||
|
stream = new BlockCompressedOutputStream(file);
|
||||||
|
else
|
||||||
|
stream = new PrintStream(file);
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new StingException("Unable to open target output stream",ex);
|
throw new StingException("Unable to open target output stream", ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(stub.getOutputStream() != null) {
|
else if ( stub.getOutputStream() != null ) {
|
||||||
this.file = null;
|
this.file = null;
|
||||||
this.stream = stub.getOutputStream();
|
this.stream = stub.getOutputStream();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||||
|
|
||||||
if ( stub.isCompressed() ) {
|
writer = new StandardVCFWriter(stream);
|
||||||
try {
|
|
||||||
writer = new CompressedVCFWriter(stream);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new StingException("Unable to create a compressed output stream: " + e.getMessage());
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
writer = new StandardVCFWriter(stream);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -43,16 +43,13 @@ import java.util.HashSet;
|
||||||
*/
|
*/
|
||||||
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
|
|
||||||
private static final String COMPRESSION_FULLNAME = "bzip_compression";
|
|
||||||
private static final String COMPRESSION_SHORTNAME = "bzip";
|
|
||||||
private static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
|
private static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
|
||||||
//
|
//
|
||||||
// static list of zipped suffixes supported by this system.
|
// static list of zipped suffixes supported by this system.
|
||||||
//
|
//
|
||||||
static {
|
static {
|
||||||
SUPPORTED_ZIPPED_SUFFIXES.add(".gz");
|
SUPPORTED_ZIPPED_SUFFIXES.add(".gz");
|
||||||
SUPPORTED_ZIPPED_SUFFIXES.add(".bz");
|
SUPPORTED_ZIPPED_SUFFIXES.add(".gzip");
|
||||||
SUPPORTED_ZIPPED_SUFFIXES.add(".bz2");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -87,8 +84,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
|
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
|
||||||
return Arrays.asList( createDefaultArgumentDefinition(source),
|
return Arrays.asList( createDefaultArgumentDefinition(source) );
|
||||||
createVCFCompressionArgumentDefinition(source) );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -121,7 +117,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
File writerFile = writerFileName != null ? new File(writerFileName) : null;
|
File writerFile = writerFileName != null ? new File(writerFileName) : null;
|
||||||
|
|
||||||
// Should we compress the output stream?
|
// Should we compress the output stream?
|
||||||
boolean compress = argumentIsPresent(createVCFCompressionArgumentDefinition(source), matches) || (writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName)));
|
boolean compress = writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName));
|
||||||
|
|
||||||
// Create a stub for the given object.
|
// Create a stub for the given object.
|
||||||
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress) : new VCFWriterStub(engine, System.out, compress);
|
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress) : new VCFWriterStub(engine, System.out, compress);
|
||||||
|
|
@ -142,25 +138,4 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
return "";
|
return "";
|
||||||
return fileName.substring(indexOfLastDot).toLowerCase();
|
return fileName.substring(indexOfLastDot).toLowerCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates the optional compression argument for the VCF file.
|
|
||||||
* @param source Argument source for the VCF file. Must not be null.
|
|
||||||
* @return Argument definition for the VCF file itself. Will not be null.
|
|
||||||
*/
|
|
||||||
private ArgumentDefinition createVCFCompressionArgumentDefinition(ArgumentSource source) {
|
|
||||||
return new ArgumentDefinition( ArgumentIOType.getIOType(getArgumentAnnotation(source)),
|
|
||||||
boolean.class,
|
|
||||||
COMPRESSION_FULLNAME,
|
|
||||||
COMPRESSION_SHORTNAME,
|
|
||||||
"Should we bzip the output VCF?",
|
|
||||||
false,
|
|
||||||
true,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
null,
|
|
||||||
null,
|
|
||||||
null,
|
|
||||||
null );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue