I have finally figured out that when you volunteer to do something in group meeting, you keep getting pestered about it on Mark's Omniplan doc until it gets done (except for contig aliasing, of course). As such...
We can now emit bzipped VCFs from the GATK. Details: any walker that defines a VCFWriter for its @Output (i.e. pretty much every core walker from UG onward) also gets the -bzip (--bzip_compression) boolean argument. When set, the walker emits a VCF that is compressed with bzip2. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4093 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
691333f75c
commit
44f3c5639a
4
ivy.xml
4
ivy.xml
|
|
@ -27,9 +27,9 @@
|
|||
<!-- Dependencies for the graph aligner -->
|
||||
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
|
||||
|
||||
<!-- Dependencies for VariantFiltration -->
|
||||
<!-- <dependency org="commons-jexl" name="commons-jexl" rev="1.1"/> -->
|
||||
<!-- Commons Dependencies -->
|
||||
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
|
||||
<dependency org="org.apache.commons" name="commons-compress" rev="1.0"/>
|
||||
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
||||
<dependency org="commons-io" name="commons-io" rev="1.3.2"/>
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@ import org.apache.log4j.Logger;
|
|||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.*;
|
||||
import java.util.*;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* A descriptor capable of providing parsers that can parse any type
|
||||
|
|
@ -192,6 +191,20 @@ public abstract class ArgumentTypeDescriptor {
|
|||
return validOptions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if a match for the given argument definition exists in the collection of ArgumentMatches.
|
||||
* @param definition Definition of the argument for which to find matches.
|
||||
* @param matches The collection of argument matches to search.
|
||||
* @return true if the argument is present, or false if not present.
|
||||
*/
|
||||
protected boolean argumentIsPresent( ArgumentDefinition definition, ArgumentMatches matches ) {
|
||||
for( ArgumentMatch match: matches ) {
|
||||
if( match.definition.equals(definition) )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the value of an argument with the given full name, from the collection of ArgumentMatches.
|
||||
* If the argument matches multiple values, an exception will be thrown.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.io;
|
||||
|
||||
import org.broad.tribble.vcf.StandardVCFWriter;
|
||||
|
||||
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A writer that allows bzipped VCF files to be written on-the-fly.
|
||||
*
|
||||
* @author ebanks
|
||||
* @version 0.1
|
||||
*/
|
||||
public class CompressedVCFWriter extends StandardVCFWriter {
|
||||
|
||||
public CompressedVCFWriter(OutputStream output) throws IOException {
|
||||
super(new BZip2CompressorOutputStream(output));
|
||||
}
|
||||
}
|
||||
|
|
@ -8,6 +8,7 @@ import org.broad.tribble.vcf.VCFWriter;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
|
||||
import org.broadinstitute.sting.gatk.io.CompressedVCFWriter;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Set;
|
||||
|
|
@ -45,7 +46,15 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
else
|
||||
throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||
|
||||
writer = new StandardVCFWriter(stream);
|
||||
if ( stub.isCompressed() ) {
|
||||
try {
|
||||
writer = new CompressedVCFWriter(stream);
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Unable to create a compressed output stream: " + e.getMessage());
|
||||
}
|
||||
} else {
|
||||
writer = new StandardVCFWriter(stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -41,6 +41,10 @@ import java.util.Arrays;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
|
||||
private static final String COMPRESSION_FULLNAME = "bzip_compression";
|
||||
private static final String COMPRESSION_SHORTNAME = "bzip";
|
||||
|
||||
/**
|
||||
* The engine into which output stubs should be fed.
|
||||
*/
|
||||
|
|
@ -73,7 +77,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
@Override
|
||||
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
|
||||
return Arrays.asList( createDefaultArgumentDefinition(source) );
|
||||
return Arrays.asList( createDefaultArgumentDefinition(source),
|
||||
createVCFCompressionArgumentDefinition(source) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -87,7 +92,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
@Override
|
||||
public Object createTypeDefault(ArgumentSource source,Class type) {
|
||||
VCFWriterStub stub = new VCFWriterStub(engine,defaultOutputStream);
|
||||
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false);
|
||||
engine.addOutput(stub);
|
||||
return stub;
|
||||
}
|
||||
|
|
@ -105,11 +110,35 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
String writerFileName = getArgumentValue(createDefaultArgumentDefinition(source),matches);
|
||||
File writerFile = writerFileName != null ? new File(writerFileName) : null;
|
||||
|
||||
// Should we compress the output stream?
|
||||
boolean compress = argumentIsPresent(createVCFCompressionArgumentDefinition(source), matches);
|
||||
|
||||
// Create a stub for the given object.
|
||||
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile) : new VCFWriterStub(engine,System.out);
|
||||
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress) : new VCFWriterStub(engine, System.out, compress);
|
||||
|
||||
engine.addOutput(stub);
|
||||
|
||||
return stub;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the optional compression argument for the VCF file.
|
||||
* @param source Argument source for the VCF file. Must not be null.
|
||||
* @return Argument definition for the bzip compression flag. Will not be null.
|
||||
*/
|
||||
private ArgumentDefinition createVCFCompressionArgumentDefinition(ArgumentSource source) {
|
||||
return new ArgumentDefinition( ArgumentIOType.getIOType(getArgumentAnnotation(source)),
|
||||
boolean.class,
|
||||
COMPRESSION_FULLNAME,
|
||||
COMPRESSION_SHORTNAME,
|
||||
"Should we bzip the output VCF?",
|
||||
false,
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,6 +61,11 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
*/
|
||||
private final PrintStream genotypeStream;
|
||||
|
||||
/**
|
||||
* Should we emit a compressed output stream?
|
||||
*/
|
||||
private final boolean isCompressed;
|
||||
|
||||
/**
|
||||
* Connects this stub with an external stream capable of serving the
|
||||
* requests of the consumer of this stub.
|
||||
|
|
@ -71,22 +76,26 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
* Create a new stub given the requested file.
|
||||
* @param engine GATK engine.
|
||||
* @param genotypeFile file to (ultimately) create.
|
||||
* @param isCompressed should we compress the output stream?
|
||||
*/
|
||||
public VCFWriterStub(GenomeAnalysisEngine engine,File genotypeFile) {
|
||||
public VCFWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed) {
|
||||
this.engine = engine;
|
||||
this.genotypeFile = genotypeFile;
|
||||
this.genotypeStream = null;
|
||||
this.isCompressed = isCompressed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new stub given the requested stream.
|
||||
* @param engine GATK engine.
|
||||
* @param genotypeStream stream to (ultimately) write.
|
||||
* @param isCompressed should we compress the output stream?
|
||||
*/
|
||||
public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream) {
|
||||
public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed) {
|
||||
this.engine = engine;
|
||||
this.genotypeFile = null;
|
||||
this.genotypeStream = new PrintStream(genotypeStream);
|
||||
this.isCompressed = isCompressed;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -105,6 +114,14 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
return genotypeStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether the output written through this stub should be compressed.
|
||||
* @return true if the output should be compressed, false otherwise.
|
||||
*/
|
||||
public boolean isCompressed() {
|
||||
return isCompressed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the header to use when creating the new file.
|
||||
* @return header to use when creating the new file.
|
||||
|
|
|
|||
Loading…
Reference in New Issue