diff --git a/ivy.xml b/ivy.xml index ba02d9b6b..c7549459c 100644 --- a/ivy.xml +++ b/ivy.xml @@ -27,9 +27,9 @@ - - + + diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index acbef061e..06c220da6 100644 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -34,7 +34,6 @@ import org.apache.log4j.Logger; import java.lang.annotation.Annotation; import java.lang.reflect.*; import java.util.*; -import java.io.OutputStream; /** * An descriptor capable of providing parsers that can parse any type @@ -192,6 +191,20 @@ public abstract class ArgumentTypeDescriptor { return validOptions; } + /** + * Returns true if the argument with the given full name exists in the collection of ArgumentMatches. + * @param definition Definition of the argument for which to find matches. + * @param matches The matches for the given argument. + * @return true if the argument is present, or false if not present. + */ + protected boolean argumentIsPresent( ArgumentDefinition definition, ArgumentMatches matches ) { + for( ArgumentMatch match: matches ) { + if( match.definition.equals(definition) ) + return true; + } + return false; + } + /** * Gets the value of an argument with the given full name, from the collection of ArgumentMatches. * If the argument matches multiple values, an exception will be thrown. diff --git a/java/src/org/broadinstitute/sting/gatk/io/CompressedVCFWriter.java b/java/src/org/broadinstitute/sting/gatk/io/CompressedVCFWriter.java new file mode 100755 index 000000000..d0f0fdd01 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/io/CompressedVCFWriter.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2010. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.io; + +import org.broad.tribble.vcf.StandardVCFWriter; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import java.io.OutputStream; +import java.io.IOException; + +/** + * A writer that will allow bzipped VCF files written on-the-fly. 
+ * + * @author ebanks + * @version 0.1 + */ +public class CompressedVCFWriter extends StandardVCFWriter { + + public CompressedVCFWriter(OutputStream output) throws IOException { + super(new BZip2CompressorOutputStream(output)); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java index d1422056e..23ff429fd 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java @@ -8,6 +8,7 @@ import org.broad.tribble.vcf.VCFWriter; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; +import org.broadinstitute.sting.gatk.io.CompressedVCFWriter; import java.io.*; import java.util.Set; @@ -45,7 +46,15 @@ public class VCFWriterStorage implements Storage, VCFWriter { else throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream."); - writer = new StandardVCFWriter(stream); + if ( stub.isCompressed() ) { + try { + writer = new CompressedVCFWriter(stream); + } catch (IOException e) { + throw new StingException("Unable to create a compressed output stream: " + e.getMessage()); + } + } else { + writer = new StandardVCFWriter(stream); + } } /** diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 60306e805..398a7b032 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -41,6 +41,10 @@ import java.util.Arrays; * @version 0.1 */ public class VCFWriterArgumentTypeDescriptor extends 
ArgumentTypeDescriptor { + + private static final String COMPRESSION_FULLNAME = "bzip_compression"; + private static final String COMPRESSION_SHORTNAME = "bzip"; + /** * The engine into which output stubs should be fed. */ @@ -73,7 +77,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public List createArgumentDefinitions( ArgumentSource source ) { - return Arrays.asList( createDefaultArgumentDefinition(source) ); + return Arrays.asList( createDefaultArgumentDefinition(source), + createVCFCompressionArgumentDefinition(source) ); } /** @@ -87,7 +92,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public Object createTypeDefault(ArgumentSource source,Class type) { - VCFWriterStub stub = new VCFWriterStub(engine,defaultOutputStream); + VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false); engine.addOutput(stub); return stub; } @@ -105,11 +110,35 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { String writerFileName = getArgumentValue(createDefaultArgumentDefinition(source),matches); File writerFile = writerFileName != null ? new File(writerFileName) : null; + // Should we compress the output stream? + boolean compress = argumentIsPresent(createVCFCompressionArgumentDefinition(source), matches); + // Create a stub for the given object. - VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile) : new VCFWriterStub(engine,System.out); + VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress) : new VCFWriterStub(engine, System.out, compress); engine.addOutput(stub); return stub; } + + /** + * Creates the optional compression argument for the VCF file. + * @param source Argument source for the VCF file. Must not be null. + * @return Argument definition for the optional bzip compression flag. Will not be null.
+ */ + private ArgumentDefinition createVCFCompressionArgumentDefinition(ArgumentSource source) { + return new ArgumentDefinition( ArgumentIOType.getIOType(getArgumentAnnotation(source)), + boolean.class, + COMPRESSION_FULLNAME, + COMPRESSION_SHORTNAME, + "Should we bzip the output VCF?", + false, + true, + false, + false, + null, + null, + null, + null ); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index ed27ed8f3..1842e3d40 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -61,6 +61,11 @@ public class VCFWriterStub implements Stub, VCFWriter { */ private final PrintStream genotypeStream; + /** + * Should we emit a compressed output stream? + */ + private final boolean isCompressed; + /** * Connects this stub with an external stream capable of serving the * requests of the consumer of this stub. @@ -71,22 +76,26 @@ public class VCFWriterStub implements Stub, VCFWriter { * Create a new stub given the requested file. * @param engine GATK engine. * @param genotypeFile file to (ultimately) create. + * @param isCompressed should we compress the output stream? */ - public VCFWriterStub(GenomeAnalysisEngine engine,File genotypeFile) { + public VCFWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed) { this.engine = engine; this.genotypeFile = genotypeFile; this.genotypeStream = null; + this.isCompressed = isCompressed; } /** * Create a new stub given the requested file. * @param engine GATK engine. * @param genotypeStream stream to (ultimately) write. + * @param isCompressed should we compress the output stream? 
*/ - public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream) { + public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed) { this.engine = engine; this.genotypeFile = null; this.genotypeStream = new PrintStream(genotypeStream); + this.isCompressed = isCompressed; } /** @@ -105,6 +114,14 @@ public class VCFWriterStub implements Stub, VCFWriter { return genotypeStream; } + /** + * Reports whether the output stream should be compressed. + * @return true if the output should be compressed, false otherwise. + */ + public boolean isCompressed() { + return isCompressed; + } + /** * Retrieves the header to use when creating the new file. * @return header to use when creating the new file.