From afd2f1a3f95b7174be5686a3ed663669858d89ee Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 14 May 2012 15:34:32 -0400 Subject: [PATCH] Individual VariantContextWriters are now package protected -- Added VCFHeader() constructor that makes an empty header, and updated VariantRecalibrator to use it -- Update build.xml to build vcf.jar with updated paths and bcf2 support. --- build.xml | 2 + .../io/stubs/VariantContextWriterStub.java | 14 +++---- .../VariantRecalibrator.java | 2 +- .../walkers/variantutils/CombineVariants.java | 4 +- .../utils/codecs/bcf2/BCF2TestWalker.java | 15 ++++++-- .../sting/utils/codecs/vcf/VCFHeader.java | 7 ++++ .../variantcontext/writer/BCF2Writer.java | 2 +- .../writer/IndexingVariantContextWriter.java | 2 +- .../utils/variantcontext/writer/Options.java | 37 +++++++++++++++++++ .../writer/SortingVariantContextWriter.java | 2 +- .../SortingVariantContextWriterBase.java | 2 +- .../variantcontext/writer/VCFWriter.java | 2 +- .../writer/VariantContextWriterFactory.java | 11 ++---- 13 files changed, 75 insertions(+), 27 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/Options.java diff --git a/build.xml b/build.xml index d425b5af5..c72ec9bac 100644 --- a/build.xml +++ b/build.xml @@ -641,6 +641,7 @@ + @@ -1141,6 +1142,7 @@ useDefaultListeners="false" listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter"> + diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java index b7f90a800..087e21a0b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java @@ -32,9 +32,9 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.writer.Options; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.io.OutputStream; @@ -173,17 +173,17 @@ public class VariantContextWriterStub implements Stub, Var return engine.getMasterSequenceDictionary(); } - public EnumSet getWriterOptions() { + public EnumSet getWriterOptions() { return getWriterOptions(false); } - public EnumSet getWriterOptions(boolean indexOnTheFly) { - List options = new ArrayList(); + public EnumSet getWriterOptions(boolean indexOnTheFly) { + List options = new ArrayList(); - if ( doNotWriteGenotypes ) options.add(VariantContextWriterFactory.Options.DO_NOT_WRITE_GENOTYPES); - if ( indexOnTheFly && ! isCompressed() ) options.add(VariantContextWriterFactory.Options.ENABLE_ON_THE_FLY_INDEX); + if ( doNotWriteGenotypes ) options.add(Options.DO_NOT_WRITE_GENOTYPES); + if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY); - return options.isEmpty() ? EnumSet.noneOf(VariantContextWriterFactory.Options.class) : EnumSet.copyOf(options); + return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 4a4ec937d..813f20e57 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -230,7 +230,7 @@ public class VariantRecalibrator extends RodWalkeremptySet() ); + final VCFHeader vcfHeader = new VCFHeader(); recalWriter = VariantContextWriterFactory.create(recalFile, getMasterSequenceDictionary()); recalWriter.writeHeader(vcfHeader); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 6c27ee651..6a55b024b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -41,8 +41,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.writer.Options; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; import java.util.*; @@ -199,7 +199,7 @@ public class CombineVariants extends RodWalker { vcfWriter.writeHeader(vcfHeader); if ( vcfWriter instanceof VariantContextWriterStub) { - sitesOnlyVCF = ((VariantContextWriterStub)vcfWriter).getWriterOptions().contains(VariantContextWriterFactory.Options.DO_NOT_WRITE_GENOTYPES); + sitesOnlyVCF = ((VariantContextWriterStub)vcfWriter).getWriterOptions().contains(Options.DO_NOT_WRITE_GENOTYPES); if ( sitesOnlyVCF ) logger.info("Pre-stripping genotypes for performance"); } else logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option"); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2TestWalker.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2TestWalker.java index 90b506975..001aeee68 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2TestWalker.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2TestWalker.java @@ -26,17 +26,22 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import org.broad.tribble.FeatureCodecHeader; import org.broad.tribble.readers.PositionalBufferedStream; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.variantcontext.writer.BCF2Writer; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.sting.utils.variantcontext.writer.Options; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; import java.io.*; import java.util.*; @@ -69,14 +74,16 @@ public class BCF2TestWalker extends RodWalker { protected File bcfFile; private final List vcs = new ArrayList(); - protected BCF2Writer writer; + protected VariantContextWriter writer; @Override public void initialize() { final Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), Collections.singletonList(variants)); final VCFHeader header = VCFUtils.withUpdatedContigs(vcfRods.values().iterator().next(), getToolkit()); try { - writer = new BCF2Writer(bcfFile, new FileOutputStream(bcfFile), getToolkit().getMasterSequenceDictionary(), ! dontIndexOnTheFly, false ); + EnumSet options = EnumSet.of(Options.FORCE_BCF); + if ( !dontIndexOnTheFly ) options.add(Options.INDEX_ON_THE_FLY); + writer = VariantContextWriterFactory.create(bcfFile, new FileOutputStream(bcfFile), getToolkit().getMasterSequenceDictionary(), options); writer.writeHeader(header); } catch ( FileNotFoundException e ) { throw new UserException.CouldNotCreateOutputFile(bcfFile, e); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 63b2bc0f1..707ddb39a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -74,6 +74,13 @@ public class VCFHeader { private boolean writeEngineHeaders = true; private boolean writeCommandLine = true; + /** + * Create an empty VCF header with no header lines and no samples + */ + public VCFHeader() { + this(Collections.emptySet(), Collections.emptySet()); + } + /** * create a VCF header, given a list of meta data and auxillary tags * diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index 23f3285f5..b2c7bb302 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -40,7 +40,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; import java.util.*; -public class BCF2Writer extends IndexingVariantContextWriter { +class BCF2Writer extends IndexingVariantContextWriter { final protected static Logger logger = Logger.getLogger(BCF2Writer.class); private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java index 8eac0c441..93fcfdeda 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java @@ -43,7 +43,7 @@ import java.io.*; /** * this class writes VCF files */ -public abstract class IndexingVariantContextWriter implements VariantContextWriter { +abstract class IndexingVariantContextWriter implements VariantContextWriter { private final String name; private final SAMSequenceDictionary refDict; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/Options.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/Options.java new file mode 100644 index 000000000..7180ae6bc --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/Options.java @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.variantcontext.writer; + +/** + * Available writer options for VariantContextWriters + * + * @author Mark DePristo + * @since 5/12 + */ +public enum Options { + INDEX_ON_THE_FLY, + DO_NOT_WRITE_GENOTYPES, + FORCE_BCF +} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriter.java index ae75a97fd..b51892b31 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriter.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * this class writes VCF files, allowing records to be passed in unsorted (up to a certain genomic distance away) */ -public class SortingVariantContextWriter extends SortingVariantContextWriterBase { +class SortingVariantContextWriter extends SortingVariantContextWriterBase { // the maximum START distance between records that we'll cache private int maxCachingStartDistance; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java index 64bbdf36a..18d91ef3f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java @@ -35,7 +35,7 @@ import java.util.concurrent.PriorityBlockingQueue; * This class writes VCF files, allowing records to be passed in unsorted. * It also enforces that it is never passed records of the same chromosome with any other chromosome in between them. */ -public abstract class SortingVariantContextWriterBase implements VariantContextWriter { +abstract class SortingVariantContextWriterBase implements VariantContextWriter { // The VCFWriter to which to actually write the sorted VCF records private final VariantContextWriter innerWriter; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 7bf664b7f..24f1921a8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -38,7 +38,7 @@ import java.util.*; /** * this class writes VCF files */ -public class VCFWriter extends IndexingVariantContextWriter { +class VCFWriter extends IndexingVariantContextWriter { private final static String VERSION_LINE = VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString(); // the print stream we're writing to diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java index d2b63277d..7a4ca3be1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java @@ -40,13 +40,8 @@ import java.util.EnumSet; * @since 5/12 */ public class VariantContextWriterFactory { - public enum Options { - ENABLE_ON_THE_FLY_INDEX, - DO_NOT_WRITE_GENOTYPES, - FORCE_BCF - } - public static final EnumSet DEFAULT_OPTIONS = EnumSet.of(Options.ENABLE_ON_THE_FLY_INDEX); + public static final EnumSet DEFAULT_OPTIONS = EnumSet.of(Options.INDEX_ON_THE_FLY); public static final EnumSet NO_OPTIONS = EnumSet.noneOf(Options.class); private VariantContextWriterFactory() {} @@ -79,11 +74,11 @@ public class VariantContextWriterFactory { if ( enableBCF ) return new BCF2Writer(location, output, refDict, - options.contains(Options.ENABLE_ON_THE_FLY_INDEX), + options.contains(Options.INDEX_ON_THE_FLY), options.contains(Options.DO_NOT_WRITE_GENOTYPES)); else { return new VCFWriter(location, output, refDict, - options.contains(Options.ENABLE_ON_THE_FLY_INDEX), + options.contains(Options.INDEX_ON_THE_FLY), options.contains(Options.DO_NOT_WRITE_GENOTYPES)); } }