Individual VariantContextWriters are now package-private

-- Added VCFHeader() constructor that makes an empty header, and updated VariantRecalibrator to use it
-- Update build.xml to build vcf.jar with updated paths and bcf2 support.
This commit is contained in:
Mark DePristo 2012-05-14 15:34:32 -04:00
parent 24864fd5b0
commit afd2f1a3f9
13 changed files with 75 additions and 27 deletions

View File

@ -641,6 +641,7 @@
<jar jarfile="${dist.dir}/vcf.jar">
<fileset dir="${java.classes}">
<include name="org/broadinstitute/sting/utils/codecs/vcf/**/*.class"/>
<include name="org/broadinstitute/sting/utils/codecs/bcf2/**/*.class"/>
<include name="org/broadinstitute/sting/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
@ -1141,6 +1142,7 @@
useDefaultListeners="false"
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
<jvmarg value="-Xmx${test.maxmemory}" />
<jvmarg value="-ea" />
<jvmarg value="-Djava.awt.headless=true" />
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />

View File

@ -32,9 +32,9 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.io.File;
import java.io.OutputStream;
@ -173,17 +173,17 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
return engine.getMasterSequenceDictionary();
}
public EnumSet<VariantContextWriterFactory.Options> getWriterOptions() {
public EnumSet<Options> getWriterOptions() {
return getWriterOptions(false);
}
public EnumSet<VariantContextWriterFactory.Options> getWriterOptions(boolean indexOnTheFly) {
List<VariantContextWriterFactory.Options> options = new ArrayList<VariantContextWriterFactory.Options>();
public EnumSet<Options> getWriterOptions(boolean indexOnTheFly) {
List<Options> options = new ArrayList<Options>();
if ( doNotWriteGenotypes ) options.add(VariantContextWriterFactory.Options.DO_NOT_WRITE_GENOTYPES);
if ( indexOnTheFly && ! isCompressed() ) options.add(VariantContextWriterFactory.Options.ENABLE_ON_THE_FLY_INDEX);
if ( doNotWriteGenotypes ) options.add(Options.DO_NOT_WRITE_GENOTYPES);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
return options.isEmpty() ? EnumSet.noneOf(VariantContextWriterFactory.Options.class) : EnumSet.copyOf(options);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
}
/**

View File

@ -230,7 +230,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
}
final VCFHeader vcfHeader = new VCFHeader( null, Collections.<String>emptySet() );
final VCFHeader vcfHeader = new VCFHeader();
recalWriter = VariantContextWriterFactory.create(recalFile, getMasterSequenceDictionary());
recalWriter.writeHeader(vcfHeader);
}

View File

@ -41,8 +41,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.util.*;
@ -199,7 +199,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
vcfWriter.writeHeader(vcfHeader);
if ( vcfWriter instanceof VariantContextWriterStub) {
sitesOnlyVCF = ((VariantContextWriterStub)vcfWriter).getWriterOptions().contains(VariantContextWriterFactory.Options.DO_NOT_WRITE_GENOTYPES);
sitesOnlyVCF = ((VariantContextWriterStub)vcfWriter).getWriterOptions().contains(Options.DO_NOT_WRITE_GENOTYPES);
if ( sitesOnlyVCF ) logger.info("Pre-stripping genotypes for performance");
} else
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");

View File

@ -26,17 +26,22 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.variantcontext.writer.BCF2Writer;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.io.*;
import java.util.*;
@ -69,14 +74,16 @@ public class BCF2TestWalker extends RodWalker<Integer, Integer> {
protected File bcfFile;
private final List<VariantContext> vcs = new ArrayList<VariantContext>();
protected BCF2Writer writer;
protected VariantContextWriter writer;
@Override
public void initialize() {
final Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), Collections.singletonList(variants));
final VCFHeader header = VCFUtils.withUpdatedContigs(vcfRods.values().iterator().next(), getToolkit());
try {
writer = new BCF2Writer(bcfFile, new FileOutputStream(bcfFile), getToolkit().getMasterSequenceDictionary(), ! dontIndexOnTheFly, false );
EnumSet<Options> options = EnumSet.of(Options.FORCE_BCF);
if ( !dontIndexOnTheFly ) options.add(Options.INDEX_ON_THE_FLY);
writer = VariantContextWriterFactory.create(bcfFile, new FileOutputStream(bcfFile), getToolkit().getMasterSequenceDictionary(), options);
writer.writeHeader(header);
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotCreateOutputFile(bcfFile, e);

View File

@ -74,6 +74,13 @@ public class VCFHeader {
private boolean writeEngineHeaders = true;
private boolean writeCommandLine = true;
/**
* Create an empty VCF header with no header lines and no samples.
*
* Delegates to the two-argument constructor, passing an empty set of
* {@link VCFHeaderLine} and an empty set of strings, so neither argument
* is null.
*/
public VCFHeader() {
this(Collections.<VCFHeaderLine>emptySet(), Collections.<String>emptySet());
}
/**
* create a VCF header, given a list of meta data and auxiliary tags
*

View File

@ -40,7 +40,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
import java.util.*;
public class BCF2Writer extends IndexingVariantContextWriter {
class BCF2Writer extends IndexingVariantContextWriter {
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support

View File

@ -43,7 +43,7 @@ import java.io.*;
/**
* this class writes VCF files
*/
public abstract class IndexingVariantContextWriter implements VariantContextWriter {
abstract class IndexingVariantContextWriter implements VariantContextWriter {
private final String name;
private final SAMSequenceDictionary refDict;

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext.writer;
/**
* Available writer options for VariantContextWriters.
*
* Callers collect these constants into an EnumSet and pass that set to
* VariantContextWriterFactory when creating a writer.
*
* @author Mark DePristo
* @since 5/12
*/
public enum Options {
// VariantContextWriterFactory checks for this option and forwards the result
// to the BCF2Writer / VCFWriter constructor; it is the sole member of the
// factory's DEFAULT_OPTIONS. Presumably asks the writer to build an index
// while writing -- confirm against the writer implementations.
INDEX_ON_THE_FLY,
// VariantContextWriterFactory checks for this option and forwards the result
// to the BCF2Writer / VCFWriter constructor. CombineVariants treats its
// presence as "sites only" output and pre-strips genotypes.
DO_NOT_WRITE_GENOTYPES,
// Set by BCF2TestWalker when it needs BCF output from the factory.
// NOTE(review): presumably makes the factory choose BCF2Writer regardless of
// the output file name -- the condition that consumes it is not visible here.
FORCE_BCF
}

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
/**
* this class writes VCF files, allowing records to be passed in unsorted (up to a certain genomic distance away)
*/
public class SortingVariantContextWriter extends SortingVariantContextWriterBase {
class SortingVariantContextWriter extends SortingVariantContextWriterBase {
// the maximum START distance between records that we'll cache
private int maxCachingStartDistance;

View File

@ -35,7 +35,7 @@ import java.util.concurrent.PriorityBlockingQueue;
* This class writes VCF files, allowing records to be passed in unsorted.
* It also enforces that it is never passed records of the same chromosome with any other chromosome in between them.
*/
public abstract class SortingVariantContextWriterBase implements VariantContextWriter {
abstract class SortingVariantContextWriterBase implements VariantContextWriter {
// The VCFWriter to which to actually write the sorted VCF records
private final VariantContextWriter innerWriter;

View File

@ -38,7 +38,7 @@ import java.util.*;
/**
* this class writes VCF files
*/
public class VCFWriter extends IndexingVariantContextWriter {
class VCFWriter extends IndexingVariantContextWriter {
private final static String VERSION_LINE = VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString();
// the print stream we're writing to

View File

@ -40,13 +40,8 @@ import java.util.EnumSet;
* @since 5/12
*/
public class VariantContextWriterFactory {
public enum Options {
ENABLE_ON_THE_FLY_INDEX,
DO_NOT_WRITE_GENOTYPES,
FORCE_BCF
}
public static final EnumSet<Options> DEFAULT_OPTIONS = EnumSet.of(Options.ENABLE_ON_THE_FLY_INDEX);
public static final EnumSet<Options> DEFAULT_OPTIONS = EnumSet.of(Options.INDEX_ON_THE_FLY);
public static final EnumSet<Options> NO_OPTIONS = EnumSet.noneOf(Options.class);
private VariantContextWriterFactory() {}
@ -79,11 +74,11 @@ public class VariantContextWriterFactory {
if ( enableBCF )
return new BCF2Writer(location, output, refDict,
options.contains(Options.ENABLE_ON_THE_FLY_INDEX),
options.contains(Options.INDEX_ON_THE_FLY),
options.contains(Options.DO_NOT_WRITE_GENOTYPES));
else {
return new VCFWriter(location, output, refDict,
options.contains(Options.ENABLE_ON_THE_FLY_INDEX),
options.contains(Options.INDEX_ON_THE_FLY),
options.contains(Options.DO_NOT_WRITE_GENOTYPES));
}
}