Added -bcf argument to VCFWriter output to force BCF regardless of file extension

-- Now possible to do -o /dev/stdout -bcf -l DEBUG > tmp.bcf and create a valid BCF2 file
-- Cleaned up the code to make extensions easier by moving to a setX model in VariantContextWriterStub
This commit is contained in:
Mark DePristo 2012-08-16 10:54:52 -04:00
parent 28c8e3e6d7
commit 7a247df922
2 changed files with 66 additions and 32 deletions

View File

@ -47,6 +47,7 @@ import java.util.List;
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header"; public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
public static final String SITES_ONLY_ARG_NAME = "sites_only"; public static final String SITES_ONLY_ARG_NAME = "sites_only";
public static final String FORCE_BCF = "bcf";
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>(); public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
// //
@ -96,7 +97,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) { public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createDefaultArgumentDefinition(source), createNoCommandLineHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition()); return Arrays.asList(
createDefaultArgumentDefinition(source),
createNoCommandLineHeaderArgumentDefinition(),
createSitesOnlyArgumentDefinition(),
createBCFArgumentDefinition() );
} }
/** /**
@ -117,7 +122,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired()) if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, false, argumentSources, false, false); VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
engine.addOutput(stub); engine.addOutput(stub);
return stub; return stub;
} }
@ -141,15 +146,15 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if(writerFile == null && !source.isRequired()) if(writerFile == null && !source.isRequired())
throw new MissingArgumentValueException(defaultArgumentDefinition); throw new MissingArgumentValueException(defaultArgumentDefinition);
// Should we compress the output stream?
boolean compress = isCompressed(writerFileName);
boolean skipWritingCmdLineHeader = argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches);
boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches);
// Create a stub for the given object. // Create a stub for the given object.
VariantContextWriterStub stub = (writerFile != null) ? new VariantContextWriterStub(engine, writerFile, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes) final VariantContextWriterStub stub = (writerFile != null)
: new VariantContextWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes); ? new VariantContextWriterStub(engine, writerFile, argumentSources)
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
stub.setCompressed(isCompressed(writerFileName));
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
// WARNING: Side effects required by engine! // WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches)); parsingEngine.addTags(stub,getArgumentTags(matches));
@ -159,8 +164,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
/** /**
* Creates the optional compression level argument for the BAM file. * Creates the optional no_header argument for the VCF file.
* @return Argument definition for the BAM file itself. Will not be null. * @return Argument definition for the VCF file itself. Will not be null.
*/ */
private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() { private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT, return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -179,8 +184,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
/** /**
* Creates the optional compression level argument for the BAM file. * Creates the optional sites_only argument definition
* @return Argument definition for the BAM file itself. Will not be null. * @return Argument definition for the VCF file itself. Will not be null.
*/ */
private ArgumentDefinition createSitesOnlyArgumentDefinition() { private ArgumentDefinition createSitesOnlyArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT, return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -198,6 +203,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
null ); null );
} }
/**
 * Builds the definition of the optional {@code bcf} flag, which requests BCF
 * output regardless of the output file's extension.
 * @return A newly constructed definition for the bcf flag. Will not be null.
 */
private ArgumentDefinition createBCFArgumentDefinition() {
    // FORCE_BCF is passed twice -- presumably as both the full and the short
    // argument name; confirm against the ArgumentDefinition constructor.
    final ArgumentDefinition forceBcfDefinition = new ArgumentDefinition(
            ArgumentIOType.ARGUMENT,
            boolean.class,
            FORCE_BCF,
            FORCE_BCF,
            "force BCF output, regardless of the file's extension",
            false, true, false, true,
            null, null, null, null );
    return forceBcfDefinition;
}
/** /**
* Returns true if the file will be compressed. * Returns true if the file will be compressed.
* @param writerFileName Name of the file * @param writerFileName Name of the file

View File

@ -79,7 +79,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/** /**
* Should we emit a compressed output stream? * Should we emit a compressed output stream?
*/ */
private final boolean isCompressed; private boolean isCompressed = false;
/** /**
* A hack: push the argument sources into the VCF header so that the VCF header * A hack: push the argument sources into the VCF header so that the VCF header
@ -90,12 +90,17 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/** /**
* Should the header be written out? A hidden argument. * Should the header be written out? A hidden argument.
*/ */
private final boolean skipWritingCommandLineHeader; private boolean skipWritingCommandLineHeader = false;
/** /**
* Should we not write genotypes even when provided? * Should we not write genotypes even when provided?
*/ */
private final boolean doNotWriteGenotypes; private boolean doNotWriteGenotypes = false;
/**
* Should we force BCF writing regardless of the file extension?
*/
private boolean forceBCF = false;
/** /**
* Connects this stub with an external stream capable of serving the * Connects this stub with an external stream capable of serving the
@ -108,19 +113,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* *
* @param engine engine. * @param engine engine.
* @param genotypeFile file to (ultimately) create. * @param genotypeFile file to (ultimately) create.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources. * @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/ */
public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, Collection<Object> argumentSources) {
this.engine = engine; this.engine = engine;
this.genotypeFile = genotypeFile; this.genotypeFile = genotypeFile;
this.genotypeStream = null; this.genotypeStream = null;
this.isCompressed = isCompressed;
this.argumentSources = argumentSources; this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
} }
/** /**
@ -128,19 +127,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* *
* @param engine engine. * @param engine engine.
* @param genotypeStream stream to (ultimately) write. * @param genotypeStream stream to (ultimately) write.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources. * @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/ */
public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, Collection<Object> argumentSources) {
this.engine = engine; this.engine = engine;
this.genotypeFile = null; this.genotypeFile = null;
this.genotypeStream = new PrintStream(genotypeStream); this.genotypeStream = new PrintStream(genotypeStream);
this.isCompressed = isCompressed;
this.argumentSources = argumentSources; this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
} }
/** /**
@ -167,6 +160,22 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
return isCompressed; return isCompressed;
} }
/**
 * Records whether a compressed output stream should be emitted.
 * @param compressed true to mark the output as compressed, false otherwise.
 */
public void setCompressed(boolean compressed) {
    this.isCompressed = compressed;
}
/**
 * Stores the flag controlling whether the command-line header is skipped.
 * @param skipWritingCommandLineHeader new value of the flag; the field defaults to false.
 */
public void setSkipWritingCommandLineHeader(boolean skipWritingCommandLineHeader) {
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
}
/**
 * Stores the flag saying genotypes should not be written even when provided.
 * @param doNotWriteGenotypes new value of the flag; the field defaults to false.
 */
public void setDoNotWriteGenotypes(boolean doNotWriteGenotypes) {
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
/**
 * Stores the flag that forces BCF writing regardless of the file extension.
 * @param forceBCF new value of the flag; the field defaults to false.
 */
public void setForceBCF(boolean forceBCF) {
this.forceBCF = forceBCF;
}
/** /**
* Gets the master sequence dictionary from the engine associated with this stub * Gets the master sequence dictionary from the engine associated with this stub
* @link GenomeAnalysisEngine.getMasterSequenceDictionary * @link GenomeAnalysisEngine.getMasterSequenceDictionary
@ -187,7 +196,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER); if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY); if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
if ( getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile()) ) if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
options.add(Options.FORCE_BCF); options.add(Options.FORCE_BCF);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options); return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);