Moving the --sites_only argument up into the VCFWriter itself so that any walkers that write VCFs can choose not to emit genotypes

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5088 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2011-01-26 19:38:16 +00:00
parent a97184fddf
commit d33162145b
6 changed files with 45 additions and 28 deletions

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.io.storage; package org.broadinstitute.sting.gatk.io.storage;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.source.BasicFeatureSource; import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.vcf.*; import org.broad.tribble.vcf.*;
import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.VariantContext;
@ -11,7 +10,6 @@ import java.io.*;
import net.sf.samtools.util.BlockCompressedOutputStream; import net.sf.samtools.util.BlockCompressedOutputStream;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
/** /**
* Provides temporary and permanent storage for genotypes in VCF format. * Provides temporary and permanent storage for genotypes in VCF format.
@ -37,7 +35,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
else if ( stub.getOutputStream() != null ) { else if ( stub.getOutputStream() != null ) {
this.file = null; this.file = null;
this.stream = stub.getOutputStream(); this.stream = stub.getOutputStream();
writer = new StandardVCFWriter(stream); writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes());
} }
else else
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream."); throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
@ -62,7 +60,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
} }
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it. // The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed()); return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
} }

View File

@ -25,17 +25,13 @@
package org.broadinstitute.sting.gatk.io.stubs; package org.broadinstitute.sting.gatk.io.stubs;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFWriter; import org.broad.tribble.vcf.VCFWriter;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File; import java.io.File;
import java.io.OutputStream; import java.io.OutputStream;
import java.lang.annotation.Annotation;
import java.util.*; import java.util.*;
/** /**
@ -46,6 +42,7 @@ import java.util.*;
*/ */
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
private static final String NO_HEADER_ARG_NAME = "NO_HEADER"; private static final String NO_HEADER_ARG_NAME = "NO_HEADER";
private static final String SITES_ONLY_ARG_NAME = "sites_only";
private static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>(); private static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
// //
@ -95,7 +92,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) { public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createDefaultArgumentDefinition(source),createNoHeaderArgumentDefinition()); return Arrays.asList( createDefaultArgumentDefinition(source),createNoHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition());
} }
/** /**
@ -111,7 +108,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) { public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
if(!source.isRequired()) if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false); VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
engine.addOutput(stub); engine.addOutput(stub);
return stub; return stub;
} }
@ -139,10 +136,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
boolean compress = writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName)); boolean compress = writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName));
boolean skipWritingHeader = argumentIsPresent(createNoHeaderArgumentDefinition(),matches); boolean skipWritingHeader = argumentIsPresent(createNoHeaderArgumentDefinition(),matches);
boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches);
// Create a stub for the given object. // Create a stub for the given object.
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress, argumentSources, skipWritingHeader) VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress, argumentSources, skipWritingHeader, doNotWriteGenotypes)
: new VCFWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingHeader); : new VCFWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingHeader, doNotWriteGenotypes);
// WARNING: Side effects required by engine! // WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches)); parsingEngine.addTags(stub,getArgumentTags(matches));
@ -171,6 +169,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
null ); null );
} }
/**
* Creates the optional sites-only argument for the VCF writer. The argument is a boolean
* registered under SITES_ONLY_ARG_NAME for both of its name slots; its presence on the
* command line is what callers test for in order to request that only sites, without
* genotypes (i.e. only the first 8 columns of the VCF), be written.
* NOTE(review): the four positional boolean literals below presumably map to the
* required / flag / multi-valued / hidden settings of ArgumentDefinition -- confirm
* against that constructor before changing them.
* @return Argument definition for the sites-only argument. Will not be null.
*/
private ArgumentDefinition createSitesOnlyArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
SITES_ONLY_ARG_NAME,
SITES_ONLY_ARG_NAME,
"Just output sites without genotypes (i.e. only the first 8 columns of the VCF)",
false,
true,
false,
true,
null,
null,
null,
null );
}
/** /**
* Returns a lower-cased version of the suffix of the provided file. * Returns a lower-cased version of the suffix of the provided file.

View File

@ -28,15 +28,12 @@ package org.broadinstitute.sting.gatk.io.stubs;
import java.io.File; import java.io.File;
import java.io.PrintStream; import java.io.PrintStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.TreeSet;
import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeader; import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine; import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFWriter; import org.broad.tribble.vcf.VCFWriter;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker;
@ -87,6 +84,11 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
*/ */
private final boolean skipWritingHeader; private final boolean skipWritingHeader;
/**
* Should we not write genotypes even when provided?
*/
private final boolean doNotWriteGenotypes;
/** /**
* Connects this stub with an external stream capable of serving the * Connects this stub with an external stream capable of serving the
* requests of the consumer of this stub. * requests of the consumer of this stub.
@ -98,13 +100,14 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
* @param genotypeFile file to (ultimately) create. * @param genotypeFile file to (ultimately) create.
* @param isCompressed should we compress the output stream? * @param isCompressed should we compress the output stream?
*/ */
public VCFWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader) { public VCFWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader, boolean doNotWriteGenotypes) {
this.engine = engine; this.engine = engine;
this.genotypeFile = genotypeFile; this.genotypeFile = genotypeFile;
this.genotypeStream = null; this.genotypeStream = null;
this.isCompressed = isCompressed; this.isCompressed = isCompressed;
this.argumentSources = argumentSources; this.argumentSources = argumentSources;
this.skipWritingHeader = skipWritingHeader; this.skipWritingHeader = skipWritingHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
} }
/** /**
@ -112,13 +115,14 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
* @param genotypeStream stream to (ultimately) write. * @param genotypeStream stream to (ultimately) write.
* @param isCompressed should we compress the output stream? * @param isCompressed should we compress the output stream?
*/ */
public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader) { public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader, boolean doNotWriteGenotypes) {
this.engine = engine; this.engine = engine;
this.genotypeFile = null; this.genotypeFile = null;
this.genotypeStream = new PrintStream(genotypeStream); this.genotypeStream = new PrintStream(genotypeStream);
this.isCompressed = isCompressed; this.isCompressed = isCompressed;
this.argumentSources = argumentSources; this.argumentSources = argumentSources;
this.skipWritingHeader = skipWritingHeader; this.skipWritingHeader = skipWritingHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
} }
/** /**
@ -145,6 +149,14 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
return isCompressed; return isCompressed;
} }
/**
 * Reports whether genotype output has been switched off for this stub.
 * The value is fixed when the stub is constructed.
 * @return true if the VCF writer should be told not to write genotypes, false otherwise.
 */
public boolean doNotWriteGenotypes() {
    return this.doNotWriteGenotypes;
}
/** /**
* Retrieves the header to use when creating the new file. * Retrieves the header to use when creating the new file.
* @return header to use when creating the new file. * @return header to use when creating the new file.

View File

@ -44,10 +44,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false) @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE; public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
// control the output
@Argument(fullName = "sites_only", shortName = "sites_only", doc = "Should we output just sites without genotypes (i.e. only the first 8 columns of the VCF)?", required = false)
public boolean SITES_ONLY = false;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false) @Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public boolean GENOTYPE_MODE = false; public boolean GENOTYPE_MODE = false;
@ -109,7 +105,6 @@ public class UnifiedArgumentCollection {
uac.GLmodel = GLmodel; uac.GLmodel = GLmodel;
uac.heterozygosity = heterozygosity; uac.heterozygosity = heterozygosity;
uac.PCR_error = PCR_error; uac.PCR_error = PCR_error;
uac.SITES_ONLY = SITES_ONLY;
uac.GENOTYPE_MODE = GENOTYPE_MODE; uac.GENOTYPE_MODE = GENOTYPE_MODE;
uac.ALL_BASES_MODE = ALL_BASES_MODE; uac.ALL_BASES_MODE = ALL_BASES_MODE;
uac.NO_SLOD = NO_SLOD; uac.NO_SLOD = NO_SLOD;

View File

@ -129,9 +129,6 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse); annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
if ( UAC.SITES_ONLY )
samples.clear();
// initialize the header // initialize the header
writer.writeHeader(new VCFHeader(getHeaderInfo(), samples)) ; writer.writeHeader(new VCFHeader(getHeaderInfo(), samples)) ;
} }

View File

@ -400,9 +400,6 @@ public class UnifiedGenotyperEngine {
vcCall = variantContexts.iterator().next(); // we know the collection will always have exactly 1 element. vcCall = variantContexts.iterator().next(); // we know the collection will always have exactly 1 element.
} }
if ( UAC.SITES_ONLY )
vcCall = VariantContext.modifyGenotypes(vcCall, null);
VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence, atTriggerTrack)); VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence, atTriggerTrack));
call.setRefBase(refContext.getBase()); call.setRefBase(refContext.getBase());
return call; return call;