Moving the --sites_only argument up into the VCFWriter itself so that any walkers that write VCFs can choose not to emit genotypes

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5088 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2011-01-26 19:38:16 +00:00
parent a97184fddf
commit d33162145b
6 changed files with 45 additions and 28 deletions

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.vcf.*;
import org.broad.tribble.util.variantcontext.VariantContext;
@ -11,7 +10,6 @@ import java.io.*;
import net.sf.samtools.util.BlockCompressedOutputStream;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
/**
* Provides temporary and permanent storage for genotypes in VCF format.
@ -37,7 +35,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
else if ( stub.getOutputStream() != null ) {
this.file = null;
this.stream = stub.getOutputStream();
writer = new StandardVCFWriter(stream);
writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes());
}
else
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
@ -62,7 +60,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
}
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed());
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
}

View File

@ -25,17 +25,13 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFWriter;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.OutputStream;
import java.lang.annotation.Annotation;
import java.util.*;
/**
@ -46,6 +42,7 @@ import java.util.*;
*/
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
private static final String NO_HEADER_ARG_NAME = "NO_HEADER";
private static final String SITES_ONLY_ARG_NAME = "sites_only";
private static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
//
@ -95,7 +92,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createDefaultArgumentDefinition(source),createNoHeaderArgumentDefinition());
return Arrays.asList( createDefaultArgumentDefinition(source),createNoHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition());
}
/**
@ -111,7 +108,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false);
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
engine.addOutput(stub);
return stub;
}
@ -139,10 +136,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
boolean compress = writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName));
boolean skipWritingHeader = argumentIsPresent(createNoHeaderArgumentDefinition(),matches);
boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches);
// Create a stub for the given object.
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress, argumentSources, skipWritingHeader)
: new VCFWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingHeader);
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile, compress, argumentSources, skipWritingHeader, doNotWriteGenotypes)
: new VCFWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingHeader, doNotWriteGenotypes);
// WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches));
@ -171,6 +169,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
null );
}
/**
* Creates the optional sites-only argument definition for the VCF writer.
* When this argument is present on the command line, the VCF writer stub is constructed
* with its doNotWriteGenotypes flag set, so only site-level columns are emitted.
* @return Argument definition for the sites-only argument. Will not be null.
*/
private ArgumentDefinition createSitesOnlyArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
// The same constant is passed twice -- presumably as the full and the short argument name; confirm against the ArgumentDefinition constructor.
SITES_ONLY_ARG_NAME,
SITES_ONLY_ARG_NAME,
"Just output sites without genotypes (i.e. only the first 8 columns of the VCF)",
// NOTE(review): the four booleans below are positional flags whose meanings are not visible in this file; verify their order against the ArgumentDefinition constructor before changing any of them.
false,
true,
false,
true,
null,
null,
null,
null );
}
/**
* Returns a lower-cased version of the suffix of the provided file.

View File

@ -28,15 +28,12 @@ package org.broadinstitute.sting.gatk.io.stubs;
import java.io.File;
import java.io.PrintStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.TreeSet;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFWriter;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.OutputTracker;
@ -87,6 +84,11 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
*/
private final boolean skipWritingHeader;
/**
* Should we not write genotypes even when provided?
*/
private final boolean doNotWriteGenotypes;
/**
* Connects this stub with an external stream capable of serving the
* requests of the consumer of this stub.
@ -98,13 +100,14 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
* @param genotypeFile file to (ultimately) create.
* @param isCompressed should we compress the output stream?
*/
public VCFWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader) {
public VCFWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader, boolean doNotWriteGenotypes) {
this.engine = engine;
this.genotypeFile = genotypeFile;
this.genotypeStream = null;
this.isCompressed = isCompressed;
this.argumentSources = argumentSources;
this.skipWritingHeader = skipWritingHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
/**
@ -112,13 +115,14 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
* @param genotypeStream stream to (ultimately) write.
* @param isCompressed should we compress the output stream?
*/
public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader) {
public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader, boolean doNotWriteGenotypes) {
this.engine = engine;
this.genotypeFile = null;
this.genotypeStream = new PrintStream(genotypeStream);
this.isCompressed = isCompressed;
this.argumentSources = argumentSources;
this.skipWritingHeader = skipWritingHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
/**
@ -145,6 +149,14 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
return isCompressed;
}
/**
* Reports whether genotype output is suppressed for this stub.
* @return the doNotWriteGenotypes flag supplied when this stub was constructed;
*         true means the writer should not write genotypes.
*/
public boolean doNotWriteGenotypes() {
return this.doNotWriteGenotypes;
}
/**
* Retrieves the header to use when creating the new file.
* @return header to use when creating the new file.

View File

@ -44,10 +44,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
// control the output
@Argument(fullName = "sites_only", shortName = "sites_only", doc = "Should we output just sites without genotypes (i.e. only the first 8 columns of the VCF)?", required = false)
public boolean SITES_ONLY = false;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public boolean GENOTYPE_MODE = false;
@ -109,7 +105,6 @@ public class UnifiedArgumentCollection {
uac.GLmodel = GLmodel;
uac.heterozygosity = heterozygosity;
uac.PCR_error = PCR_error;
uac.SITES_ONLY = SITES_ONLY;
uac.GENOTYPE_MODE = GENOTYPE_MODE;
uac.ALL_BASES_MODE = ALL_BASES_MODE;
uac.NO_SLOD = NO_SLOD;

View File

@ -129,9 +129,6 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
if ( UAC.SITES_ONLY )
samples.clear();
// initialize the header
writer.writeHeader(new VCFHeader(getHeaderInfo(), samples)) ;
}

View File

@ -400,9 +400,6 @@ public class UnifiedGenotyperEngine {
vcCall = variantContexts.iterator().next(); // we know the collection will always have exactly 1 element.
}
if ( UAC.SITES_ONLY )
vcCall = VariantContext.modifyGenotypes(vcCall, null);
VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence, atTriggerTrack));
call.setRefBase(refContext.getBase());
return call;