Getting rid of GenotypeWriter interface. Of note:

- GATKVCFWriter deleted, to be replaced if absolutely necessary when VCF writing goes into Tribble.
- VCFWriter is now an interface, for easier redirection.
- VCFWriterImpl fleshes out the VCFWriter interface.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4026 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-08-13 16:33:22 +00:00
parent 542d394e09
commit cb144734c0
46 changed files with 625 additions and 1109 deletions

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import java.io.File;
import java.io.FileNotFoundException;
@ -99,7 +99,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
* @return A collection of type descriptors generating implementation-dependent placeholders.
*/
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
return Arrays.asList( new GenotypeWriterArgumentTypeDescriptor(GATKEngine),
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(GATKEngine),
new SAMFileReaderArgumentTypeDescriptor(GATKEngine),
new SAMFileWriterArgumentTypeDescriptor(GATKEngine),
new OutputStreamArgumentTypeDescriptor(GATKEngine) );

View File

@ -1,86 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.io.storage;
import java.io.*;
import java.util.Set;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broad.tribble.vcf.VCFHeader;
/**
 * Common base for storage objects that sit behind a {@link GenotypeWriter}.
 * All records are forwarded to an underlying writer obtained from
 * {@link GenotypeWriterFactory}; subclasses supply the merge behaviour.
 *
 * @author ebanks
 * @version 0.1
 */
public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements GenotypeWriter, Storage<T> {
    protected final File file;
    protected final PrintStream stream;
    protected final GenotypeWriter writer;

    /**
     * Builds storage that writes straight to the destination carried by the stub,
     * preferring the stub's file over its stream when both are present.
     * No header is written here; that is left to the walker.
     * @param stub Stub describing the destination file or stream.
     */
    public GenotypeWriterStorage( GenotypeWriterStub stub ) {
        final File stubFile = stub.getFile();
        final PrintStream stubStream = stub.getOutputStream();

        // Guard first: without a file or a stream there is nothing to write to.
        if(stubFile == null && stubStream == null)
            throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");

        this.file = stubFile;
        this.stream = stubStream;
        this.writer = (stubFile != null) ? GenotypeWriterFactory.create(stubFile)
                                         : GenotypeWriterFactory.create(stubStream);
    }

    /**
     * Builds storage that writes to an explicitly supplied file instead of the stub's
     * own destination, and immediately writes a header listing the samples found in
     * the stub's SAM file header.
     * @param stub Stub from which the SAM file header (and thus the sample set) is taken.
     * @param file File that receives the output.
     */
    public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) {
        this.file = file;
        this.stream = null;
        this.writer = GenotypeWriterFactory.create(file);

        final Set<String> sampleNames = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
        final VCFHeader header = new VCFHeader(null, sampleNames);
        GenotypeWriterFactory.writeHeader(writer, header);
    }

    /**
     * Forwards a single variant record to the underlying writer.
     * @param vc  the variant context to write.
     * @param ref the reference base passed through to the writer.
     */
    public void add(VariantContext vc, byte ref) {
        writer.add(vc, ref);
    }

    /**
     * Closes the underlying writer.
     */
    public void close() {
        writer.close();
    }
}

View File

@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
@ -62,7 +62,7 @@ public class StorageFactory {
* @param <T> Type of the stream to create.
* @return Storage object with a facade of type T.
*/
public static <T> Storage<T> createStorage( Stub<T> stub, File file ) {
public static <T> Storage<T> createStorage( Stub<T> stub, File file ) {
Storage storage;
if(stub instanceof OutputStreamStub) {
@ -77,12 +77,12 @@ public class StorageFactory {
else
storage = new SAMFileWriterStorage((SAMFileWriterStub)stub);
}
else if(stub instanceof GenotypeWriterStub) {
GenotypeWriterStub genotypeWriterStub = (GenotypeWriterStub)stub;
else if(stub instanceof VCFWriterStub) {
VCFWriterStub vcfWriterStub = (VCFWriterStub)stub;
if( file != null )
storage = new VCFGenotypeWriterStorage(genotypeWriterStub,file);
storage = new VCFWriterStorage(vcfWriterStub,file);
else
storage = new VCFGenotypeWriterStorage(genotypeWriterStub);
storage = new VCFWriterStorage(vcfWriterStub);
}
else
throw new StingException("Unsupported stub type: " + stub.getClass().getName());

View File

@ -1,58 +0,0 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
/**
 * Storage for genotypes in VCF format, usable either as the final destination
 * or as a separate file that is later merged into another VCF writer.
 *
 * @author mhanna
 * @version 0.1
 */
public class VCFGenotypeWriterStorage extends GenotypeWriterStorage<VCFGenotypeWriter> implements VCFGenotypeWriter {
    /**
     * Creates storage that writes to the destination described by the stub.
     * @param stub Stub containing appropriate input parameters.
     */
    public VCFGenotypeWriterStorage(GenotypeWriterStub stub) {
        super(stub);
    }

    /**
     * Creates storage that writes to the given target file rather than the stub's destination.
     * @param stub   Stub containing appropriate input parameters.
     * @param target Target file for output data.
     */
    public VCFGenotypeWriterStorage(GenotypeWriterStub stub,File target) {
        super(stub,target);
    }

    /**
     * Writes the VCF header through the underlying writer.
     * @param header the header
     */
    public void writeHeader(VCFHeader header) {
        vcfWriter().writeHeader(header);
    }

    /**
     * Appends the given VCF file through the underlying writer.
     * @param file file from which to add records
     */
    public void append(File file) {
        vcfWriter().append(file);
    }

    /**
     * Appends the file behind this storage to the target writer, then deletes that file.
     * @param target Writer receiving the contents of this storage. May not be null.
     */
    public void mergeInto(VCFGenotypeWriter target) {
        final File backingFile = file;
        target.append(backingFile);
        backingFile.delete();
    }

    /**
     * Views the inherited writer as a VCF genotype writer.
     * @return the underlying writer, cast to {@link VCFGenotypeWriter}.
     */
    private VCFGenotypeWriter vcfWriter() {
        return (VCFGenotypeWriter)writer;
    }
}

View File

@ -0,0 +1,109 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
import java.io.*;
import java.util.Set;
/**
* Provides temporary and permanent storage for genotypes in VCF format.
*
* @author mhanna
* @version 0.1
*/
public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
protected final File file;
protected final PrintStream stream;
protected final VCFWriter writer;
/**
* Constructs an object which will write directly into the output file provided by the stub.
* Intentionally delaying the writing of the header -- this should be filled in by the walker.
* @param stub Stub to use when constructing the output file.
*/
public VCFWriterStorage( VCFWriterStub stub ) {
if(stub.getFile() != null) {
this.file = stub.getFile();
try {
this.stream = new PrintStream(stub.getFile());
}
catch(IOException ex) {
throw new StingException("Unable to open target output stream",ex);
}
}
else if(stub.getOutputStream() != null) {
this.file = null;
this.stream = stub.getOutputStream();
}
else
throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
writer = new VCFWriterImpl(stream);
}
/**
* Constructs an object which will redirect into a different file.
* @param stub Stub to use when synthesizing file / header info.
* @param file File into which to direct the output data.
*/
public VCFWriterStorage(VCFWriterStub stub, File file) {
this.file = file;
try {
this.stream = new PrintStream(file);
}
catch(IOException ex) {
throw new StingException("Unable to open target output stream",ex);
}
writer = new VCFWriterImpl(this.stream);
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
writer.writeHeader(new VCFHeader(null, samples));
}
public void add(VariantContext vc, byte ref) {
writer.add(vc, ref);
}
/**
* initialize this VCF header
*
* @param header the header
*/
public void writeHeader(VCFHeader header) {
writer.writeHeader(header);
}
/**
* Close the VCF storage object.
*/
public void close() {
writer.close();
}
/**
* Merges the stream backing up this temporary storage into the target.
* @param target Target stream for the temporary storage. May not be null.
*/
public void mergeInto(VCFWriterStorage target) {
PrintStream formattingTarget = new PrintStream(target.stream);
try {
BufferedReader reader = new BufferedReader(new FileReader(file));
String line = reader.readLine();
while ( line != null ) {
if (!VCFHeaderLine.isHeaderLine(line))
formattingTarget.printf("%s%n",line);
line = reader.readLine();
}
reader.close();
} catch (IOException e) {
throw new StingException("Error reading file " + file + " in GATKVCFWriter: ", e);
}
}
}

View File

@ -1,60 +0,0 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
import java.io.PrintStream;
/**
* Stub providing a passthrough for VCF files. Header and append calls are
* forwarded to whatever storage the output tracker associates with this stub.
*
* @author mhanna
* @version 0.1
*/
public class VCFGenotypeWriterStub extends GenotypeWriterStub<VCFGenotypeWriter> implements VCFGenotypeWriter {
/**
* Construct a new stub with the given engine and target file.
* @param engine The engine, for extracting command-line arguments, etc.
* @param genotypeFile Target file into which to write genotyping data.
*/
public VCFGenotypeWriterStub(GenomeAnalysisEngine engine, File genotypeFile) {
super(engine,genotypeFile);
}
/**
* Construct a new stub with the given engine and target stream.
* @param engine The engine, for extracting command-line arguments, etc.
* @param genotypeStream Target stream into which to write genotyping data.
*/
public VCFGenotypeWriterStub(GenomeAnalysisEngine engine, PrintStream genotypeStream) {
super(engine,genotypeStream);
}
/**
* Gets the format of this stub. We may want to discontinue use of this method and rely on instanceof comparisons.
* @return VCF always.
*/
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
}
/**
* Writes the VCF header by delegating to the storage registered for this stub.
*
* @param header the header
*/
public void writeHeader(VCFHeader header) {
// outputTracker is not declared in this class; presumably inherited from GenotypeWriterStub -- confirm.
outputTracker.getStorage(this).writeHeader(header);
}
/**
* Add a given VCF file to the writer, delegating to the storage registered for this stub.
* @param file file from which to add records
*/
public void append(File file) {
outputTracker.getStorage(this).append(file);
}
}

View File

@ -26,8 +26,7 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
@ -41,7 +40,7 @@ import java.util.Arrays;
* @author mhanna
* @version 0.1
*/
public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
/**
* The engine into which output stubs should be fed.
*/
@ -51,7 +50,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
* Create a new GenotypeWriter argument, notifying the given engine when that argument has been created.
* @param engine the engine to be notified.
*/
public GenotypeWriterArgumentTypeDescriptor(GenomeAnalysisEngine engine) {
public VCFWriterArgumentTypeDescriptor(GenomeAnalysisEngine engine) {
this.engine = engine;
}
@ -62,7 +61,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
*/
@Override
public boolean supports( Class type ) {
return GenotypeWriter.class.equals(type);
return VCFWriter.class.equals(type);
}
/**
@ -73,8 +72,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
*/
@Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createGenotypeFileArgumentDefinition(source),
createGenotypeFormatArgumentDefinition(source) );
return Arrays.asList( createGenotypeFileArgumentDefinition(source) );
}
/**
@ -92,7 +90,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
*/
@Override
public Object getDefault() {
GenotypeWriterStub defaultGenotypeWriter = new VCFGenotypeWriterStub(engine,System.out);
VCFWriterStub defaultGenotypeWriter = new VCFWriterStub(engine,System.out);
engine.addOutput(defaultGenotypeWriter);
return defaultGenotypeWriter;
}
@ -111,7 +109,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
File writerFile = writerFileName != null ? new File(writerFileName) : null;
// Create a stub for the given object.
GenotypeWriterStub stub = (writerFile != null) ? new VCFGenotypeWriterStub(engine, writerFile) : new VCFGenotypeWriterStub(engine,System.out);
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile) : new VCFWriterStub(engine,System.out);
engine.addOutput(stub);
@ -136,27 +134,4 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
source.isHidden(),
null );
}
/**
* Creates the argument definition for the variant output format option
* (full name "variant_output_format", short name "vf"), whose value type is
* GenotypeWriterFactory.GENOTYPE_FORMAT.
* @param source Argument source supplying the annotation and hidden flag. Must not be null.
* @return A newly constructed argument definition for the output format. Will not be null.
*/
private ArgumentDefinition createGenotypeFormatArgumentDefinition(ArgumentSource source) {
Annotation annotation = this.getArgumentAnnotation(source);
return new ArgumentDefinition( ArgumentIOType.getIOType(annotation),
GenotypeWriterFactory.GENOTYPE_FORMAT.class,
"variant_output_format",
"vf",
"Format to be used to represent variants; default is VCF",
false,
false,
false,
null,
source.isHidden(),
null,
null,
null );
}
}

View File

@ -29,10 +29,11 @@ import java.io.File;
import java.io.PrintStream;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.storage.VCFWriterStorage;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import net.sf.samtools.SAMFileHeader;
/**
@ -41,7 +42,7 @@ import net.sf.samtools.SAMFileHeader;
* @author ebanks
* @version 0.1
*/
public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements Stub<T>, GenotypeWriter {
public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
/**
* Engine to use for collecting attributes for the output SAM file.
@ -71,7 +72,7 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
* @param engine GATK engine.
* @param genotypeFile file to (ultimately) create.
*/
public GenotypeWriterStub(GenomeAnalysisEngine engine,File genotypeFile) {
public VCFWriterStub(GenomeAnalysisEngine engine,File genotypeFile) {
this.engine = engine;
this.genotypeFile = genotypeFile;
this.genotypeStream = null;
@ -82,7 +83,7 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
* @param engine GATK engine.
* @param genotypeStream stream to (ultimately) write.
*/
public GenotypeWriterStub(GenomeAnalysisEngine engine,PrintStream genotypeStream) {
public VCFWriterStub(GenomeAnalysisEngine engine,PrintStream genotypeStream) {
this.engine = engine;
this.genotypeFile = null;
this.genotypeStream = genotypeStream;
@ -112,12 +113,6 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
return engine.getSAMFileHeader();
}
/**
* Retrieves the format to use when creating the new file.
* @return format to use when creating the new file.
*/
public abstract GenotypeWriterFactory.GENOTYPE_FORMAT getFormat();
/**
* Registers the given streamConnector with this stub.
* @param outputTracker The connector used to provide an appropriate stream.
@ -126,6 +121,10 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
this.outputTracker = outputTracker;
}
public void writeHeader(VCFHeader header) {
outputTracker.getStorage(this).writeHeader(header);
}
/**
* {@inheritDoc}
*/
@ -139,5 +138,4 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
public void close() {
outputTracker.getStorage(this).close();
}
}

View File

@ -126,7 +126,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
}
}
vcfwriter = new VCFWriter(out);
vcfwriter = new VCFWriterImpl(out);
vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
}

View File

@ -46,6 +46,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import java.util.*;
@ -153,7 +154,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
hInfo.add(new VCFHeaderLine("VariantAnnotator", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\""));
}
vcfWriter = new VCFWriter(out);
vcfWriter = new VCFWriterImpl(out);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);

View File

@ -40,6 +40,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import java.util.*;
@ -118,7 +119,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
hInfo.add(new VCFHeaderLine("VariantFiltration", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\""));
}
writer = new VCFWriter(out);
writer = new VCFWriterImpl(out);
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(vc.getSampleNames())));
}

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@ -56,7 +56,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
@Argument(doc = "VCF file to which variants should be written", required = false)
public GenotypeWriter writer = null;
public VCFWriter writer = null;
@Argument(fullName="rod_list", shortName="rods", doc="A comma-separated string describing the rod names representing individual call batches", required=true)
protected String ROD_STRING = null;
@ -91,7 +91,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
UG_engine.samples = samples;
// initialize the header
GenotypeWriterFactory.writeHeader(writer, new VCFHeader(headerLines, samples));
writer.writeHeader(new VCFHeader(headerLines, samples));
}
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -52,7 +52,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// control the output
@Argument(doc = "File to which variants should be written", required = false)
public GenotypeWriter writer = null;
public VCFWriter writer = null;
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
protected PrintStream verboseWriter = null;
@ -126,16 +126,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
}
// initialize the header
GenotypeWriterFactory.writeHeader(writer, new VCFHeader(getHeaderInfo(), UG_engine.samples)) ;
writer.writeHeader(new VCFHeader(getHeaderInfo(), UG_engine.samples)) ;
}
private Set<VCFHeaderLine> getHeaderInfo() {
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// this is only applicable to VCF
if ( !(writer instanceof VCFGenotypeWriter) )
return headerInfo;
// all annotation fields from VariantAnnotatorEngine
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.pileup.*;
import org.broad.tribble.vcf.VCFConstants;
@ -65,7 +65,7 @@ public class UnifiedGenotyperEngine {
// the various loggers and writers
protected Logger logger = null;
protected GenotypeWriter genotypeWriter = null;
protected VCFWriter vcfWriter = null;
protected PrintStream verboseWriter = null;
// samples in input
@ -76,15 +76,15 @@ public class UnifiedGenotyperEngine {
initialize(toolkit, UAC, null, null, null, null);
}
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) {
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, VCFWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) {
initialize(toolkit, UAC, logger, genotypeWriter, verboseWriter, engine);
}
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) {
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, VCFWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) {
this.UAC = UAC;
this.logger = logger;
this.genotypeWriter = genotypeWriter;
this.vcfWriter = genotypeWriter;
this.verboseWriter = verboseWriter;
this.annotationEngine = engine;

View File

@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import java.util.*;
@ -118,7 +119,7 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
if ( sampleNames == null )
sampleNames = new TreeSet<String>();
VCFWriter vcfWriter = new VCFWriter(out);
VCFWriter vcfWriter = new VCFWriterImpl(out);
// set up the info and filter headers
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();

View File

@ -48,6 +48,7 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
@ -336,7 +337,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
determineContextNamePartSizes();
if ( outputVCF != null )
writer = new VCFWriter(new File(outputVCF));
writer = new VCFWriterImpl(new File(outputVCF));
if ( rsIDFile != null ) {
if ( maxRsIDBuild == Integer.MAX_VALUE )

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
@ -148,7 +149,7 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFHeaderLineType.Float, "The original variant quality score"));
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
vcfWriter = new VCFWriter( new File(OUTPUT_FILENAME) );
vcfWriter = new VCFWriterImpl( new File(OUTPUT_FILENAME) );
final TreeSet<String> samples = new TreeSet<String>();
samples.addAll(SampleUtils.getSampleListWithVCFHeader(getToolkit(), null));

View File

@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import java.io.File;
import java.io.IOException;
@ -137,7 +138,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit()));
vcfWriter = new VCFWriter( new File(OUTPUT_PREFIX + ".vcf") );
vcfWriter = new VCFWriterImpl( new File(OUTPUT_PREFIX + ".vcf") );
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);

View File

@ -81,7 +81,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
private VariantAnnotatorEngine engine;
public void initialize() {
vcfWriter = new VCFWriter(out);
vcfWriter = new VCFWriterImpl(out);
validateAnnotateUnionArguments();
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), null);

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -51,7 +52,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
writer = new VCFWriter(out);
writer = new VCFWriterImpl(out);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
writer.writeHeader(vcfHeader);
}

View File

@ -28,6 +28,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -74,7 +75,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
writer = new VCFWriter(out);
writer = new VCFWriterImpl(out);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
writer.writeHeader(vcfHeader);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010.
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -12,15 +12,14 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantutils;
@ -43,6 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import java.util.*;
import java.util.regex.Matcher;
@ -79,7 +79,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
*/
public void initialize() {
vcfWriter = new VCFWriter(out);
vcfWriter = new VCFWriterImpl(out);
ArrayList<String> rodNames = new ArrayList<String>();
rodNames.add("variant");

View File

@ -85,7 +85,7 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{
anno.add(new VCFInfoHeaderLine("type",1, VCFHeaderLineType.String,"Genomic interpretation (according to RefSeq)"));
hInfo.addAll(anno);
vcfWriter = new VCFWriter(out);
vcfWriter = new VCFWriterImpl(out);
VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
vcfWriter.writeHeader(vcfHeader);
}

View File

@ -17,6 +17,7 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import java.io.PrintStream;
import java.util.*;
@ -46,7 +47,7 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
}
if ( outVCF != null ) {
vcfWriter = new VCFWriter(outVCF);
vcfWriter = new VCFWriterImpl(outVCF);
Set<VCFHeaderLine> header = new HashSet<VCFHeaderLine>();
header.addAll(VCFUtils.getHeaderFields(getToolkit()));
VCFHeader vcfHeader = new VCFHeader(header, SampleUtils.getUniqueSamplesFromRods(getToolkit()));

View File

@ -23,6 +23,7 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import java.io.PrintStream;
@ -372,7 +373,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
*********** REDUCE INIT
*/
public VCFWriter reduceInit() {
VCFWriter writer = new VCFWriter(out);
VCFWriter writer = new VCFWriterImpl(out);
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "MendelianViolationClassifier"));

View File

@ -60,7 +60,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
public void initialize() {
if ( outputVCF != null )
writer = new VCFWriter(new File(outputVCF));
writer = new VCFWriterImpl(new File(outputVCF));
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -14,6 +14,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import java.io.File;
import java.io.FileInputStream;
@ -79,7 +80,7 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
vcfWriter = new VCFWriterImpl(new File(OUTPUT_FILE));
VCFHeader header = null;
for( final ReferenceOrderedDataSource source : dataSources ) {
final RMDTrack rod = source.getReferenceOrderedData();

View File

@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import org.broad.tribble.vcf.*;
import java.io.*;
@ -92,7 +93,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
hInfo.add(new VCFHeaderLine("source", "BeagleImputation"));
// Open output file specified by output VCF ROD
vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
vcfWriter = new VCFWriterImpl(new File(OUTPUT_FILE));
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
for( final ReferenceOrderedDataSource source : dataSources ) {

View File

@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -72,7 +73,7 @@ public class ReadBackedPhasingWalker extends LocusWalker<Pair<VariantContextStat
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
writer = new VCFWriter(new File(phasedVCFFile));
writer = new VCFWriterImpl(new File(phasedVCFFile));
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(vc.getSampleNames())));
}

View File

@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
import org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator;
import java.util.*;
@ -86,7 +87,7 @@ public class TrioGenotyperWalker extends RefWalker<VariantContext, Integer>{
FAMILY_MEMBERS = Arrays.asList(mom, dad, kid);
// initialize the writer
writer = new VCFWriter(new File(vcfOutputFile));
writer = new VCFWriterImpl(new File(vcfOutputFile));
}
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -63,6 +63,7 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
/**
* Annotates variant calls with information from user-specified tabular files.
@ -240,7 +241,7 @@ public class GenomicAnnotator extends RodWalker<LinkedList<VariantContext>, Link
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
hInfo.addAll(engine.getVCFAnnotationDescriptions());
vcfWriter = new VCFWriter(VCF_OUT);
vcfWriter = new VCFWriterImpl(VCF_OUT);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);
}

View File

@ -94,7 +94,7 @@ public class VariantSelect extends RodWalker<Integer, Integer> {
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
}
writer = new VCFWriter(out);
writer = new VCFWriterImpl(out);
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);

View File

@ -29,7 +29,7 @@ import net.sf.samtools.SAMFileWriter;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.io.File;
import java.io.InputStream;
@ -212,7 +212,7 @@ public abstract class ArgumentField {
if (InputStream.class.isAssignableFrom(clazz)) return File.class;
if (SAMFileReader.class.isAssignableFrom(clazz)) return File.class;
if (OutputStream.class.isAssignableFrom(clazz)) return File.class;
if (GenotypeWriter.class.isAssignableFrom(clazz)) return File.class;
if (VCFWriter.class.isAssignableFrom(clazz)) return File.class;
if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class;
if (PlatformUnitFilterHelper.class.isAssignableFrom(clazz)) return String.class;
return clazz;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
@ -83,7 +83,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
@Override
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
List<ArgumentTypeDescriptor> typeDescriptors = new ArrayList<ArgumentTypeDescriptor>();
typeDescriptors.add(new GenotypeWriterArgumentTypeDescriptor(GATKEngine));
typeDescriptors.add(new VCFWriterArgumentTypeDescriptor(GATKEngine));
typeDescriptors.add(new SAMFileReaderArgumentTypeDescriptor(GATKEngine));
typeDescriptors.add(new SAMFileWriterArgumentTypeDescriptor(GATKEngine));
typeDescriptors.add(new OutputStreamArgumentTypeDescriptor(GATKEngine));

View File

@ -1,49 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Contract for sinks that persist genotype calls.
 *
 * @author aaron, ebanks
 */
public interface GenotypeWriter {

    /**
     * Appends one record built from a variant context. Only the genotype fields
     * defined in the header are written.
     *
     * @param vc      the variant context representing the call to add
     * @param refBase the reference base; VCF writers need it because the VCF format
     *                explicitly requires the (previous) ref base for an indel
     */
    void add(VariantContext vc, byte refBase);

    /** Finishes writing and closes any files that are still open. */
    void close();
}

View File

@ -1,40 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.utils.vcf.GATKVCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.File;
import java.io.PrintStream;
/**
 * Static factory for {@link GenotypeWriter} instances. Both {@code create} overloads
 * return a {@link GATKVCFWriter}.
 *
 * @author aaron
 */
public class GenotypeWriterFactory {

    /** available genotype writers */
    public enum GENOTYPE_FORMAT {
        GELI, GLF, GELI_BINARY, VCF
    }

    /**
     * Creates a genotype writer that writes to a file.
     *
     * @param destination the destination file
     * @return the genotype writer object
     */
    public static GenotypeWriter create(File destination) {
        final GenotypeWriter fileWriter = new GATKVCFWriter(destination);
        return fileWriter;
    }

    /**
     * Creates a genotype writer that writes to a stream.
     *
     * @param destination the destination stream
     * @return the genotype writer object
     */
    public static GenotypeWriter create(PrintStream destination) {
        final GenotypeWriter streamWriter = new GATKVCFWriter(destination);
        return streamWriter;
    }

    /**
     * Writes a VCF header through the given writer.
     *
     * @param writer    the writer to use; it is cast to {@link VCFGenotypeWriter}, so a
     *                  writer of any other type fails with a ClassCastException
     * @param vcfHeader the header to write
     */
    public static void writeHeader(GenotypeWriter writer, VCFHeader vcfHeader) {
        final VCFGenotypeWriter vcfWriter = (VCFGenotypeWriter)writer;
        vcfWriter.writeHeader(vcfHeader);
    }
}

View File

@ -1,92 +0,0 @@
package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.util.variantcontext.VariantContext;
import java.io.File;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Adapts the Geli file writer to the Genotype writer interface.
 * The wrapped {@link GeliFileWriter} is only created when
 * {@link #writeHeader(SAMFileHeader)} is called.
 *
 * @author aaron, ebanks
 * @version 1.0
 */
public class GeliAdapter implements GeliGenotypeWriter {

    // destination file for the geli output
    private File writeTo = null;

    // the wrapped geli writer; null until the header has been written
    private GeliFileWriter writer = null;

    /**
     * Remembers the destination; no writer is created here.
     *
     * @param writeTo where to write to
     */
    public GeliAdapter(File writeTo) {
        this.writeTo = writeTo;
    }

    /**
     * Creates the wrapped writer for the destination file with the given header.
     *
     * @param fileHeader the file header to write out
     */
    public void writeHeader(final SAMFileHeader fileHeader) {
        writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
    }

    /**
     * Forwards the genotype likelihoods to the wrapped writer.
     *
     * @param gl genotype likelihoods to write
     * @throws IllegalStateException if the header has not been written yet
     */
    public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
        if ( writer != null ) {
            writer.addGenotypeLikelihoods(gl);
            return;
        }
        throw new IllegalStateException("The Geli Header must be written before records can be added");
    }

    /**
     * Not supported by this writer; always throws.
     *
     * @param vc      the variant context representing the call to add
     * @param refBase not used by this writer
     * @throws UnsupportedOperationException always
     */
    public void add(VariantContext vc, byte refBase) {
        throw new UnsupportedOperationException("We no longer support writing Geli");
    }

    /** Closes the wrapped writer if one was created. */
    public void close() {
        if ( writer == null )
            return;
        writer.close();
    }
}

View File

@ -1,26 +0,0 @@
package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import net.sf.samtools.SAMFileHeader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
 * An extension of the GenotypeWriter interface with support
 * for writing a header and for writing genotype likelihoods.
 *
 * @author mhanna
 * @version 0.1
 */
public interface GeliGenotypeWriter extends GenotypeWriter {

    /**
     * Writes the file header.
     *
     * @param fileHeader SAM file header from which to derive the geli header
     */
    void writeHeader(final SAMFileHeader fileHeader);

    /**
     * Writes the genotype likelihoods to the output.
     *
     * @param gl genotype likelihoods to write
     */
    void addGenotypeLikelihoods(GenotypeLikelihoods gl);
}

View File

@ -1,79 +0,0 @@
package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.io.PrintWriter;
/**
 * Writes the geli text file format containing genotype information.
 * Every header and record write is flushed immediately so that output sent to a
 * stream becomes visible right away.
 *
 * @author aaron
 */
public class GeliTextWriter implements GeliGenotypeWriter {
    // where we write to
    PrintWriter mWriter;

    // used to store the max mapping quality as a field in variant contexts
    public static final String MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY = "MAXIMUM_MAPPING_QUALITY";

    // used to store the read count as a field in variant contexts
    public static final String READ_COUNT_ATTRIBUTE_KEY = "READ_COUNT";

    /**
     * create a geli text writer
     *
     * @param file the file to write to
     * @throws StingException if the file cannot be opened; the FileNotFoundException is attached as the cause
     */
    public GeliTextWriter(File file) {
        try {
            mWriter = new PrintWriter(file);
        } catch (FileNotFoundException e) {
            // keep the original exception so the underlying reason is not lost
            throw new StingException("Unable to open file " + file.toURI(), e);
        }
    }

    /**
     * create a geli text writer on top of an existing stream
     *
     * @param out the stream to write to
     */
    public GeliTextWriter(PrintStream out) {
        mWriter = new PrintWriter(out);
    }

    public final static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";

    /**
     * Write the file header.
     * @param fileHeader SAM file header from which to derive the geli header.
     */
    public void writeHeader(final SAMFileHeader fileHeader) {
        // ignore the SAM header; the geli text header is fixed.
        mWriter.println(headerLine);
        mWriter.flush(); // necessary so that writing to an output stream will work
    }

    /**
     * Not supported by this writer; always throws.
     *
     * @param vc the variant context representing the call to add
     * @param refBase required by the interface; not used by this writer.
     * @throws UnsupportedOperationException always
     */
    public void add(VariantContext vc, byte refBase) {
        throw new UnsupportedOperationException("We no longer support writing Geli");
    }

    /**
     * Writes the genotype likelihoods as one line, using their toString() form.
     *
     * @param gl genotype likelihoods to write
     */
    public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
        mWriter.println(gl.toString());
        mWriter.flush(); // necessary so that writing to an output stream will work
    }

    /** finish writing, closing any open files. */
    public void close() {
        mWriter.flush();
        mWriter.close();
    }
}

View File

@ -1,27 +0,0 @@
package org.broadinstitute.sting.utils.genotype.glf;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
/**
 * An extension of the GenotypeWriter interface with support
 * for adding header text and GLF records.
 *
 * @author mhanna
 * @version 0.1
 */
public interface GLFGenotypeWriter extends GenotypeWriter {

    /**
     * Appends the given header text to the GLF file.
     *
     * @param headerText the file header to write out
     */
    void writeHeader(String headerText);

    /**
     * Adds a GLF record to the output file.
     *
     * @param contigName   the contig name
     * @param contigLength the contig length
     * @param rec          the GLF record to write
     */
    void addGLFRecord(String contigName, int contigLength, GLFRecord rec);
}

View File

@ -43,7 +43,7 @@ import java.io.OutputStream;
* single and variable length genotype calls using the provided functions. When you've finished
* generating GLF records, make sure you close the file.
*/
public class GLFWriter implements GLFGenotypeWriter {
public class GLFWriter {
// our output codec
private final BinaryCodec outputBinaryCodec;

View File

@ -1,29 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.io.File;
/**
 * An extension of the GenotypeWriter interface with support
 * for writing a VCF header and for appending an existing VCF file.
 *
 * @author mhanna
 * @version 0.1
 */
public interface VCFGenotypeWriter extends GenotypeWriter {

    /**
     * Initializes the writer with the given VCF header.
     *
     * @param header the header
     */
    void writeHeader(VCFHeader header);

    /**
     * Adds a given VCF file to the writer.
     *
     * @param file file from which to add records
     */
    void append(File file);
}

View File

@ -1,407 +1,19 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.StingException;
import java.io.*;
import java.util.*;
/**
* this class writes VCF files
*/
public class VCFWriter {
public interface VCFWriter {
// the VCF header we're storing
protected VCFHeader mHeader = null;
// the buffered writer we're writing to
protected BufferedWriter mWriter;
// were filters applied?
protected boolean filtersWereAppliedToContext = false;
/**
 * create a VCF writer, given a file to write to
 *
 * @param location the file location to write to
 * @throws RuntimeException if the file cannot be opened for writing; the
 *                          FileNotFoundException is attached as the cause
 */
public VCFWriter(File location) {
    FileOutputStream output;
    try {
        output = new FileOutputStream(location);
    } catch (FileNotFoundException e) {
        // keep the original exception so the underlying reason is not lost
        throw new RuntimeException("Unable to create VCF file at location: " + location, e);
    }
    mWriter = new BufferedWriter(new OutputStreamWriter(output));
}
/**
* create a VCF writer, given a stream to write to
*
* @param output the file location to write to
*/
public VCFWriter(OutputStream output) {
mWriter = new BufferedWriter(new OutputStreamWriter(output));
}
/**
 * Stores the header and writes it out: the file format line first, then every
 * other meta-data line, then the column line. The output is flushed afterwards.
 *
 * @param header the VCF header to write; it is also kept for later record writing
 * @throws RuntimeException if the underlying writer fails; the IOException is attached as the cause
 */
public void writeHeader(VCFHeader header) {
    this.mHeader = header;
    try {
        // the file format field needs to be written first
        mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
        for ( VCFHeaderLine line : header.getMetaData() ) {
            // format lines of any version are skipped; the VCF4.0 one was written above
            if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
                 line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) ||
                 line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) )
                continue;
            // are the records filtered (so we know what to put in the FILTER column of passing records) ?
            if ( line instanceof VCFFilterHeaderLine )
                filtersWereAppliedToContext = true;
            mWriter.write(VCFHeader.METADATA_INDICATOR);
            mWriter.write(line.toString());
            mWriter.write("\n");
        }
        // write out the column line; separators go between columns only, so the
        // line no longer ends with a dangling field separator
        mWriter.write(VCFHeader.HEADER_INDICATOR);
        boolean needsSeparator = false;
        for ( VCFHeader.HEADER_FIELDS field : header.getHeaderFields() ) {
            if ( needsSeparator )
                mWriter.write(VCFConstants.FIELD_SEPARATOR);
            mWriter.write(field.toString());
            needsSeparator = true;
        }
        if ( header.hasGenotypingData() ) {
            if ( needsSeparator )
                mWriter.write(VCFConstants.FIELD_SEPARATOR);
            mWriter.write("FORMAT");
            for ( String sample : header.getGenotypeSamples() ) {
                mWriter.write(VCFConstants.FIELD_SEPARATOR);
                mWriter.write(sample);
            }
        }
        mWriter.write("\n");
        mWriter.flush(); // necessary so that writing to an output stream will work
    }
    catch (IOException e) {
        throw new RuntimeException("IOException writing the VCF header", e);
    }
}
public void writeHeader(VCFHeader header);
/**
 * attempt to close the VCF file
 *
 * @throws RuntimeException if flushing or closing the underlying writer fails;
 *                          the IOException is attached as the cause
 */
public void close() {
    try {
        mWriter.flush();
        mWriter.close();
    } catch (IOException e) {
        // keep the original exception so the underlying reason is not lost
        throw new RuntimeException("Unable to close VCFFile", e);
    }
}
/**
 * Writes one VCF record for the given variant context and flushes the output.
 * The columns are written in this order: CHROM, POS, ID, REF, ALT, QUAL, FILTER,
 * INFO and, when there are genotype keys to write, FORMAT followed by the
 * per-sample genotype data.
 *
 * @param vc      the variant context to write
 * @param refBase the reference base handed to
 *                VariantContextUtils.createVariantContextWithPaddedAlleles
 * @throws IllegalStateException if the header has not been written yet
 * @throws RuntimeException if the underlying writer fails; the IOException is attached as the cause
 */
public void add(VariantContext vc, byte refBase) {
    if ( mHeader == null )
        throw new IllegalStateException("The VCF Header must be written before records can be added");
    try {
        vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, refBase);
        GenomeLoc loc = VariantContextUtils.getLocation(vc);
        // maps each allele to the text used for it in the genotype columns
        Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
        alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
        // CHROM
        mWriter.write(loc.getContig());
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        // POS
        mWriter.write(String.valueOf(loc.getStart()));
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        // ID
        String ID = vc.hasAttribute(VariantContext.ID_KEY) ? vc.getAttributeAsString(VariantContext.ID_KEY) : VCFConstants.EMPTY_ID_FIELD;
        mWriter.write(ID);
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        // REF
        alleleMap.put(vc.getReference(), "0");
        String refString = vc.getReference().getBaseString();
        mWriter.write(refString);
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        // ALT: comma-separated; alternate allele i is encoded as i+1 in the allele map
        if ( vc.isVariant() ) {
            Allele altAllele = vc.getAlternateAllele(0);
            alleleMap.put(altAllele, "1");
            String alt = altAllele.getBaseString();
            mWriter.write(alt);
            for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
                altAllele = vc.getAlternateAllele(i);
                alleleMap.put(altAllele, String.valueOf(i+1));
                alt = altAllele.getBaseString();
                mWriter.write(",");
                mWriter.write(alt);
            }
        } else {
            mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
        }
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        // QUAL
        if ( !vc.hasNegLog10PError() )
            mWriter.write(VCFConstants.MISSING_VALUE_v4);
        else
            mWriter.write(getQualValue(vc.getPhredScaledQual()));
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        // FILTER: the sorted filter names if filtered; otherwise "passes" when filters
        // were applied (per header or per context) and "unfiltered" when they were not
        String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
        mWriter.write(filters);
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        // INFO: a TreeMap, so the keys are written in sorted order
        Map<String, String> infoFields = new TreeMap<String, String>();
        for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
            String key = field.getKey();
            // these two attributes are not INFO fields
            if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) )
                continue;
            String outputValue = formatVCFField(field.getValue());
            // formatVCFField returns null for a false Boolean; such flags are left out
            if ( outputValue != null )
                infoFields.put(key, outputValue);
        }
        writeInfoString(infoFields);
        // FORMAT
        List<String> genotypeAttributeKeys = new ArrayList<String>();
        if ( vc.hasGenotypes() ) {
            genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
            for ( String key : calcVCFGenotypeKeys(vc) ) {
                genotypeAttributeKeys.add(key);
            }
        } else if ( mHeader.hasGenotypingData() ) {
            // this needs to be done in case all samples are no-calls
            genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
        }
        if ( genotypeAttributeKeys.size() > 0 ) {
            String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
            mWriter.write(VCFConstants.FIELD_SEPARATOR);
            mWriter.write(genotypeFormatString);
            addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
        }
        mWriter.write("\n");
        mWriter.flush(); // necessary so that writing to an output stream will work
    } catch (IOException e) {
        // keep the original exception so the underlying reason is not lost
        throw new RuntimeException("Unable to write the VCF object to a file", e);
    }
}
/**
 * Formats a quality value with VCFConstants.DOUBLE_PRECISION_FORMAT_STRING and
 * removes VCFConstants.DOUBLE_PRECISION_INT_SUFFIX from the end when present.
 *
 * @param qual the quality value to format
 * @return the formatted quality
 */
private String getQualValue(double qual) {
    final String formatted = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual);
    if ( !formatted.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) )
        return formatted;
    final int keptLength = formatted.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length();
    return formatted.substring(0, keptLength);
}
/**
 * Writes the INFO column; assumes that no values are null.
 * An empty map produces VCFConstants.EMPTY_INFO_FIELD. Otherwise each key is written,
 * and "=value" follows unless the value is the empty string or the header declares
 * a count that is neither positive nor unbounded for that key.
 *
 * @param infoFields a map of info fields
 * @throws IOException for writer
 */
protected void writeInfoString(Map<String, String> infoFields) throws IOException {
    if ( infoFields.isEmpty() ) {
        mWriter.write(VCFConstants.EMPTY_INFO_FIELD);
        return;
    }

    boolean needsSeparator = false;
    for ( Map.Entry<String, String> entry : infoFields.entrySet() ) {
        if ( needsSeparator )
            mWriter.write(VCFConstants.INFO_FIELD_SEPARATOR);
        needsSeparator = true;

        final String key = entry.getKey();
        mWriter.write(key);

        // an empty value means only the key is written
        if ( entry.getValue().equals("") )
            continue;

        // keys without a header line are treated as single-valued
        final VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
        int numVals = 1;
        if ( metaData != null )
            numVals = metaData.getCount();

        // take care of unbounded encoding
        if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
            numVals = 1;

        if ( numVals <= 0 )
            continue;
        mWriter.write("=");
        mWriter.write(entry.getValue());
    }
}
/**
* Writes the genotype columns of one record, one column per sample, in the order
* in which the header lists the samples. Each column is preceded by a field separator.
*
* @param vc the variant context
* @param genotypeFormatKeys the FORMAT keys; attribute values are written in this order
* @param alleleMap maps each allele of this context to the text written for it
* @throws IOException for writer
*/
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
throws IOException {
for ( String sample : mHeader.getGenotypeSamples() ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
Genotype g = vc.getGenotype(sample);
// a header sample without a genotype in this context gets the empty-genotype placeholder
if ( g == null ) {
// TODO -- The VariantContext needs to know what the general ploidy is of the samples
// TODO -- We shouldn't be assuming diploid genotypes here!
mWriter.write(VCFConstants.EMPTY_GENOTYPE);
continue;
}
// the genotype itself: allele encodings joined by the phased or unphased separator
writeAllele(g.getAllele(0), alleleMap);
for (int i = 1; i < g.getPloidy(); i++) {
mWriter.write(g.genotypesArePhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
writeAllele(g.getAllele(i), alleleMap);
}
// collect the remaining attribute values in FORMAT key order
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
for ( String key : genotypeFormatKeys ) {
// the genotype key was already handled by the allele output above
if ( key.equals(VCFConstants.GENOTYPE_KEY) )
continue;
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
// some exceptions
if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
// quality comes from the genotype's error estimate, not from its attributes
if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
val = VCFConstants.MISSING_VALUE_v4;
else {
// the written quality is capped at MAX_GENOTYPE_QUAL
val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
}
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
// sorted filter names if filtered; otherwise "passes" or "unfiltered"
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
}
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
if ( metaData != null ) {
int numInFormatField = metaData.getCount();
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
// For example, if Number=2, the string has to be ".,."
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
for ( int i = 1; i < numInFormatField; i++ ) {
sb.append(",");
sb.append(VCFConstants.MISSING_VALUE_v4);
}
val = sb.toString();
}
}
// assume that if key is absent, then the given string encoding suffices
String outputValue = formatVCFField(val);
// formatVCFField returns null for a false Boolean; such values are not written
if ( outputValue != null )
attrs.add(outputValue);
}
// strip off trailing missing values
for (int i = attrs.size()-1; i >= 0; i--) {
if ( isMissingValue(attrs.get(i)) )
attrs.remove(i);
else
break;
}
// write what is left, each value preceded by the genotype field separator
for (String s : attrs ) {
mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
mWriter.write(s);
}
}
}
/**
 * Tells whether a formatted value consists only of missing-value markers and commas,
 * which also covers a comma-separated list of missing values.
 *
 * @param s the formatted value to test
 * @return true if every character of s is the missing-value character or a comma
 */
private boolean isMissingValue(String s) {
    final int missingMarkers = MathUtils.countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s);
    final int commas = MathUtils.countOccurrences(',', s);
    return missingMarkers + commas == s.length();
}
/**
 * Writes the encoding that the allele map holds for the given allele.
 *
 * @param allele    the allele to write
 * @param alleleMap maps alleles to the text written for them
 * @throws IOException for writer
 * @throws StingException if the allele has no entry in the map
 */
private void writeAllele(Allele allele, Map<Allele, String> alleleMap) throws IOException {
    final String code = alleleMap.get(allele);
    if ( code != null ) {
        mWriter.write(code);
        return;
    }
    throw new StingException("Allele " + allele + " is not an allele in the variant context");
}
/**
 * Converts an attribute value to its VCF text form.
 * null becomes the missing value; a Double is formatted with
 * VCFConstants.DOUBLE_PRECISION_FORMAT_STRING; a Boolean becomes the empty string
 * for true and null for false; a List or Object[] becomes its elements formatted
 * individually and joined with commas (the missing value when empty); anything
 * else uses toString().
 *
 * @param val the value to format; may be null
 * @return the formatted text, or null for a false Boolean
 */
private static String formatVCFField(Object val) {
    if ( val == null )
        return VCFConstants.MISSING_VALUE_v4;
    if ( val instanceof Double )
        return String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, (Double)val);
    if ( val instanceof Boolean )
        return (Boolean)val ? "" : null; // empty string for true, null for false
    if ( val instanceof List )
        return formatVCFField(((List)val).toArray());
    if ( !(val instanceof Object[]) )
        return val.toString();

    final Object[] elements = (Object[])val;
    if ( elements.length == 0 )
        return formatVCFField(null);

    final StringBuilder joined = new StringBuilder(formatVCFField(elements[0]));
    for ( int idx = 1; idx < elements.length; idx++ ) {
        joined.append(",");
        joined.append(formatVCFField(elements[idx]));
    }
    return joined.toString();
}
/**
 * Collects the FORMAT keys needed for the genotypes of a variant context: every
 * attribute key used by any genotype, plus the quality key if any genotype has an
 * error estimate, plus the filter key if any genotype is both filtered and called.
 *
 * @param vc the variant context whose genotypes are inspected
 * @return the keys, sorted
 */
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
    final Set<String> formatKeys = new HashSet<String>();
    boolean anyQuality = false;
    boolean anyFilter = false;

    for ( Genotype genotype : vc.getGenotypes().values() ) {
        formatKeys.addAll(genotype.getAttributes().keySet());
        anyQuality |= genotype.hasNegLog10PError();
        anyFilter |= (genotype.isFiltered() && genotype.isCalled());
    }

    if ( anyQuality )
        formatKeys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
    if ( anyFilter )
        formatKeys.add(VCFConstants.GENOTYPE_FILTER_KEY);

    final List<String> keyList = new ArrayList<String>(formatKeys);
    return Utils.sorted(keyList);
}
/**
 * Close this writer.  VCFWriterImpl implements this by flushing and closing
 * its underlying writer.
 */
public void close();
/**
 * Add one variant record to the output.
 *
 * @param vc      the variant context to write
 * @param refBase a reference base accompanying the record; VCFWriterImpl hands it to
 *                VariantContextUtils.createVariantContextWithPaddedAlleles together with vc
 */
public void add(VariantContext vc, byte refBase);
}

View File

@ -0,0 +1,431 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.*;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.StingException;
import java.io.*;
import java.util.*;
/**
 * Writes VCF text to a file or stream: the header goes out through
 * {@link #writeHeader(VCFHeader)}, after which each call to
 * {@link #add(VariantContext, byte)} emits one record line.
 */
public class VCFWriterImpl implements VCFWriter {

    // the VCF header we're storing; stays null until writeHeader() is called
    protected VCFHeader mHeader = null;

    // the writer we're writing to
    protected BufferedWriter mWriter;

    // were filters applied?  switched on when the header contains a FILTER line
    protected boolean filtersWereAppliedToContext = false;

    /**
     * create a VCF writer, given a file to write to
     *
     * @param location the file location to write to
     * @throws RuntimeException if the file cannot be opened for writing; the
     *                          FileNotFoundException is attached as the cause
     */
    public VCFWriterImpl(File location) {
        FileOutputStream output;
        try {
            output = new FileOutputStream(location);
        } catch (FileNotFoundException e) {
            // keep the cause: it says why the file could not be created
            throw new RuntimeException("Unable to create VCF file at location: " + location, e);
        }

        mWriter = new BufferedWriter(new OutputStreamWriter(output));
    }

    /**
     * create a VCF writer, given a stream to write to
     *
     * @param output the stream to write to
     */
    public VCFWriterImpl(OutputStream output) {
        mWriter = new BufferedWriter(new OutputStreamWriter(output));
    }

    /**
     * Remember the header and write it out: the VCF 4.0 file-format line first,
     * then every metadata line except file-format lines of any known version,
     * then the column line.  The output is flushed before returning.
     *
     * @param header the header to write; also consulted later when records are added
     * @throws RuntimeException wrapping any IOException raised by the writer
     */
    public void writeHeader(VCFHeader header) {
        this.mHeader = header;

        try {
            // the file format field needs to be written first
            mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");

            for ( VCFHeaderLine line : header.getMetaData() ) {
                // the format line was already written above, whatever version the header carries
                if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
                     line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) ||
                     line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) )
                    continue;

                // are the records filtered (so we know what to put in the FILTER column of passing records) ?
                if ( line instanceof VCFFilterHeaderLine )
                    filtersWereAppliedToContext = true;

                mWriter.write(VCFHeader.METADATA_INDICATOR);
                mWriter.write(line.toString());
                mWriter.write("\n");
            }

            // write out the column line
            // NOTE(review): every column name, including the last one, is followed by a field
            // separator, so this line ends with a trailing separator.  Left as-is because the
            // emitted bytes may be compared downstream -- confirm before changing.
            mWriter.write(VCFHeader.HEADER_INDICATOR);
            for ( VCFHeader.HEADER_FIELDS field : header.getHeaderFields() ) {
                mWriter.write(field.toString());
                mWriter.write(VCFConstants.FIELD_SEPARATOR);
            }

            if ( header.hasGenotypingData() ) {
                mWriter.write("FORMAT");
                mWriter.write(VCFConstants.FIELD_SEPARATOR);
                for ( String sample : header.getGenotypeSamples() ) {
                    mWriter.write(sample);
                    mWriter.write(VCFConstants.FIELD_SEPARATOR);
                }
            }

            mWriter.write("\n");
            mWriter.flush();  // necessary so that writing to an output stream will work
        }
        catch (IOException e) {
            throw new RuntimeException("IOException writing the VCF header", e);
        }
    }

    /**
     * attempt to close the VCF file
     *
     * @throws RuntimeException wrapping any IOException raised while flushing or closing
     */
    public void close() {
        try {
            try {
                mWriter.flush();
            } finally {
                // close even when the flush failed, so the underlying stream is not leaked
                mWriter.close();
            }
        } catch (IOException e) {
            throw new RuntimeException("Unable to close VCFFile", e);
        }
    }

    /**
     * Write one record line (CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO and,
     * when applicable, FORMAT plus one column per header sample) and flush.
     *
     * @param vc      the variant context to write
     * @param refBase reference base handed to
     *                VariantContextUtils.createVariantContextWithPaddedAlleles together with vc
     * @throws IllegalStateException if no header has been written yet
     * @throws RuntimeException      wrapping any IOException raised by the writer
     */
    public void add(VariantContext vc, byte refBase) {
        if ( mHeader == null )
            throw new IllegalStateException("The VCF Header must be written before records can be added");

        try {
            vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, refBase);
            GenomeLoc loc = VariantContextUtils.getLocation(vc);

            // maps each allele to the text written for it in the genotype columns
            Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
            alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup

            // CHROM
            mWriter.write(loc.getContig());
            mWriter.write(VCFConstants.FIELD_SEPARATOR);

            // POS
            mWriter.write(String.valueOf(loc.getStart()));
            mWriter.write(VCFConstants.FIELD_SEPARATOR);

            // ID
            String ID = vc.hasAttribute(VariantContext.ID_KEY) ? vc.getAttributeAsString(VariantContext.ID_KEY) : VCFConstants.EMPTY_ID_FIELD;
            mWriter.write(ID);
            mWriter.write(VCFConstants.FIELD_SEPARATOR);

            // REF
            alleleMap.put(vc.getReference(), "0");
            String refString = vc.getReference().getBaseString();
            mWriter.write(refString);
            mWriter.write(VCFConstants.FIELD_SEPARATOR);

            // ALT: alternate alleles are numbered from 1 in the order they are listed
            if ( vc.isVariant() ) {
                Allele altAllele = vc.getAlternateAllele(0);
                alleleMap.put(altAllele, "1");
                mWriter.write(altAllele.getBaseString());

                for ( int i = 1; i < vc.getAlternateAlleles().size(); i++ ) {
                    altAllele = vc.getAlternateAllele(i);
                    alleleMap.put(altAllele, String.valueOf(i + 1));
                    mWriter.write(",");
                    mWriter.write(altAllele.getBaseString());
                }
            } else {
                mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
            }
            mWriter.write(VCFConstants.FIELD_SEPARATOR);

            // QUAL
            if ( !vc.hasNegLog10PError() )
                mWriter.write(VCFConstants.MISSING_VALUE_v4);
            else
                mWriter.write(getQualValue(vc.getPhredScaledQual()));
            mWriter.write(VCFConstants.FIELD_SEPARATOR);

            // FILTER: the failing filters, or PASS when filtering is known to have run, else unfiltered
            final String filters;
            if ( vc.isFiltered() )
                filters = Utils.join(";", Utils.sorted(vc.getFilters()));
            else if ( filtersWereAppliedToContext || vc.filtersWereApplied() )
                filters = VCFConstants.PASSES_FILTERS_v4;
            else
                filters = VCFConstants.UNFILTERED;
            mWriter.write(filters);
            mWriter.write(VCFConstants.FIELD_SEPARATOR);

            // INFO: a TreeMap so that the keys come out sorted
            Map<String, String> infoFields = new TreeMap<String, String>();
            for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
                String key = field.getKey();
                // these two attributes are not written as INFO entries
                if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) )
                    continue;

                // a null encoding (Boolean false) means the entry is left out entirely
                String outputValue = formatVCFField(field.getValue());
                if ( outputValue != null )
                    infoFields.put(key, outputValue);
            }
            writeInfoString(infoFields);

            // FORMAT
            List<String> genotypeAttributeKeys = new ArrayList<String>();
            if ( vc.hasGenotypes() ) {
                genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
                genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc));
            } else if ( mHeader.hasGenotypingData() ) {
                // this needs to be done in case all samples are no-calls
                genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
            }

            if ( genotypeAttributeKeys.size() > 0 ) {
                String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
                mWriter.write(VCFConstants.FIELD_SEPARATOR);
                mWriter.write(genotypeFormatString);

                addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
            }

            mWriter.write("\n");
            mWriter.flush();  // necessary so that writing to an output stream will work
        } catch (IOException e) {
            // keep the cause so the real I/O failure is visible to the caller
            throw new RuntimeException("Unable to write the VCF object to a file", e);
        }
    }

    /**
     * Format a quality value, dropping VCFConstants.DOUBLE_PRECISION_INT_SUFFIX
     * from the end of the formatted text when it is present.
     *
     * @param qual the quality to format
     * @return the formatted quality
     */
    private String getQualValue(double qual) {
        String s = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual);
        if ( s.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) )
            s = s.substring(0, s.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length());
        return s;
    }

    /**
     * create the info string; assumes that no values are null
     *
     * Entries are written in the map's iteration order, separated by
     * VCFConstants.INFO_FIELD_SEPARATOR.  A key is followed by "=value" unless the
     * value is the empty string or the header declares a count for that key that is
     * neither positive nor VCFInfoHeaderLine.UNBOUNDED.
     *
     * @param infoFields a map of info fields
     * @throws IOException for writer
     */
    protected void writeInfoString(Map<String, String> infoFields) throws IOException {
        if ( infoFields.isEmpty() ) {
            mWriter.write(VCFConstants.EMPTY_INFO_FIELD);
            return;
        }

        boolean isFirst = true;
        for ( Map.Entry<String, String> entry : infoFields.entrySet() ) {
            if ( isFirst )
                isFirst = false;
            else
                mWriter.write(VCFConstants.INFO_FIELD_SEPARATOR);

            String key = entry.getKey();
            mWriter.write(key);

            if ( !entry.getValue().equals("") ) {
                // without a header line for the key, assume a single value
                int numVals = 1;
                VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
                if ( metaData != null )
                    numVals = metaData.getCount();

                // take care of unbounded encoding
                if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
                    numVals = 1;

                if ( numVals > 0 ) {
                    mWriter.write("=");
                    mWriter.write(entry.getValue());
                }
            }
        }
    }

    /**
     * add the genotype data
     *
     * One column is written per sample named in the header, in header order.
     * Samples without a genotype in vc get VCFConstants.EMPTY_GENOTYPE.
     *
     * @param vc                 the variant context
     * @param genotypeFormatKeys Genotype formatting string
     * @param alleleMap          alleles for this context
     * @throws IOException for writer
     */
    private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
            throws IOException {

        for ( String sample : mHeader.getGenotypeSamples() ) {
            mWriter.write(VCFConstants.FIELD_SEPARATOR);

            Genotype g = vc.getGenotype(sample);
            if ( g == null ) {
                // TODO -- The VariantContext needs to know what the general ploidy is of the samples
                // TODO -- We shouldn't be assuming diploid genotypes here!
                mWriter.write(VCFConstants.EMPTY_GENOTYPE);
                continue;
            }

            // the genotype call itself: allele codes joined by the phased/unphased separator
            writeAllele(g.getAllele(0), alleleMap);
            for ( int i = 1; i < g.getPloidy(); i++ ) {
                mWriter.write(g.genotypesArePhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
                writeAllele(g.getAllele(i), alleleMap);
            }

            List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
            for ( String key : genotypeFormatKeys ) {
                // the genotype call was written above
                if ( key.equals(VCFConstants.GENOTYPE_KEY) )
                    continue;

                Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;

                // some exceptions
                if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
                    if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
                        val = VCFConstants.MISSING_VALUE_v4;
                    else
                        val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
                } else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
                    val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
                }

                VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
                if ( metaData != null ) {
                    int numInFormatField = metaData.getCount();
                    if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
                        // If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
                        // For example, if Number=2, the string has to be ".,."
                        StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
                        for ( int i = 1; i < numInFormatField; i++ ) {
                            sb.append(",");
                            sb.append(VCFConstants.MISSING_VALUE_v4);
                        }
                        val = sb.toString();
                    }
                }

                // assume that if key is absent, then the given string encoding suffices
                String outputValue = formatVCFField(val);
                if ( outputValue != null )
                    attrs.add(outputValue);
            }

            // strip off trailing missing values
            while ( !attrs.isEmpty() && isMissingValue(attrs.get(attrs.size() - 1)) )
                attrs.remove(attrs.size() - 1);

            for ( String s : attrs ) {
                mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
                mWriter.write(s);
            }
        }
    }

    /**
     * Decide whether an encoded genotype field carries no real value: the number of
     * missing-value characters plus the number of commas must equal the string length,
     * which also covers lists of missing values such as ".,.".
     * (Assumes MathUtils.countOccurrences returns how often the char occurs in s.)
     *
     * @param s the encoded field value; must not be null
     * @return true if the sum of the two counts equals s.length()
     */
    private boolean isMissingValue(String s) {
        // we need to deal with the case that it's a list of missing values
        return (MathUtils.countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + MathUtils.countOccurrences(',', s) == s.length());
    }

    /**
     * Write the encoding registered for an allele to the output.
     *
     * @param allele    the allele to emit
     * @param alleleMap lookup from allele to the string written for it
     * @throws IOException    if the underlying writer fails
     * @throws StingException if the allele has no entry in alleleMap
     */
    private void writeAllele(Allele allele, Map<Allele, String> alleleMap) throws IOException {
        String encoding = alleleMap.get(allele);
        if ( encoding == null )
            throw new StingException("Allele " + allele + " is not an allele in the variant context");
        mWriter.write(encoding);
    }

    /**
     * Turn an attribute value into the text written for it.
     *
     * null            -> VCFConstants.MISSING_VALUE_v4
     * Double          -> formatted with VCFConstants.DOUBLE_PRECISION_FORMAT_STRING
     * Boolean         -> "" for true, null for false
     * List / Object[] -> comma-joined encodings of the elements (an empty array is encoded like null)
     * anything else   -> val.toString()
     *
     * @param val the value to encode, may be null
     * @return the encoded value; null only for Boolean false
     */
    private static String formatVCFField(Object val) {
        String result;
        if ( val == null )
            result = VCFConstants.MISSING_VALUE_v4;
        else if ( val instanceof Double )
            result = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, (Double)val);
        else if ( val instanceof Boolean )
            result = (Boolean)val ? "" : null; // empty string for true, null for false
        else if ( val instanceof List ) {
            result = formatVCFField(((List<?>)val).toArray());
        } else if ( val instanceof Object[] ) {
            Object[] array = (Object[])val;
            if ( array.length == 0 )
                return formatVCFField(null);

            // no synchronization is needed for a method-local buffer
            StringBuilder sb = new StringBuilder(formatVCFField(array[0]));
            for ( int i = 1; i < array.length; i++ ) {
                sb.append(",");
                sb.append(formatVCFField(array[i]));
            }
            result = sb.toString();
        } else
            result = val.toString();

        return result;
    }

    /**
     * Collect the genotype-level keys needed to describe every genotype in a variant context:
     * the union of all genotype attribute keys, plus VCFConstants.GENOTYPE_QUALITY_KEY if any
     * genotype reports hasNegLog10PError(), plus VCFConstants.GENOTYPE_FILTER_KEY if any
     * genotype is both filtered and called.
     *
     * @param vc the variant context whose genotypes are inspected
     * @return the keys, passed through Utils.sorted
     */
    private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
        Set<String> keys = new HashSet<String>();

        boolean sawGoodQual = false;
        boolean sawGenotypeFilter = false;
        for ( Genotype g : vc.getGenotypes().values() ) {
            keys.addAll(g.getAttributes().keySet());
            if ( g.hasNegLog10PError() )
                sawGoodQual = true;
            if ( g.isFiltered() && g.isCalled() )
                sawGenotypeFilter = true;
        }

        if ( sawGoodQual )
            keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);

        if ( sawGenotypeFilter )
            keys.add(VCFConstants.GENOTYPE_FILTER_KEY);

        return Utils.sorted(new ArrayList<String>(keys));
    }
}

View File

@ -1,74 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.vcf;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broad.tribble.vcf.*;
import java.io.*;
/**
 * @author ebanks
 * <p/>
 * Class GATKVCFWriter
 * <p/>
 * GATK-specific version of the VCF Writer
 */
public class GATKVCFWriter extends VCFWriter implements VCFGenotypeWriter {

    /**
     * create a writer that sends its output to a file
     *
     * @param writeTo the file to write to
     */
    public GATKVCFWriter(File writeTo) {
        super(writeTo);
    }

    /**
     * create a writer that sends its output to a stream
     *
     * @param writeTo the stream to write to
     */
    public GATKVCFWriter(OutputStream writeTo) {
        super(writeTo);
    }

    /**
     * write the header; currently just delegates to the superclass
     *
     * @param header the header to write
     */
    public void writeHeader(VCFHeader header) {
        // TODO -- put the command-line generating code for the header right here
        super.writeHeader(header);
    }

    /**
     * Copy the contents of a file to this writer's output, skipping every line
     * that VCFHeaderLine.isHeaderLine recognizes as a header line.  Each copied
     * line is terminated with "\n".
     *
     * @param file the file whose non-header lines are appended
     * @throws StingException wrapping any IOException raised while reading or writing
     */
    public void append(File file) {
        try {
            BufferedReader reader = new BufferedReader(new FileReader(file));
            try {
                String line = reader.readLine();
                while ( line != null ) {
                    if ( !VCFHeaderLine.isHeaderLine(line) ) {
                        mWriter.write(line);
                        mWriter.write("\n");
                    }
                    line = reader.readLine();
                }
            } finally {
                // release the file handle even when reading or writing fails part-way
                reader.close();
            }
        } catch (IOException e) {
            throw new StingException("Error reading file " + file + " in GATKVCFWriter: ", e);
        }
    }
}

View File

@ -45,7 +45,7 @@ public class VCFWriterUnitTest extends BaseTest {
@Test
public void testBasicWriteAndRead() {
VCFHeader header = createFakeHeader(metaData,additionalColumns);
VCFWriter writer = new VCFWriter(fakeVCFFile);
VCFWriter writer = new VCFWriterImpl(fakeVCFFile);
writer.writeHeader(header);
writer.add(createVC(header),"A".getBytes()[0]);
writer.add(createVC(header),"A".getBytes()[0]);