From cb144734c0c0215478b59fc177900385597349e1 Mon Sep 17 00:00:00 2001 From: hanna Date: Fri, 13 Aug 2010 16:33:22 +0000 Subject: [PATCH] Getting rid of GenotypeWriter interface. Of note: - GATKVCFWriter deleted, to be replaced if absolutely necessary when VCF writing goes into Tribble. - VCFWriter is now an interface, for easier redirection. - VCFWriterImpl fleshes out the VCFWriter interface. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4026 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/CommandLineExecutable.java | 4 +- .../io/storage/GenotypeWriterStorage.java | 86 ---- .../sting/gatk/io/storage/StorageFactory.java | 12 +- .../io/storage/VCFGenotypeWriterStorage.java | 58 --- .../gatk/io/storage/VCFWriterStorage.java | 109 +++++ .../gatk/io/stubs/VCFGenotypeWriterStub.java | 60 --- ...a => VCFWriterArgumentTypeDescriptor.java} | 39 +- ...typeWriterStub.java => VCFWriterStub.java} | 22 +- .../sting/gatk/walkers/VariantsToVCF.java | 2 +- .../walkers/annotator/VariantAnnotator.java | 3 +- .../filters/VariantFiltrationWalker.java | 3 +- .../walkers/genotyper/BatchedCallsMerger.java | 6 +- .../walkers/genotyper/UnifiedGenotyper.java | 8 +- .../genotyper/UnifiedGenotyperEngine.java | 10 +- .../sequenom/SequenomValidationConverter.java | 3 +- .../varianteval/VariantEvalWalker.java | 3 +- .../ApplyVariantCuts.java | 3 +- .../VariantRecalibrator.java | 3 +- .../walkers/variantutils/CombineVariants.java | 2 +- .../variantutils/FilterLiftedVariants.java | 3 +- .../variantutils/LiftoverVariants.java | 3 +- .../walkers/variantutils/SelectVariants.java | 10 +- .../walkers/IndelAnnotator.java | 2 +- .../walkers/IndelDBRateWalker.java | 3 +- .../walkers/MendelianViolationClassifier.java | 3 +- .../walkers/TestVariantContextWalker.java | 2 +- .../walkers/VCF4WriterTestWalker.java | 3 +- .../gatk/walkers/BeagleOutputToVCFWalker.java | 3 +- .../gatk/walkers/ReadBackedPhasingWalker.java | 3 +- .../gatk/walkers/TrioGenotyperWalker.java | 3 +- .../walkers/annotator/GenomicAnnotator.java | 3 +- .../gatk/walkers/vcftools/VariantSelect.java | 2 +- .../queue/extensions/gatk/ArgumentField.java | 4 +- .../gatk/GATKExtensionsGenerator.java | 4 +- .../sting/utils/genotype/GenotypeWriter.java | 49 -- .../utils/genotype/GenotypeWriterFactory.java | 40 -- .../utils/genotype/geli/GeliAdapter.java | 92 ---- .../genotype/geli/GeliGenotypeWriter.java | 26 -- .../utils/genotype/geli/GeliTextWriter.java | 79 ---- .../utils/genotype/glf/GLFGenotypeWriter.java | 27 -- .../sting/utils/genotype/glf/GLFWriter.java | 2 +- .../utils/genotype/vcf/VCFGenotypeWriter.java | 29 -- .../sting/utils/genotype/vcf/VCFWriter.java | 396 +--------------- .../utils/genotype/vcf/VCFWriterImpl.java | 431 ++++++++++++++++++ .../sting/utils/vcf/GATKVCFWriter.java | 74 --- .../utils/genotype/vcf/VCFWriterUnitTest.java | 2 +- 46 files changed, 625 insertions(+), 1109 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java create mode 100644 java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java rename java/src/org/broadinstitute/sting/gatk/io/stubs/{GenotypeWriterArgumentTypeDescriptor.java => VCFWriterArgumentTypeDescriptor.java} (72%) rename java/src/org/broadinstitute/sting/gatk/io/stubs/{GenotypeWriterStub.java => VCFWriterStub.java} (86%) delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeWriter.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeWriter.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterImpl.java delete mode 100644 java/src/org/broadinstitute/sting/utils/vcf/GATKVCFWriter.java diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 9784058e5..8169fc7f3 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; -import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import java.io.File; import java.io.FileNotFoundException; @@ -99,7 +99,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram { * @return A collection of type descriptors generating implementation-dependent placeholders. */ protected Collection getArgumentTypeDescriptors() { - return Arrays.asList( new GenotypeWriterArgumentTypeDescriptor(GATKEngine), + return Arrays.asList( new VCFWriterArgumentTypeDescriptor(GATKEngine), new SAMFileReaderArgumentTypeDescriptor(GATKEngine), new SAMFileWriterArgumentTypeDescriptor(GATKEngine), new OutputStreamArgumentTypeDescriptor(GATKEngine) ); diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java deleted file mode 100755 index df559225f..000000000 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.io.storage; - -import java.io.*; -import java.util.Set; - -import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; -import org.broadinstitute.sting.utils.genotype.*; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.StingException; -import org.broad.tribble.vcf.VCFHeader; - -/** - * Provides temporary storage for GenotypeWriters. - * - * @author ebanks - * @version 0.1 - */ -public abstract class GenotypeWriterStorage implements GenotypeWriter, Storage { - protected final File file; - protected final PrintStream stream; - protected final GenotypeWriter writer; - - /** - * Constructs an object which will write directly into the output file provided by the stub. - * Intentionally delaying the writing of the header -- this should be filled in by the walker. - * @param stub Stub to use when constructing the output file. - */ - public GenotypeWriterStorage( GenotypeWriterStub stub ) { - this.file = stub.getFile(); - this.stream = stub.getOutputStream(); - if(file != null) - writer = GenotypeWriterFactory.create(file); - else if(stream != null) - writer = GenotypeWriterFactory.create(stream); - else - throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream."); - } - - /** - * Constructs an object which will redirect into a different file. - * @param stub Stub to use when synthesizing file / header info. - * @param file File into which to direct the output data. - */ - public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) { - this.file = file; - this.stream = null; - writer = GenotypeWriterFactory.create(file); - Set samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader()); - GenotypeWriterFactory.writeHeader(writer, new VCFHeader(null, samples)); - } - - public void add(VariantContext vc, byte ref) { - writer.add(vc, ref); - } - - public void close() { - writer.close(); - } - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java b/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java index c4aecc29b..d59e8b35d 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.io.storage; import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; -import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; import org.broadinstitute.sting.utils.StingException; import java.io.File; @@ -62,7 +62,7 @@ public class StorageFactory { * @param Type of the stream to create. * @return Storage object with a facade of type T. */ - public static Storage createStorage( Stub stub, File file ) { + public static Storage createStorage( Stub stub, File file ) { Storage storage; if(stub instanceof OutputStreamStub) { @@ -77,12 +77,12 @@ public class StorageFactory { else storage = new SAMFileWriterStorage((SAMFileWriterStub)stub); } - else if(stub instanceof GenotypeWriterStub) { - GenotypeWriterStub genotypeWriterStub = (GenotypeWriterStub)stub; + else if(stub instanceof VCFWriterStub) { + VCFWriterStub vcfWriterStub = (VCFWriterStub)stub; if( file != null ) - storage = new VCFGenotypeWriterStorage(genotypeWriterStub,file); + storage = new VCFWriterStorage(vcfWriterStub,file); else - storage = new VCFGenotypeWriterStorage(genotypeWriterStub); + storage = new VCFWriterStorage(vcfWriterStub); } else throw new StingException("Unsupported stub type: " + stub.getClass().getName()); diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java deleted file mode 100644 index d4edf7fdd..000000000 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.broadinstitute.sting.gatk.io.storage; - -import org.broad.tribble.vcf.VCFHeader; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter; -import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; - -import java.io.File; - -/** - * Provides temporary and permanent storage for genotypes in VCF format. - * - * @author mhanna - * @version 0.1 - */ -public class VCFGenotypeWriterStorage extends GenotypeWriterStorage implements VCFGenotypeWriter { - /** - * Creates new (permanent) storage for VCF genotype writers. - * @param stub Stub containing appropriate input parameters. - */ - public VCFGenotypeWriterStorage(GenotypeWriterStub stub) { - super(stub); - } - - /** - * Creates new (temporary) storage for VCF genotype writers. - * @param stub Stub containing appropriate input parameters. - * @param target Target file for output data. - */ - public VCFGenotypeWriterStorage(GenotypeWriterStub stub,File target) { - super(stub,target); - } - - /** - * initialize this VCF header - * - * @param header the header - */ - public void writeHeader(VCFHeader header) { - ((VCFGenotypeWriter)writer).writeHeader(header); - } - - /** - * Add a given VCF file to the writer. - * @param file file from which to add records - */ - public void append(File file) { - ((VCFGenotypeWriter)writer).append(file); - } - - /** - * Merges the stream backing up this temporary storage into the target. - * @param target Target stream for the temporary storage. May not be null. - */ - public void mergeInto(VCFGenotypeWriter target) { - target.append(file); - file.delete(); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java new file mode 100644 index 000000000..1189d3e72 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java @@ -0,0 +1,109 @@ +package org.broadinstitute.sting.gatk.io.storage; + +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; + +import java.io.*; +import java.util.Set; + +/** + * Provides temporary and permanent storage for genotypes in VCF format. + * + * @author mhanna + * @version 0.1 + */ +public class VCFWriterStorage implements Storage, VCFWriter { + protected final File file; + protected final PrintStream stream; + protected final VCFWriter writer; + + /** + * Constructs an object which will write directly into the output file provided by the stub. + * Intentionally delaying the writing of the header -- this should be filled in by the walker. + * @param stub Stub to use when constructing the output file. + */ + public VCFWriterStorage( VCFWriterStub stub ) { + if(stub.getFile() != null) { + this.file = stub.getFile(); + try { + this.stream = new PrintStream(stub.getFile()); + } + catch(IOException ex) { + throw new StingException("Unable to open target output stream",ex); + } + } + else if(stub.getOutputStream() != null) { + this.file = null; + this.stream = stub.getOutputStream(); + } + else + throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream."); + + writer = new VCFWriterImpl(stream); + } + + /** + * Constructs an object which will redirect into a different file. + * @param stub Stub to use when synthesizing file / header info. + * @param file File into which to direct the output data. + */ + public VCFWriterStorage(VCFWriterStub stub, File file) { + this.file = file; + try { + this.stream = new PrintStream(file); + } + catch(IOException ex) { + throw new StingException("Unable to open target output stream",ex); + } + writer = new VCFWriterImpl(this.stream); + Set samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader()); + writer.writeHeader(new VCFHeader(null, samples)); + } + + public void add(VariantContext vc, byte ref) { + writer.add(vc, ref); + } + + /** + * initialize this VCF header + * + * @param header the header + */ + public void writeHeader(VCFHeader header) { + writer.writeHeader(header); + } + + /** + * Close the VCF storage object. + */ + public void close() { + writer.close(); + } + + /** + * Merges the stream backing up this temporary storage into the target. + * @param target Target stream for the temporary storage. May not be null. + */ + public void mergeInto(VCFWriterStorage target) { + PrintStream formattingTarget = new PrintStream(target.stream); + try { + BufferedReader reader = new BufferedReader(new FileReader(file)); + String line = reader.readLine(); + while ( line != null ) { + if (!VCFHeaderLine.isHeaderLine(line)) + formattingTarget.printf("%s%n",line); + line = reader.readLine(); + } + + reader.close(); + } catch (IOException e) { + throw new StingException("Error reading file " + file + " in GATKVCFWriter: ", e); + } + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java deleted file mode 100644 index 26b9be20e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.broadinstitute.sting.gatk.io.stubs; - -import org.broad.tribble.vcf.VCFHeader; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; - -import java.io.File; -import java.io.PrintStream; - -/** - * Stub providing a passthrough for VCF files. - * - * @author mhanna - * @version 0.1 - */ -public class VCFGenotypeWriterStub extends GenotypeWriterStub implements VCFGenotypeWriter { - /** - * Construct a new stub with the given engine and target file. - * @param engine The engine, for extracting command-line arguments, etc. - * @param genotypeFile Target file into which to write genotyping data. - */ - public VCFGenotypeWriterStub(GenomeAnalysisEngine engine, File genotypeFile) { - super(engine,genotypeFile); - } - - /** - * Construct a new stub with the given engine and target stream. - * @param engine The engine, for extracting command-line arguments, etc. - * @param genotypeStream Target stream into which to write genotyping data. - */ - public VCFGenotypeWriterStub(GenomeAnalysisEngine engine, PrintStream genotypeStream) { - super(engine,genotypeStream); - } - - /** - * Gets the format of this stub. We may want to discontinue use of this method and rely on instanceof comparisons. - * @return VCF always. - */ - public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() { - return GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - } - - /** - * initialize this VCF header - * - * @param header the header - */ - public void writeHeader(VCFHeader header) { - outputTracker.getStorage(this).writeHeader(header); - } - - /** - * Add a given VCF file to the writer. - * @param file file from which to add records - */ - public void append(File file) { - outputTracker.getStorage(this).append(file); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java similarity index 72% rename from java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java rename to java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 2f75ab2fa..7d9e764c2 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -26,8 +26,7 @@ package org.broadinstitute.sting.gatk.io.stubs; import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import java.io.File; @@ -41,7 +40,7 @@ import java.util.Arrays; * @author mhanna * @version 0.1 */ -public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { +public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { /** * The engine into which output stubs should be fed. */ @@ -51,7 +50,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor * Create a new GenotypeWriter argument, notifying the given engine when that argument has been created. * @param engine the engine to be notified. */ - public GenotypeWriterArgumentTypeDescriptor(GenomeAnalysisEngine engine) { + public VCFWriterArgumentTypeDescriptor(GenomeAnalysisEngine engine) { this.engine = engine; } @@ -62,7 +61,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor */ @Override public boolean supports( Class type ) { - return GenotypeWriter.class.equals(type); + return VCFWriter.class.equals(type); } /** @@ -73,8 +72,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor */ @Override public List createArgumentDefinitions( ArgumentSource source ) { - return Arrays.asList( createGenotypeFileArgumentDefinition(source), - createGenotypeFormatArgumentDefinition(source) ); + return Arrays.asList( createGenotypeFileArgumentDefinition(source) ); } /** @@ -92,7 +90,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor */ @Override public Object getDefault() { - GenotypeWriterStub defaultGenotypeWriter = new VCFGenotypeWriterStub(engine,System.out); + VCFWriterStub defaultGenotypeWriter = new VCFWriterStub(engine,System.out); engine.addOutput(defaultGenotypeWriter); return defaultGenotypeWriter; } @@ -111,7 +109,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor File writerFile = writerFileName != null ? new File(writerFileName) : null; // Create a stub for the given object. - GenotypeWriterStub stub = (writerFile != null) ? new VCFGenotypeWriterStub(engine, writerFile) : new VCFGenotypeWriterStub(engine,System.out); + VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile) : new VCFWriterStub(engine,System.out); engine.addOutput(stub); @@ -136,27 +134,4 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor source.isHidden(), null ); } - - /** - * Creates the optional compression level argument for the BAM file. - * @param source Argument source for the BAM file. Must not be null. - * @return Argument definition for the BAM file itself. Will not be null. - */ - private ArgumentDefinition createGenotypeFormatArgumentDefinition(ArgumentSource source) { - Annotation annotation = this.getArgumentAnnotation(source); - return new ArgumentDefinition( ArgumentIOType.getIOType(annotation), - GenotypeWriterFactory.GENOTYPE_FORMAT.class, - "variant_output_format", - "vf", - "Format to be used to represent variants; default is VCF", - false, - false, - false, - null, - source.isHidden(), - null, - null, - null ); - } - } diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java similarity index 86% rename from java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java rename to java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index 495188648..694f53e63 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -29,10 +29,11 @@ import java.io.File; import java.io.PrintStream; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFHeader; import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.io.storage.VCFWriterStorage; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import net.sf.samtools.SAMFileHeader; /** @@ -41,7 +42,7 @@ import net.sf.samtools.SAMFileHeader; * @author ebanks * @version 0.1 */ -public abstract class GenotypeWriterStub implements Stub, GenotypeWriter { +public class VCFWriterStub implements Stub, VCFWriter { /** * Engine to use for collecting attributes for the output SAM file. @@ -71,7 +72,7 @@ public abstract class GenotypeWriterStub implements St * @param engine GATK engine. * @param genotypeFile file to (ultimately) create. */ - public GenotypeWriterStub(GenomeAnalysisEngine engine,File genotypeFile) { + public VCFWriterStub(GenomeAnalysisEngine engine,File genotypeFile) { this.engine = engine; this.genotypeFile = genotypeFile; this.genotypeStream = null; @@ -82,7 +83,7 @@ public abstract class GenotypeWriterStub implements St * @param engine GATK engine. * @param genotypeStream stream to (ultimately) write. */ - public GenotypeWriterStub(GenomeAnalysisEngine engine,PrintStream genotypeStream) { + public VCFWriterStub(GenomeAnalysisEngine engine,PrintStream genotypeStream) { this.engine = engine; this.genotypeFile = null; this.genotypeStream = genotypeStream; @@ -112,12 +113,6 @@ public abstract class GenotypeWriterStub implements St return engine.getSAMFileHeader(); } - /** - * Retrieves the format to use when creating the new file. - * @return format to use when creating the new file. - */ - public abstract GenotypeWriterFactory.GENOTYPE_FORMAT getFormat(); - /** * Registers the given streamConnector with this stub. * @param outputTracker The connector used to provide an appropriate stream. @@ -126,6 +121,10 @@ public abstract class GenotypeWriterStub implements St this.outputTracker = outputTracker; } + public void writeHeader(VCFHeader header) { + outputTracker.getStorage(this).writeHeader(header); + } + /** * @{inheritDoc} */ @@ -139,5 +138,4 @@ public abstract class GenotypeWriterStub implements St public void close() { outputTracker.getStorage(this).close(); } - } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java index 61f413802..0f0fd018e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java @@ -126,7 +126,7 @@ public class VariantsToVCF extends RodWalker { } } - vcfwriter = new VCFWriter(out); + vcfwriter = new VCFWriterImpl(out); vcfwriter.writeHeader(new VCFHeader(hInfo, samples)); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 9824aba1d..2250ffc05 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -46,6 +46,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import java.util.*; @@ -153,7 +154,7 @@ public class VariantAnnotator extends RodWalker { hInfo.add(new VCFHeaderLine("VariantAnnotator", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\"")); } - vcfWriter = new VCFWriter(out); + vcfWriter = new VCFWriterImpl(out); VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index c23adeb83..75426edfd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -40,6 +40,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import java.util.*; @@ -118,7 +119,7 @@ public class VariantFiltrationWalker extends RodWalker { hInfo.add(new VCFHeaderLine("VariantFiltration", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\"")); } - writer = new VCFWriter(out); + writer = new VCFWriterImpl(out); writer.writeHeader(new VCFHeader(hInfo, new TreeSet(vc.getSampleNames()))); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java index 19eefcabc..12b93b019 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; @@ -56,7 +56,7 @@ public class BatchedCallsMerger extends LocusWalker imp @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @Argument(doc = "VCF file to which variants should be written", required = false) - public GenotypeWriter writer = null; + public VCFWriter writer = null; @Argument(fullName="rod_list", shortName="rods", doc="A comma-separated string describing the rod names representing individual call batches", required=true) protected String ROD_STRING = null; @@ -91,7 +91,7 @@ public class BatchedCallsMerger extends LocusWalker imp UG_engine.samples = samples; // initialize the header - GenotypeWriterFactory.writeHeader(writer, new VCFHeader(headerLines, samples)); + writer.writeHeader(new VCFHeader(headerLines, samples)); } public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 3876ef8cd..15a763bfc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -52,7 +52,7 @@ public class UnifiedGenotyper extends LocusWalker getHeaderInfo() { Set headerInfo = new HashSet(); - // this is only applicable to VCF - if ( !(writer instanceof VCFGenotypeWriter) ) - return headerInfo; - // all annotation fields from VariantAnnotatorEngine headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 98d3bd531..319e477b1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -39,7 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.pileup.*; import org.broad.tribble.vcf.VCFConstants; @@ -65,7 +65,7 @@ public class UnifiedGenotyperEngine { // the various loggers and writers protected Logger logger = null; - protected GenotypeWriter genotypeWriter = null; + protected VCFWriter vcfWriter = null; protected PrintStream verboseWriter = null; // samples in input @@ -76,15 +76,15 @@ public class UnifiedGenotyperEngine { initialize(toolkit, UAC, null, null, null, null); } - public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) { + public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, VCFWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) { initialize(toolkit, UAC, logger, genotypeWriter, verboseWriter, engine); } - private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) { + private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, VCFWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) { this.UAC = UAC; this.logger = logger; - this.genotypeWriter = genotypeWriter; + this.vcfWriter = genotypeWriter; this.verboseWriter = verboseWriter; this.annotationEngine = engine; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java index 16fd5e6f6..a704573db 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java @@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import java.util.*; @@ -118,7 +119,7 @@ public class SequenomValidationConverter extends RodWalker(); - VCFWriter vcfWriter = new VCFWriter(out); + VCFWriter vcfWriter = new VCFWriterImpl(out); // set up the info and filter headers Set hInfo = new HashSet(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index e7f4fca34..efb56b372 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -48,6 +48,7 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; @@ -336,7 +337,7 @@ public class VariantEvalWalker extends RodWalker { determineContextNamePartSizes(); if ( outputVCF != null ) - writer = new VCFWriter(new File(outputVCF)); + writer = new VCFWriterImpl(new File(outputVCF)); if ( rsIDFile != null ) { if ( maxRsIDBuild == Integer.MAX_VALUE ) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java index 6fdeed0b4..b3f03b8a0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; @@ -148,7 +149,7 @@ public class ApplyVariantCuts extends RodWalker { hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFHeaderLineType.Float, "The original variant quality score")); hInfo.add(new VCFHeaderLine("source", "VariantOptimizer")); - vcfWriter = new VCFWriter( new File(OUTPUT_FILENAME) ); + vcfWriter = new VCFWriterImpl( new File(OUTPUT_FILENAME) ); final TreeSet samples = new TreeSet(); samples.addAll(SampleUtils.getSampleListWithVCFHeader(getToolkit(), null)); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 430123c65..01c10ab13 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import java.io.File; import java.io.IOException; @@ -137,7 +138,7 @@ public class VariantRecalibrator extends RodWalker { private VariantAnnotatorEngine engine; public void initialize() { - vcfWriter = new VCFWriter(out); + vcfWriter = new VCFWriterImpl(out); validateAnnotateUnionArguments(); Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), null); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 0e7035838..459c59913 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -51,7 +52,7 @@ public class FilterLiftedVariants extends RodWalker { Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); - writer = new VCFWriter(out); + writer = new VCFWriterImpl(out); final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples); writer.writeHeader(vcfHeader); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index f63ef005c..3e0d1d67a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -28,6 +28,7 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -74,7 +75,7 @@ public class LiftoverVariants extends RodWalker { Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); - writer = new VCFWriter(out); + writer = new VCFWriterImpl(out); final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples); writer.writeHeader(vcfHeader); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 5a904d66f..e2e6f2e74 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010. + * Copyright (c) 2010, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12,15 +12,14 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.sting.gatk.walkers.variantutils; @@ -43,6 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import java.util.*; import java.util.regex.Matcher; @@ -79,7 +79,7 @@ public class SelectVariants extends RodWalker { * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ public void initialize() { - vcfWriter = new VCFWriter(out); + vcfWriter = new VCFWriterImpl(out); ArrayList rodNames = new ArrayList(); rodNames.add("variant"); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java index ae2308898..73851101c 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java @@ -85,7 +85,7 @@ public class IndelAnnotator extends RodWalker{ anno.add(new VCFInfoHeaderLine("type",1, VCFHeaderLineType.String,"Genomic interpretation (according to RefSeq)")); hInfo.addAll(anno); - vcfWriter = new VCFWriter(out); + vcfWriter = new VCFWriterImpl(out); VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit())); vcfWriter.writeHeader(vcfHeader); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java index 2a4a839d4..c0feefda0 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java @@ -17,6 +17,7 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import java.io.PrintStream; import java.util.*; @@ -46,7 +47,7 @@ public class IndelDBRateWalker extends RodWalker } if ( outVCF != null ) { - vcfWriter = new VCFWriter(outVCF); + vcfWriter = new VCFWriterImpl(outVCF); Set header = new HashSet(); header.addAll(VCFUtils.getHeaderFields(getToolkit())); VCFHeader vcfHeader = new VCFHeader(header, SampleUtils.getUniqueSamplesFromRods(getToolkit())); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java index 3c3b23c16..7b56ab99f 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java @@ -23,6 +23,7 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import org.broadinstitute.sting.utils.pileup.PileupElement; import java.io.PrintStream; @@ -372,7 +373,7 @@ public class MendelianViolationClassifier extends LocusWalker hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.add(new VCFHeaderLine("source", "MendelianViolationClassifier")); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java index 3e1e01e3d..9dccc1ebd 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java @@ -60,7 +60,7 @@ public class TestVariantContextWalker extends RodWalker { public void initialize() { if ( outputVCF != null ) - writer = new VCFWriter(new File(outputVCF)); + writer = new VCFWriterImpl(new File(outputVCF)); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java index 14c04e1f7..7611baccd 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java @@ -14,6 +14,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import java.io.File; import java.io.FileInputStream; @@ -79,7 +80,7 @@ public class VCF4WriterTestWalker extends RodWalker { hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - vcfWriter = new VCFWriter(new File(OUTPUT_FILE)); + vcfWriter = new VCFWriterImpl(new File(OUTPUT_FILE)); VCFHeader header = null; for( final ReferenceOrderedDataSource source : dataSources ) { final RMDTrack rod = source.getReferenceOrderedData(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java index 7d5dd34e3..03bb4bab1 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java @@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import org.broad.tribble.vcf.*; import java.io.*; @@ -92,7 +93,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { hInfo.add(new VCFHeaderLine("source", "BeagleImputation")); // Open output file specified by output VCF ROD - vcfWriter = new VCFWriter(new File(OUTPUT_FILE)); + vcfWriter = new VCFWriterImpl(new File(OUTPUT_FILE)); final List dataSources = this.getToolkit().getRodDataSources(); for( final ReferenceOrderedDataSource source : dataSources ) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java index d9ff8a3c8..5d8408621 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java @@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -72,7 +73,7 @@ public class ReadBackedPhasingWalker extends LocusWalker(vc.getSampleNames()))); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java index 788e1d3bf..bb9c8b14d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java @@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; import org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator; import java.util.*; @@ -86,7 +87,7 @@ public class TrioGenotyperWalker extends RefWalker{ FAMILY_MEMBERS = Arrays.asList(mom, dad, kid); // initialize the writer - writer = new VCFWriter(new File(vcfOutputFile)); + writer = new VCFWriterImpl(new File(vcfOutputFile)); } public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java index c92edbd9a..05ac19369 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java @@ -63,6 +63,7 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; /** * Annotates variant calls with information from user-specified tabular files. @@ -240,7 +241,7 @@ public class GenomicAnnotator extends RodWalker, Link hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName())); hInfo.addAll(engine.getVCFAnnotationDescriptions()); - vcfWriter = new VCFWriter(VCF_OUT); + vcfWriter = new VCFWriterImpl(VCF_OUT); VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelect.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelect.java index b663e1398..f1f4e866b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelect.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelect.java @@ -94,7 +94,7 @@ public class VariantSelect extends RodWalker { hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr)); } - writer = new VCFWriter(out); + writer = new VCFWriterImpl(out); Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java index 76a147a43..9b453f945 100644 --- a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java @@ -29,7 +29,7 @@ import net.sf.samtools.SAMFileWriter; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import java.io.File; import java.io.InputStream; @@ -212,7 +212,7 @@ public abstract class ArgumentField { if (InputStream.class.isAssignableFrom(clazz)) return File.class; if (SAMFileReader.class.isAssignableFrom(clazz)) return File.class; if (OutputStream.class.isAssignableFrom(clazz)) return File.class; - if (GenotypeWriter.class.isAssignableFrom(clazz)) return File.class; + if (VCFWriter.class.isAssignableFrom(clazz)) return File.class; if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class; if (PlatformUnitFilterHelper.class.isAssignableFrom(clazz)) return String.class; return clazz; diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index d4339eb14..72f30f3b5 100644 --- a/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.filters.FilterManager; -import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; @@ -83,7 +83,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram { @Override protected Collection getArgumentTypeDescriptors() { List typeDescriptors = new ArrayList(); - typeDescriptors.add(new GenotypeWriterArgumentTypeDescriptor(GATKEngine)); + typeDescriptors.add(new VCFWriterArgumentTypeDescriptor(GATKEngine)); typeDescriptors.add(new SAMFileReaderArgumentTypeDescriptor(GATKEngine)); typeDescriptors.add(new SAMFileWriterArgumentTypeDescriptor(GATKEngine)); typeDescriptors.add(new OutputStreamArgumentTypeDescriptor(GATKEngine)); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java deleted file mode 100644 index 901c3f321..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import org.broad.tribble.util.variantcontext.VariantContext; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author aaron, ebanks - *

- * Class GenotypeWriter - *

- * The interface for writing genotype calls. - */ -public interface GenotypeWriter { - /** - * Add a record, given a variant context, with the genotype fields restricted to what is defined in the header - * @param vc the variant context representing the call to add - * @param refBase This is required for VCF writers, as the VCF format explicitly requires (previous) ref base for an indel. - */ - public void add(VariantContext vc, byte refBase); - - /** finish writing, closing any open files. */ - public void close(); - -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java deleted file mode 100644 index 3031d0156..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import org.broad.tribble.vcf.VCFHeader; -import org.broadinstitute.sting.utils.vcf.GATKVCFWriter; -import org.broadinstitute.sting.utils.genotype.vcf.*; - -import java.io.File; -import java.io.PrintStream; - - -/** - * @author aaron - *

- * Class GenotypeWriterFactory - *

- * A descriptions should go here. Blame aaron if it's missing. - */ -public class GenotypeWriterFactory { - /** available genotype writers */ - public enum GENOTYPE_FORMAT { - GELI, GLF, GELI_BINARY, VCF - } - - /** - * create a genotype writer - * @param destination the destination file - * @return the genotype writer object - */ - public static GenotypeWriter create(File destination) { - return new GATKVCFWriter(destination); - } - - public static GenotypeWriter create(PrintStream destination) { - return new GATKVCFWriter(destination); - } - - public static void writeHeader(GenotypeWriter writer, VCFHeader vcfHeader) { - ((VCFGenotypeWriter)writer).writeHeader(vcfHeader); - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java deleted file mode 100644 index 1a594adc0..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ /dev/null @@ -1,92 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.geli; - -import edu.mit.broad.picard.genotype.geli.GeliFileWriter; -import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; -import net.sf.samtools.SAMFileHeader; -import org.broad.tribble.util.variantcontext.VariantContext; - -import java.io.File; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author aaron, ebanks - * @version 1.0 - *

- * Class GeliAdapter - * Adapts the Geli file writer to the Genotype writer interface - */ -public class GeliAdapter implements GeliGenotypeWriter { - - // the file we're writing to - private File writeTo = null; - - // the geli file writer we're adapting - private GeliFileWriter writer = null; - - /** - * wrap a GeliFileWriter in the Genotype writer interface - * - * @param writeTo where to write to - */ - public GeliAdapter(File writeTo) { - this.writeTo = writeTo; - } - - /** - * wrap a GeliFileWriter in the Genotype writer interface - * - * @param fileHeader the file header to write out - */ - public void writeHeader(final SAMFileHeader fileHeader) { - this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader); - } - - public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { - if ( writer == null ) - throw new IllegalStateException("The Geli Header must be written before records can be added"); - - writer.addGenotypeLikelihoods(gl); - } - - /** - * Add a genotype, given a variant context - * - * @param vc the variant context representing the call to add - * @param refBase not used by this writer - */ - public void add(VariantContext vc, byte refBase) { - throw new UnsupportedOperationException("We no longer support writing Geli"); - } - - /** finish writing, closing any open files. */ - public void close() { - if (this.writer != null) { - this.writer.close(); - } - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeWriter.java deleted file mode 100644 index 801f843ab..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeWriter.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.geli; - -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import net.sf.samtools.SAMFileHeader; -import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; - -/** - * An extension of eth GenotypeWriter interface with support - * for adding a header. - * - * @author mhanna - * @version 0.1 - */ -public interface GeliGenotypeWriter extends GenotypeWriter { - /** - * Write the file header. - * @param fileHeader SAM file header from which to derive the geli header. - */ - public void writeHeader(final SAMFileHeader fileHeader); - - /** - * Writes the genotype likelihoods to the output. - * @param gl genotype likelihoods to write. - */ - public void addGenotypeLikelihoods(GenotypeLikelihoods gl); -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java deleted file mode 100644 index 9602dc9c6..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ /dev/null @@ -1,79 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.geli; - -import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; -import net.sf.samtools.SAMFileHeader; -import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.StingException; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.PrintStream; -import java.io.PrintWriter; - - -/** - * @author aaron - *

- * Class GeliTextWriter - *

- * write out the geli text file format containing genotype information - */ -public class GeliTextWriter implements GeliGenotypeWriter { - // where we write to - PrintWriter mWriter; - - // used to store the max mapping quality as a field in variant contexts - public static final String MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY = "MAXIMUM_MAPPING_QUALITY"; - // used to store the max mapping quality as a field in variant contexts - public static final String READ_COUNT_ATTRIBUTE_KEY = "READ_COUNT"; - - /** - * create a geli text writer - * - * @param file the file to write to - */ - public GeliTextWriter(File file) { - try { - mWriter = new PrintWriter(file); - } catch (FileNotFoundException e) { - throw new StingException("Unable to open file " + file.toURI()); - } - } - - public GeliTextWriter(PrintStream out) { - mWriter = new PrintWriter(out); - } - - public final static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT"; - - /** - * Write the file header. - * @param fileHeader SAM file header from which to derive the geli header. - */ - public void writeHeader(final SAMFileHeader fileHeader) { - // ignore the SAM header; the geli text header is fixed. - mWriter.println(headerLine); - mWriter.flush(); // necessary so that writing to an output stream will work - } - - /** - * Add a genotype, given a variant context - * - * @param vc the variant context representing the call to add - * @param refBase required by the inteface; not used by this writer. - */ - public void add(VariantContext vc, byte refBase) { - throw new UnsupportedOperationException("We no longer support writing Geli"); - } - - public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { - mWriter.println(gl.toString()); - mWriter.flush(); // necessary so that writing to an output stream will work - } - - /** finish writing, closing any open files. */ - public void close() { - mWriter.flush(); - mWriter.close(); - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeWriter.java deleted file mode 100644 index 9ad345373..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeWriter.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.glf; - -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; - -/** - * An extension of eth GenotypeWriter interface with support - * for adding header lines. - * - * @author mhanna - * @version 0.1 - */ -public interface GLFGenotypeWriter extends GenotypeWriter { - /** - * Append the given header text to the GLF file. - * @param headerText the file header to write out - */ - public void writeHeader(String headerText); - - /** - * add a GLF record to the output file - * - * @param contigName the contig name - * @param contigLength the contig length - * @param rec the GLF record to write. - */ - public void addGLFRecord(String contigName, int contigLength, GLFRecord rec); -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 1364708e5..c998b5221 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -43,7 +43,7 @@ import java.io.OutputStream; * single and variable length genotype calls using the provided functions. When you've finished * generating GLF records, make sure you close the file. */ -public class GLFWriter implements GLFGenotypeWriter { +public class GLFWriter { // our output codec private final BinaryCodec outputBinaryCodec; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java deleted file mode 100644 index 035d9e22f..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broad.tribble.vcf.VCFHeader; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; - -import java.io.File; - -/** - * An extension of the GenotypeWriter interface with support - * for adding header lines. - * - * @author mhanna - * @version 0.1 - */ -public interface VCFGenotypeWriter extends GenotypeWriter { - /** - * initialize this VCF header - * - * @param header the header - */ - public void writeHeader(VCFHeader header); - - /** - * Add a given VCF file to the writer. - * @param file file from which to add records - */ - public void append(File file); - -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index de75cb94a..637a14335 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -1,407 +1,19 @@ package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broad.tribble.util.variantcontext.Allele; -import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.StingException; - -import java.io.*; -import java.util.*; /** * this class writes VCF files */ -public class VCFWriter { +public interface VCFWriter { - // the VCF header we're storing - protected VCFHeader mHeader = null; - - // the print stream we're writting to - protected BufferedWriter mWriter; - - // were filters applied? - protected boolean filtersWereAppliedToContext = false; - - /** - * create a VCF writer, given a file to write to - * - * @param location the file location to write to - */ - public VCFWriter(File location) { - FileOutputStream output; - try { - output = new FileOutputStream(location); - } catch (FileNotFoundException e) { - throw new RuntimeException("Unable to create VCF file at location: " + location); - } - - mWriter = new BufferedWriter(new OutputStreamWriter(output)); - } - - - /** - * create a VCF writer, given a stream to write to - * - * @param output the file location to write to - */ - public VCFWriter(OutputStream output) { - mWriter = new BufferedWriter(new OutputStreamWriter(output)); - } - - public void writeHeader(VCFHeader header) { - this.mHeader = header; - - try { - // the file format field needs to be written first - mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n"); - - for ( VCFHeaderLine line : header.getMetaData() ) { - if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) || - line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) || - line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) ) - continue; - - // are the records filtered (so we know what to put in the FILTER column of passing records) ? - if ( line instanceof VCFFilterHeaderLine ) - filtersWereAppliedToContext = true; - - mWriter.write(VCFHeader.METADATA_INDICATOR); - mWriter.write(line.toString()); - mWriter.write("\n"); - } - - // write out the column line - mWriter.write(VCFHeader.HEADER_INDICATOR); - for ( VCFHeader.HEADER_FIELDS field : header.getHeaderFields() ) { - mWriter.write(field.toString()); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - } - - if ( header.hasGenotypingData() ) { - mWriter.write("FORMAT"); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - for ( String sample : header.getGenotypeSamples() ) { - mWriter.write(sample); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - } } - - mWriter.write("\n"); - mWriter.flush(); // necessary so that writing to an output stream will work - } - catch (IOException e) { - throw new RuntimeException("IOException writing the VCF header", e); - } - } + public void writeHeader(VCFHeader header); /** * attempt to close the VCF file */ - public void close() { - try { - mWriter.flush(); - mWriter.close(); - } catch (IOException e) { - throw new RuntimeException("Unable to close VCFFile"); - } - } - - public void add(VariantContext vc, byte refBase) { - if ( mHeader == null ) - throw new IllegalStateException("The VCF Header must be written before records can be added"); - - try { - - vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, refBase); - - GenomeLoc loc = VariantContextUtils.getLocation(vc); - Map alleleMap = new HashMap(vc.getAlleles().size()); - alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup - - // CHROM - mWriter.write(loc.getContig()); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - - // POS - mWriter.write(String.valueOf(loc.getStart())); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - - // ID - String ID = vc.hasAttribute(VariantContext.ID_KEY) ? vc.getAttributeAsString(VariantContext.ID_KEY) : VCFConstants.EMPTY_ID_FIELD; - mWriter.write(ID); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - - // REF - alleleMap.put(vc.getReference(), "0"); - String refString = vc.getReference().getBaseString(); - mWriter.write(refString); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - - // ALT - if ( vc.isVariant() ) { - Allele altAllele = vc.getAlternateAllele(0); - alleleMap.put(altAllele, "1"); - String alt = altAllele.getBaseString(); - mWriter.write(alt); - - for (int i = 1; i < vc.getAlternateAlleles().size(); i++) { - altAllele = vc.getAlternateAllele(i); - alleleMap.put(altAllele, String.valueOf(i+1)); - alt = altAllele.getBaseString(); - mWriter.write(","); - mWriter.write(alt); - } - } else { - mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); - } - mWriter.write(VCFConstants.FIELD_SEPARATOR); - - // QUAL - if ( !vc.hasNegLog10PError() ) - mWriter.write(VCFConstants.MISSING_VALUE_v4); - else - mWriter.write(getQualValue(vc.getPhredScaledQual())); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - - // FILTER - String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); - mWriter.write(filters); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - - // INFO - Map infoFields = new TreeMap(); - for ( Map.Entry field : vc.getAttributes().entrySet() ) { - String key = field.getKey(); - if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) ) - continue; - - String outputValue = formatVCFField(field.getValue()); - if ( outputValue != null ) - infoFields.put(key, outputValue); - } - writeInfoString(infoFields); - - // FORMAT - List genotypeAttributeKeys = new ArrayList(); - if ( vc.hasGenotypes() ) { - genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); - for ( String key : calcVCFGenotypeKeys(vc) ) { - genotypeAttributeKeys.add(key); - } - } else if ( mHeader.hasGenotypingData() ) { - // this needs to be done in case all samples are no-calls - genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); - } - - if ( genotypeAttributeKeys.size() > 0 ) { - String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - mWriter.write(genotypeFormatString); - - addGenotypeData(vc, alleleMap, genotypeAttributeKeys); - } - - mWriter.write("\n"); - mWriter.flush(); // necessary so that writing to an output stream will work - } catch (IOException e) { - throw new RuntimeException("Unable to write the VCF object to a file"); - } - - } - - private String getQualValue(double qual) { - String s = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual); - if ( s.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) ) - s = s.substring(0, s.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length()); - return s; - } - - /** - * create the info string; assumes that no values are null - * - * @param infoFields a map of info fields - * @throws IOException for writer - */ - protected void writeInfoString(Map infoFields) throws IOException { - if ( infoFields.isEmpty() ) { - mWriter.write(VCFConstants.EMPTY_INFO_FIELD); - return; - } - - boolean isFirst = true; - for ( Map.Entry entry : infoFields.entrySet() ) { - if ( isFirst ) - isFirst = false; - else - mWriter.write(VCFConstants.INFO_FIELD_SEPARATOR); - - String key = entry.getKey(); - mWriter.write(key); - - if ( !entry.getValue().equals("") ) { - int numVals = 1; - VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key); - if ( metaData != null ) - numVals = metaData.getCount(); - - // take care of unbounded encoding - if ( numVals == VCFInfoHeaderLine.UNBOUNDED ) - numVals = 1; - - if ( numVals > 0 ) { - mWriter.write("="); - mWriter.write(entry.getValue()); - } - } - } - } - - /** - * add the genotype data - * - * @param vc the variant context - * @param genotypeFormatKeys Genotype formatting string - * @param alleleMap alleles for this context - * @throws IOException for writer - */ - private void addGenotypeData(VariantContext vc, Map alleleMap, List genotypeFormatKeys) - throws IOException { - - for ( String sample : mHeader.getGenotypeSamples() ) { - mWriter.write(VCFConstants.FIELD_SEPARATOR); - - Genotype g = vc.getGenotype(sample); - if ( g == null ) { - // TODO -- The VariantContext needs to know what the general ploidy is of the samples - // TODO -- We shouldn't be assuming diploid genotypes here! - mWriter.write(VCFConstants.EMPTY_GENOTYPE); - continue; - } - - writeAllele(g.getAllele(0), alleleMap); - for (int i = 1; i < g.getPloidy(); i++) { - mWriter.write(g.genotypesArePhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED); - writeAllele(g.getAllele(i), alleleMap); - } - - List attrs = new ArrayList(genotypeFormatKeys.size()); - for ( String key : genotypeFormatKeys ) { - if ( key.equals(VCFConstants.GENOTYPE_KEY) ) - continue; - - Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4; - - // some exceptions - if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) { - if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 ) - val = VCFConstants.MISSING_VALUE_v4; - else { - val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL)); - } - } else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) { - val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); - } - - VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key); - if ( metaData != null ) { - int numInFormatField = metaData.getCount(); - if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) { - // If we have a missing field but multiple values are expected, we need to construct a new string with all fields. - // For example, if Number=2, the string has to be ".,." - StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4); - for ( int i = 1; i < numInFormatField; i++ ) { - sb.append(","); - sb.append(VCFConstants.MISSING_VALUE_v4); - } - val = sb.toString(); - } - } - - // assume that if key is absent, then the given string encoding suffices - String outputValue = formatVCFField(val); - if ( outputValue != null ) - attrs.add(outputValue); - } - - // strip off trailing missing values - for (int i = attrs.size()-1; i >= 0; i--) { - if ( isMissingValue(attrs.get(i)) ) - attrs.remove(i); - else - break; - } - - for (String s : attrs ) { - mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); - mWriter.write(s); - } - } - } - - private boolean isMissingValue(String s) { - // we need to deal with the case that it's a list of missing values - return (MathUtils.countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + MathUtils.countOccurrences(',', s) == s.length()); - } - - private void writeAllele(Allele allele, Map alleleMap) throws IOException { - String encoding = alleleMap.get(allele); - if ( encoding == null ) - throw new StingException("Allele " + allele + " is not an allele in the variant context"); - mWriter.write(encoding); - } - - private static String formatVCFField(Object val) { - String result; - if ( val == null ) - result = VCFConstants.MISSING_VALUE_v4; - else if ( val instanceof Double ) - result = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, (Double)val); - else if ( val instanceof Boolean ) - result = (Boolean)val ? "" : null; // empty string for true, null for false - else if ( val instanceof List ) { - result = formatVCFField(((List)val).toArray()); - } else if ( val instanceof Object[] ) { - Object[] array = (Object[])val; - if ( array.length == 0 ) - return formatVCFField(null); - StringBuffer sb = new StringBuffer(formatVCFField(array[0])); - for ( int i = 1; i < array.length; i++) { - sb.append(","); - sb.append(formatVCFField(array[i])); - } - result = sb.toString(); - } else - result = val.toString(); - - return result; - } - - private static List calcVCFGenotypeKeys(VariantContext vc) { - Set keys = new HashSet(); - - boolean sawGoodQual = false; - boolean sawGenotypeFilter = false; - for ( Genotype g : vc.getGenotypes().values() ) { - keys.addAll(g.getAttributes().keySet()); - if ( g.hasNegLog10PError() ) - sawGoodQual = true; - if (g.isFiltered() && g.isCalled()) - sawGenotypeFilter = true; - } - - if ( sawGoodQual ) - keys.add(VCFConstants.GENOTYPE_QUALITY_KEY); - - if (sawGenotypeFilter) - keys.add(VCFConstants.GENOTYPE_FILTER_KEY); - - return Utils.sorted(new ArrayList(keys)); - } - + public void close(); + public void add(VariantContext vc, byte refBase); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterImpl.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterImpl.java new file mode 100644 index 000000000..57a7dc796 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterImpl.java @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.genotype.vcf; + +import org.broad.tribble.vcf.*; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.StingException; + +import java.io.*; +import java.util.*; + +/** + * this class writes VCF files + */ +public class VCFWriterImpl implements VCFWriter { + + // the VCF header we're storing + protected VCFHeader mHeader = null; + + // the print stream we're writting to + protected BufferedWriter mWriter; + + // were filters applied? + protected boolean filtersWereAppliedToContext = false; + + /** + * create a VCF writer, given a file to write to + * + * @param location the file location to write to + */ + public VCFWriterImpl(File location) { + FileOutputStream output; + try { + output = new FileOutputStream(location); + } catch (FileNotFoundException e) { + throw new RuntimeException("Unable to create VCF file at location: " + location); + } + + mWriter = new BufferedWriter(new OutputStreamWriter(output)); + } + + + /** + * create a VCF writer, given a stream to write to + * + * @param output the file location to write to + */ + public VCFWriterImpl(OutputStream output) { + mWriter = new BufferedWriter(new OutputStreamWriter(output)); + } + + public void writeHeader(VCFHeader header) { + this.mHeader = header; + + try { + // the file format field needs to be written first + mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n"); + + for ( VCFHeaderLine line : header.getMetaData() ) { + if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) || + line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) || + line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) ) + continue; + + // are the records filtered (so we know what to put in the FILTER column of passing records) ? + if ( line instanceof VCFFilterHeaderLine) + filtersWereAppliedToContext = true; + + mWriter.write(VCFHeader.METADATA_INDICATOR); + mWriter.write(line.toString()); + mWriter.write("\n"); + } + + // write out the column line + mWriter.write(VCFHeader.HEADER_INDICATOR); + for ( VCFHeader.HEADER_FIELDS field : header.getHeaderFields() ) { + mWriter.write(field.toString()); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + } + + if ( header.hasGenotypingData() ) { + mWriter.write("FORMAT"); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + for ( String sample : header.getGenotypeSamples() ) { + mWriter.write(sample); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + } } + + mWriter.write("\n"); + mWriter.flush(); // necessary so that writing to an output stream will work + } + catch (IOException e) { + throw new RuntimeException("IOException writing the VCF header", e); + } + } + + /** + * attempt to close the VCF file + */ + public void close() { + try { + mWriter.flush(); + mWriter.close(); + } catch (IOException e) { + throw new RuntimeException("Unable to close VCFFile"); + } + } + + public void add(VariantContext vc, byte refBase) { + if ( mHeader == null ) + throw new IllegalStateException("The VCF Header must be written before records can be added"); + + try { + + vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, refBase); + + GenomeLoc loc = VariantContextUtils.getLocation(vc); + Map alleleMap = new HashMap(vc.getAlleles().size()); + alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup + + // CHROM + mWriter.write(loc.getContig()); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + + // POS + mWriter.write(String.valueOf(loc.getStart())); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + + // ID + String ID = vc.hasAttribute(VariantContext.ID_KEY) ? vc.getAttributeAsString(VariantContext.ID_KEY) : VCFConstants.EMPTY_ID_FIELD; + mWriter.write(ID); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + + // REF + alleleMap.put(vc.getReference(), "0"); + String refString = vc.getReference().getBaseString(); + mWriter.write(refString); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + + // ALT + if ( vc.isVariant() ) { + Allele altAllele = vc.getAlternateAllele(0); + alleleMap.put(altAllele, "1"); + String alt = altAllele.getBaseString(); + mWriter.write(alt); + + for (int i = 1; i < vc.getAlternateAlleles().size(); i++) { + altAllele = vc.getAlternateAllele(i); + alleleMap.put(altAllele, String.valueOf(i+1)); + alt = altAllele.getBaseString(); + mWriter.write(","); + mWriter.write(alt); + } + } else { + mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); + } + mWriter.write(VCFConstants.FIELD_SEPARATOR); + + // QUAL + if ( !vc.hasNegLog10PError() ) + mWriter.write(VCFConstants.MISSING_VALUE_v4); + else + mWriter.write(getQualValue(vc.getPhredScaledQual())); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + + // FILTER + String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); + mWriter.write(filters); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + + // INFO + Map infoFields = new TreeMap(); + for ( Map.Entry field : vc.getAttributes().entrySet() ) { + String key = field.getKey(); + if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) ) + continue; + + String outputValue = formatVCFField(field.getValue()); + if ( outputValue != null ) + infoFields.put(key, outputValue); + } + writeInfoString(infoFields); + + // FORMAT + List genotypeAttributeKeys = new ArrayList(); + if ( vc.hasGenotypes() ) { + genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); + for ( String key : calcVCFGenotypeKeys(vc) ) { + genotypeAttributeKeys.add(key); + } + } else if ( mHeader.hasGenotypingData() ) { + // this needs to be done in case all samples are no-calls + genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); + } + + if ( genotypeAttributeKeys.size() > 0 ) { + String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); + mWriter.write(VCFConstants.FIELD_SEPARATOR); + mWriter.write(genotypeFormatString); + + addGenotypeData(vc, alleleMap, genotypeAttributeKeys); + } + + mWriter.write("\n"); + mWriter.flush(); // necessary so that writing to an output stream will work + } catch (IOException e) { + throw new RuntimeException("Unable to write the VCF object to a file"); + } + + } + + private String getQualValue(double qual) { + String s = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual); + if ( s.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) ) + s = s.substring(0, s.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length()); + return s; + } + + /** + * create the info string; assumes that no values are null + * + * @param infoFields a map of info fields + * @throws IOException for writer + */ + protected void writeInfoString(Map infoFields) throws IOException { + if ( infoFields.isEmpty() ) { + mWriter.write(VCFConstants.EMPTY_INFO_FIELD); + return; + } + + boolean isFirst = true; + for ( Map.Entry entry : infoFields.entrySet() ) { + if ( isFirst ) + isFirst = false; + else + mWriter.write(VCFConstants.INFO_FIELD_SEPARATOR); + + String key = entry.getKey(); + mWriter.write(key); + + if ( !entry.getValue().equals("") ) { + int numVals = 1; + VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key); + if ( metaData != null ) + numVals = metaData.getCount(); + + // take care of unbounded encoding + if ( numVals == VCFInfoHeaderLine.UNBOUNDED ) + numVals = 1; + + if ( numVals > 0 ) { + mWriter.write("="); + mWriter.write(entry.getValue()); + } + } + } + } + + /** + * add the genotype data + * + * @param vc the variant context + * @param genotypeFormatKeys Genotype formatting string + * @param alleleMap alleles for this context + * @throws IOException for writer + */ + private void addGenotypeData(VariantContext vc, Map alleleMap, List genotypeFormatKeys) + throws IOException { + + for ( String sample : mHeader.getGenotypeSamples() ) { + mWriter.write(VCFConstants.FIELD_SEPARATOR); + + Genotype g = vc.getGenotype(sample); + if ( g == null ) { + // TODO -- The VariantContext needs to know what the general ploidy is of the samples + // TODO -- We shouldn't be assuming diploid genotypes here! + mWriter.write(VCFConstants.EMPTY_GENOTYPE); + continue; + } + + writeAllele(g.getAllele(0), alleleMap); + for (int i = 1; i < g.getPloidy(); i++) { + mWriter.write(g.genotypesArePhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED); + writeAllele(g.getAllele(i), alleleMap); + } + + List attrs = new ArrayList(genotypeFormatKeys.size()); + for ( String key : genotypeFormatKeys ) { + if ( key.equals(VCFConstants.GENOTYPE_KEY) ) + continue; + + Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4; + + // some exceptions + if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) { + if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 ) + val = VCFConstants.MISSING_VALUE_v4; + else { + val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL)); + } + } else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) { + val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); + } + + VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key); + if ( metaData != null ) { + int numInFormatField = metaData.getCount(); + if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) { + // If we have a missing field but multiple values are expected, we need to construct a new string with all fields. + // For example, if Number=2, the string has to be ".,." + StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4); + for ( int i = 1; i < numInFormatField; i++ ) { + sb.append(","); + sb.append(VCFConstants.MISSING_VALUE_v4); + } + val = sb.toString(); + } + } + + // assume that if key is absent, then the given string encoding suffices + String outputValue = formatVCFField(val); + if ( outputValue != null ) + attrs.add(outputValue); + } + + // strip off trailing missing values + for (int i = attrs.size()-1; i >= 0; i--) { + if ( isMissingValue(attrs.get(i)) ) + attrs.remove(i); + else + break; + } + + for (String s : attrs ) { + mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); + mWriter.write(s); + } + } + } + + private boolean isMissingValue(String s) { + // we need to deal with the case that it's a list of missing values + return (MathUtils.countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + MathUtils.countOccurrences(',', s) == s.length()); + } + + private void writeAllele(Allele allele, Map alleleMap) throws IOException { + String encoding = alleleMap.get(allele); + if ( encoding == null ) + throw new StingException("Allele " + allele + " is not an allele in the variant context"); + mWriter.write(encoding); + } + + private static String formatVCFField(Object val) { + String result; + if ( val == null ) + result = VCFConstants.MISSING_VALUE_v4; + else if ( val instanceof Double ) + result = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, (Double)val); + else if ( val instanceof Boolean ) + result = (Boolean)val ? "" : null; // empty string for true, null for false + else if ( val instanceof List ) { + result = formatVCFField(((List)val).toArray()); + } else if ( val instanceof Object[] ) { + Object[] array = (Object[])val; + if ( array.length == 0 ) + return formatVCFField(null); + StringBuffer sb = new StringBuffer(formatVCFField(array[0])); + for ( int i = 1; i < array.length; i++) { + sb.append(","); + sb.append(formatVCFField(array[i])); + } + result = sb.toString(); + } else + result = val.toString(); + + return result; + } + + private static List calcVCFGenotypeKeys(VariantContext vc) { + Set keys = new HashSet(); + + boolean sawGoodQual = false; + boolean sawGenotypeFilter = false; + for ( Genotype g : vc.getGenotypes().values() ) { + keys.addAll(g.getAttributes().keySet()); + if ( g.hasNegLog10PError() ) + sawGoodQual = true; + if (g.isFiltered() && g.isCalled()) + sawGenotypeFilter = true; + } + + if ( sawGoodQual ) + keys.add(VCFConstants.GENOTYPE_QUALITY_KEY); + + if (sawGenotypeFilter) + keys.add(VCFConstants.GENOTYPE_FILTER_KEY); + + return Utils.sorted(new ArrayList(keys)); + } + + + +} diff --git a/java/src/org/broadinstitute/sting/utils/vcf/GATKVCFWriter.java b/java/src/org/broadinstitute/sting/utils/vcf/GATKVCFWriter.java deleted file mode 100644 index 71c4072c7..000000000 --- a/java/src/org/broadinstitute/sting/utils/vcf/GATKVCFWriter.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.vcf; - -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.vcf.*; -import org.broad.tribble.vcf.*; - -import java.io.*; - - -/** - * @author ebanks - *

- * Class GATKVCFWriter - *

- * GATK-specific version of the VCF Writer - */ -public class GATKVCFWriter extends VCFWriter implements VCFGenotypeWriter { - - public GATKVCFWriter(File writeTo) { - super(writeTo); - } - - public GATKVCFWriter(OutputStream writeTo) { - super(writeTo); - } - - public void writeHeader(VCFHeader header) { - // TODO -- put the command-line generating code for the header right here - super.writeHeader(header); - } - - public void append(File file) { - try { - BufferedReader reader = new BufferedReader(new FileReader(file)); - String line = reader.readLine(); - while ( line != null ) { - if ( !VCFHeaderLine.isHeaderLine(line) ) { - mWriter.write(line); - mWriter.write("\n"); - } - line = reader.readLine(); - } - - reader.close(); - } catch (IOException e) { - throw new StingException("Error reading file " + file + " in GATKVCFWriter: ", e); - } - } -} diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index be6c6a9f3..9c720fdc8 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -45,7 +45,7 @@ public class VCFWriterUnitTest extends BaseTest { @Test public void testBasicWriteAndRead() { VCFHeader header = createFakeHeader(metaData,additionalColumns); - VCFWriter writer = new VCFWriter(fakeVCFFile); + VCFWriter writer = new VCFWriterImpl(fakeVCFFile); writer.writeHeader(header); writer.add(createVC(header),"A".getBytes()[0]); writer.add(createVC(header),"A".getBytes()[0]);