From 4ea31fd94921e82dbaff970603a9ec6293e62827 Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 18 Dec 2009 19:16:41 +0000 Subject: [PATCH] Pushed header initialization out of the GenotypeWriter constructors and into a writeHeader method, in preparation for parallelization. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2406 348d0f76-0448-11de-a6fe-93d51630548a --- .../io/storage/GenotypeWriterStorage.java | 11 ++-- .../gatk/io/stubs/GenotypeWriterStub.java | 39 +---------- .../walkers/annotator/VariantAnnotator.java | 3 +- .../concordance/CallsetConcordanceWalker.java | 4 +- .../filters/VariantFiltrationWalker.java | 4 +- .../walkers/genotyper/UnifiedGenotyper.java | 25 +++---- .../walkers/variantstovcf/VariantsToVCF.java | 3 +- .../walkers/vcftools/VCFSubsetWalker.java | 5 +- .../sting/playground/tools/vcf/VCFTool.java | 4 +- .../utils/genotype/GenotypeWriterFactory.java | 44 ++++++++----- .../utils/genotype/geli/GeliAdapter.java | 24 ++++++- .../sting/utils/genotype/glf/GLFWriter.java | 65 +++++++++++-------- .../vcf/VCFGenotypeWriterAdapter.java | 47 ++++---------- .../sting/utils/genotype/vcf/VCFWriter.java | 26 ++++---- .../utils/genotype/glf/GLFWriterTest.java | 9 ++- .../utils/genotype/vcf/VCFWriterTest.java | 3 +- 16 files changed, 146 insertions(+), 170 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java index 1d125d9a6..ebb538103 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -27,12 +27,15 @@ package org.broadinstitute.sting.gatk.io.storage; import java.io.*; import java.util.List; +import java.util.Set; +import java.util.HashSet; import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.glf.*; import org.broadinstitute.sting.utils.genotype.geli.*; import org.broadinstitute.sting.utils.genotype.vcf.*; +import org.broadinstitute.sting.utils.SampleUtils; import edu.mit.broad.picard.genotype.geli.GeliFileReader; /** @@ -51,11 +54,9 @@ public class GenotypeWriterStorage implements GenotypeWriter, Storage samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader()); + GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet()); } public void mergeInto( GenotypeWriter targetStream ) { diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java index febdf6db9..fc04aa8da 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.io.stubs; import java.io.File; import java.util.List; -import java.util.Set; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -35,7 +34,6 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.VariationCall; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; import net.sf.samtools.SAMFileHeader; /** @@ -62,19 +60,6 @@ public class GenotypeWriterStub implements Stub, GenotypeWriter */ private final GenotypeWriterFactory.GENOTYPE_FORMAT format; - /** - * The sample names for the output file - */ - private final Set sampleNames; - - - /** - * The header info for the output file - */ - private final Set headerInfo; - - - /** * Connects this stub with an external stream capable of serving the * requests of the consumer of this stub. @@ -86,19 +71,13 @@ public class GenotypeWriterStub implements Stub, GenotypeWriter * @param engine GATK engine. * @param genotypeFile file to (ultimately) create. * @param format file format. - * @param sampleNames sample names to use for creating writer. - * @param headerInfo header info to use for creating writer. */ public GenotypeWriterStub( GenomeAnalysisEngine engine, File genotypeFile, - GenotypeWriterFactory.GENOTYPE_FORMAT format, - Set sampleNames, - Set headerInfo) { + GenotypeWriterFactory.GENOTYPE_FORMAT format) { this.engine = engine; this.genotypeFile = genotypeFile; this.format = format; - this.sampleNames = sampleNames; - this.headerInfo = headerInfo; } /** @@ -125,22 +104,6 @@ public class GenotypeWriterStub implements Stub, GenotypeWriter return format; } - /** - * Retrieves the sample names to use when creating the new file. - * @return sample names to use when creating the new file. - */ - public Set getSampleNames() { - return sampleNames; - } - - /** - * Retrieves the header info to use when creating the new file. - * @return header info to use when creating the new file. - */ - public Set getHeaderInfo() { - return headerInfo; - } - /** * Registers the given streamConnector with this stub. * @param outputTracker The connector used to provide an appropriate stream. diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 641471920..573b3599e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -122,8 +122,9 @@ public class VariantAnnotator extends RodWalker { hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName())); hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations)); + vcfWriter = new VCFWriter(VCF_OUT); vcfHeader = new VCFHeader(hInfo, samples); - vcfWriter = new VCFWriter(vcfHeader, VCF_OUT); + vcfWriter.writeHeader(vcfHeader); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java index 907522088..a53c9184a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java @@ -108,9 +108,9 @@ public class CallsetConcordanceWalker extends RodWalker { hInfo.add(new VCFHeaderLine("source", "CallsetConcordance")); hInfo.add(new VCFHeaderLine("note", "\"This file represents a concordance test of various call sets - NOT the output from a multi-sample caller\"")); hInfo.addAll(getVCFAnnotationDescriptions(requestedTypes)); - VCFHeader header = new VCFHeader(hInfo, samples); - vcfWriter = new VCFWriter(header, OUTPUT); + vcfWriter = new VCFWriter(OUTPUT); + vcfWriter.writeHeader(new VCFHeader(hInfo, samples)); } public static Set getVCFAnnotationDescriptions(Collection types) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 8b4f7598a..21679e825 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -64,8 +64,8 @@ public class VariantFiltrationWalker extends RodWalker { } } - VCFHeader header = new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()); - writer = new VCFWriter(header, out); + writer = new VCFWriter(out); + writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples())); } public void initialize() { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index c7f7653b5..ae534f0a4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -37,8 +37,6 @@ import org.broadinstitute.sting.utils.cmdLine.*; import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.vcf.*; -import net.sf.samtools.SAMReadGroupRecord; - import java.io.File; import java.util.*; @@ -68,7 +66,7 @@ public class UnifiedGenotyper extends LocusWalker samples; + private Set samples; // keep track of some metrics about our calls private CallMetrics callsMetrics; @@ -113,15 +111,11 @@ public class UnifiedGenotyper extends LocusWalker(); // if we're supposed to assume a single sample - if ( UAC.ASSUME_SINGLE_SAMPLE != null ) { + if ( UAC.ASSUME_SINGLE_SAMPLE != null ) samples.add(UAC.ASSUME_SINGLE_SAMPLE); - } else { - List readGroups = getToolkit().getSAMFileHeader().getReadGroups(); - for ( SAMReadGroupRecord readGroup : readGroups ) - samples.add(readGroup.getSample()); - } + else + samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); // print them out for debugging (need separate loop to ensure uniqueness) // for ( String sample : samples ) @@ -144,15 +138,12 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = getHeaderInfo(); - // create the output writer stream + // create the output writer stream and initialize the header if ( VARIANTS_FILE != null ) - writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, - samples, - headerInfo); + writer = GenotypeWriterFactory.create(VAR_FORMAT, VARIANTS_FILE); else - writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, - samples, - headerInfo); + writer = GenotypeWriterFactory.create(VAR_FORMAT, out); + GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, headerInfo); callsMetrics = new CallMetrics(); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index 59ced192b..a253a57ff 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -49,7 +49,8 @@ public class VariantsToVCF extends RefWalker { } vcfheader = getHeader(args, sampleNames.keySet()); - vcfwriter = new VCFWriter(vcfheader, VCF_OUT); + vcfwriter = new VCFWriter(VCF_OUT); + vcfwriter.writeHeader(vcfheader); } public static VCFHeader getHeader(GATKArgumentCollection args, Set sampleNames) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java index eb90f2a70..ac5091198 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java @@ -20,7 +20,7 @@ public class VCFSubsetWalker extends RefWalker, VCFWriter> private HashSet SAMPLES; @Argument(fullName="vcfsubset", shortName="O", doc="File to write VCF subset to", required=false) - private File VPATH; + private File VPATH = null; @Argument(fullName="includeNonVariants", shortName="INV", doc="Include non-variant loci", required=false) private boolean INCLUDE_NON_VARIANTS = false; @@ -43,7 +43,8 @@ public class VCFSubsetWalker extends RefWalker, VCFWriter> vheader = new VCFHeader(metaData, additionalColumns); if (VPATH != null) { - vwriter = new VCFWriter(vheader, VPATH); + vwriter = new VCFWriter(VPATH); + vwriter.writeHeader(vheader); } } diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java index bfa1ec058..9fff99a30 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java @@ -342,9 +342,9 @@ class VCFGrep extends CommandLineProgram if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); } else { reader = new VCFReader(new File(in_filename)); } - VCFHeader header = reader.getHeader(); - writer = new VCFWriter(header, new File(out_filename)); + writer = new VCFWriter(new File(out_filename)); + writer.writeHeader(reader.getHeader()); while(reader.hasNext()) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index 548aa404f..718c01a04 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -28,48 +28,56 @@ public class GenotypeWriterFactory { /** * create a genotype writer * @param format the format - * @param header the sam file header * @param destination the destination file - * @param sampleNames the sample names - * @param headerInfo the optional header info fields * @return the genotype writer object */ - public static GenotypeWriter create(GENOTYPE_FORMAT format, - SAMFileHeader header, - File destination, - Set sampleNames, - Set headerInfo) { + public static GenotypeWriter create(GENOTYPE_FORMAT format, File destination) { switch (format) { case GLF: - return new GLFWriter(header.toString(), destination); + return new GLFWriter(destination); case GELI: return new GeliTextWriter(destination); case GELI_BINARY: - return new GeliAdapter(destination, header); + return new GeliAdapter(destination); case VCF: - return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo); + return new VCFGenotypeWriterAdapter(destination); default: throw new StingException("Genotype writer " + format.toString() + " is not implemented"); } } - public static GenotypeWriter create(GENOTYPE_FORMAT format, - SAMFileHeader header, - PrintStream destination, - Set sampleNames, - Set headerInfo) { + public static GenotypeWriter create(GENOTYPE_FORMAT format, PrintStream destination) { switch (format) { case GELI: return new GeliTextWriter(destination); case GLF: - return new GLFWriter(header.toString(), destination); + return new GLFWriter(destination); case VCF: - return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo); + return new VCFGenotypeWriterAdapter(destination); default: throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented"); } } + public static void writeHeader(GenotypeWriter writer, + SAMFileHeader header, + Set sampleNames, + Set headerInfo) { + // VCF + if ( writer instanceof VCFGenotypeWriterAdapter ) { + ((VCFGenotypeWriterAdapter)writer).writeHeader(sampleNames, headerInfo); + } + // GELI BINARY + else if ( writer instanceof GeliAdapter ) { + ((GeliAdapter)writer).writeHeader(header); + } + // GLF + else if ( writer instanceof GLFWriter ) { + ((GLFWriter)writer).writeHeader(header.toString()); + } + // nothing to do for GELI TEXT + } + /** * create a genotype call * @param format the format diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index aba286aee..80a9dfb7a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -46,17 +46,27 @@ import java.util.List; */ public class GeliAdapter implements GenotypeWriter { + // the file we're writing to + private File writeTo = null; // the geli file writer we're adapting - private final GeliFileWriter writer; + private GeliFileWriter writer = null; /** * wrap a GeliFileWriter in the Genotype writer interface * * @param writeTo where to write to + */ + public GeliAdapter(File writeTo) { + this.writeTo = writeTo; + } + + /** + * wrap a GeliFileWriter in the Genotype writer interface + * * @param fileHeader the file header to write out */ - public GeliAdapter(File writeTo, final SAMFileHeader fileHeader) { + public void writeHeader(final SAMFileHeader fileHeader) { this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader); } @@ -67,6 +77,8 @@ public class GeliAdapter implements GenotypeWriter { * @param contig the contig you're calling in * @param position the position on the contig * @param referenceBase the reference base + * @param maxMappingQuality the max MQ + * @param readCount the read count * @param likelihoods the likelihoods of each of the possible alleles */ private void addGenotypeCall(SAMSequenceRecord contig, @@ -99,6 +111,9 @@ public class GeliAdapter implements GenotypeWriter { } public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { + if ( writer == null ) + throw new IllegalStateException("The Geli Header must be written before records can be added"); + writer.addGenotypeLikelihoods(gl); } @@ -108,6 +123,9 @@ public class GeliAdapter implements GenotypeWriter { * @param call the call to add */ public void addGenotypeCall(Genotype call) { + if ( writer == null ) + throw new IllegalStateException("The Geli Header must be written before calls can be added"); + if ( !(call instanceof GeliGenotypeCall) ) throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); GeliGenotypeCall gCall = (GeliGenotypeCall)call; @@ -135,7 +153,7 @@ public class GeliAdapter implements GenotypeWriter { /** * add a no call to the genotype file, if supported. * - * @param position + * @param position the position */ public void addNoCall(int position) { throw new UnsupportedOperationException("Geli format does not support no-calls"); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 17680eeeb..83be8bc78 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -53,7 +53,7 @@ public class GLFWriter implements GenotypeWriter { public static final short[] glfMagic = {'G', 'L', 'F', '\3'}; // our header text, reference sequence name (i.e. chr1), and it's length - private String headerText = ""; + private String headerText = null; private String referenceSequenceName = null; private long referenceSequenceLength = 0; @@ -63,29 +63,42 @@ public class GLFWriter implements GenotypeWriter { /** * The public constructor for creating a GLF object * - * @param headerText the header text (currently unclear what the contents are) * @param writeTo the location to write to */ - public GLFWriter(String headerText, File writeTo) { - this.headerText = headerText; + public GLFWriter(File writeTo) { outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo))); outputBinaryCodec.setOutputFileName(writeTo.toString()); - this.writeHeader(); } /** * The public constructor for creating a GLF object * - * @param headerText the header text (currently unclear what the contents are) * @param writeTo the location to write to */ - public GLFWriter(String headerText, OutputStream writeTo) { - this.headerText = headerText; + public GLFWriter(OutputStream writeTo) { outputBinaryCodec = new BinaryCodec(writeTo); outputBinaryCodec.setOutputFileName(writeTo.toString()); - this.writeHeader(); } + /** + * Write out the header information for the GLF file. The header contains + * the magic number, the length of the header text, the text itself, the reference + * sequence (null terminated) preceeded by it's length, and the the genomic + * length of the reference sequence. + * + * @param headerText the header text to write + */ + public void writeHeader(String headerText) { + this.headerText = headerText; + for (int x = 0; x < glfMagic.length; x++) { + outputBinaryCodec.writeUByte(glfMagic[x]); + } + if (!(headerText.equals(""))) { + outputBinaryCodec.writeString(headerText, true, true); + } else { + outputBinaryCodec.writeInt(0); + } + } /** * add a point genotype to the GLF writer @@ -103,6 +116,8 @@ public class GLFWriter implements GenotypeWriter { char refBase, int readDepth, LikelihoodObject lhValues) { + if ( headerText == null ) + throw new IllegalStateException("The GLF Header must be written before calls can be added"); // check if we've jumped to a new contig checkSequence(contig.getSequenceName(), contig.getSequenceLength()); @@ -122,6 +137,9 @@ public class GLFWriter implements GenotypeWriter { * @param call the genotype call */ public void addGenotypeCall(Genotype call) { + if ( headerText == null ) + throw new IllegalStateException("The GLF Header must be written before calls can be added"); + if ( !(call instanceof GLFGenotypeCall) ) throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); GLFGenotypeCall gCall = (GLFGenotypeCall)call; @@ -176,6 +194,9 @@ public class GLFWriter implements GenotypeWriter { IndelLikelihood secondHomZyg, byte hetLikelihood) { + if ( headerText == null ) + throw new IllegalStateException("The GLF Header must be written before calls can be added"); + // check if we've jumped to a new contig checkSequence(contig.getSequenceName(), contig.getSequenceLength()); @@ -213,27 +234,13 @@ public class GLFWriter implements GenotypeWriter { * @param rec the GLF record to write. */ public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) { + if ( headerText == null ) + throw new IllegalStateException("The GLF Header must be written before records can be added"); + checkSequence(contigName, contigLength); rec.write(this.outputBinaryCodec); } - /** - * Write out the header information for the GLF file. The header contains - * the magic number, the length of the header text, the text itself, the reference - * sequence (null terminated) preceeded by it's length, and the the genomic - * length of the reference sequence. - */ - private void writeHeader() { - for (int x = 0; x < glfMagic.length; x++) { - outputBinaryCodec.writeUByte(glfMagic[x]); - } - if (!(headerText.equals(""))) { - outputBinaryCodec.writeString(headerText, true, true); - } else { - outputBinaryCodec.writeInt(0); - } - } - /** * check to see if we've jumped to a new contig * @@ -255,12 +262,18 @@ public class GLFWriter implements GenotypeWriter { /** add a sequence definition to the glf */ private void addSequence() { + if ( headerText == null ) + throw new IllegalStateException("The GLF Header must be written before sequences can be added"); + outputBinaryCodec.writeString(referenceSequenceName, true, true); outputBinaryCodec.writeUInt(referenceSequenceLength); } /** write end record */ private void writeEndRecord() { + if ( headerText == null ) + throw new IllegalStateException("The GLF Header must be written before records can be added"); + outputBinaryCodec.writeUByte((short) 0); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index ba3745871..3f0c1cce6 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -25,55 +25,33 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class); - public VCFGenotypeWriterAdapter(File writeTo, Set sampleNames, Set headerInfo) { - mSampleNames.addAll(sampleNames); - - initializeHeader(headerInfo); - + public VCFGenotypeWriterAdapter(File writeTo) { if (writeTo == null) throw new RuntimeException("VCF output file must not be null"); - mWriter = new VCFWriter(mHeader, writeTo); + mWriter = new VCFWriter(writeTo); } - public VCFGenotypeWriterAdapter(OutputStream writeTo, Set sampleNames, Set headerInfo) { - mSampleNames.addAll(sampleNames); - - initializeHeader(headerInfo); - + public VCFGenotypeWriterAdapter(OutputStream writeTo) { if (writeTo == null) throw new RuntimeException("VCF output stream must not be null"); - mWriter = new VCFWriter(mHeader, writeTo); + mWriter = new VCFWriter(writeTo); } /** * initialize this VCF header * - * @param optionalHeaderInfo the optional header fields + * @param sampleNames the sample names + * @param headerInfo the optional header fields */ - private void initializeHeader(Set optionalHeaderInfo) { - Set hInfo = new TreeSet(); + public void writeHeader(Set sampleNames, Set headerInfo) { + mSampleNames.addAll(sampleNames); // setup the header fields + Set hInfo = new TreeSet(); hInfo.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION)); - hInfo.addAll(optionalHeaderInfo); + hInfo.addAll(headerInfo); // setup the sample names mHeader = new VCFHeader(hInfo, mSampleNames); - } - - /** - * get the samples names from genotype objects - * - * @param genotypes the genotype list - * - * @return a list of strings representing the sample names - */ - private static List getSampleNames(List genotypes) { - List strings = new ArrayList(); - for (Genotype genotype : genotypes) { - if (!(genotype instanceof VCFGenotypeCall)) - throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface"); - strings.add(((VCFGenotypeCall) genotype).getSampleName()); - } - return strings; + mWriter.writeHeader(mHeader); } /** @@ -105,6 +83,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * @param genotypes the list of genotypes */ public void addMultiSampleCall(List genotypes, VariationCall locusdata) { + if ( mHeader == null ) + throw new IllegalStateException("The VCF Header must be written before records can be added"); + if ( locusdata != null && !(locusdata instanceof VCFVariationCall) ) throw new IllegalArgumentException("Only VCFVariationCall objects should be passed in to the VCF writers"); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index c8ca43f18..be9d8265e 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -11,43 +11,39 @@ public class VCFWriter { // the VCF header we're storing - private VCFHeader mHeader; + private VCFHeader mHeader = null; // the print stream we're writting to BufferedWriter mWriter; private final String FIELD_SEPERATOR = "\t"; /** - * create a VCF writer, given a VCF header and a file to write to + * create a VCF writer, given a file to write to * - * @param header the VCF header * @param location the file location to write to */ - public VCFWriter(VCFHeader header, File location) { + public VCFWriter(File location) { FileOutputStream output; try { output = new FileOutputStream(location); } catch (FileNotFoundException e) { throw new RuntimeException("Unable to create VCF file at location: " + location); } - initialize(header, output); + mWriter = new BufferedWriter(new OutputStreamWriter(output)); } /** - * create a VCF writer, given a VCF header and a file to write to + * create a VCF writer, given a stream to write to * - * @param header the VCF header - * @param location the file location to write to + * @param output the file location to write to */ - public VCFWriter(VCFHeader header, OutputStream location) { - initialize(header, location); + public VCFWriter(OutputStream output) { + mWriter = new BufferedWriter(new OutputStreamWriter(output)); } - private void initialize(VCFHeader header, OutputStream location) { + public void writeHeader(VCFHeader header) { this.mHeader = header; - mWriter = new BufferedWriter( - new OutputStreamWriter(location)); try { // the fileformat field needs to be written first TreeSet nonFormatMetaData = new TreeSet(); @@ -87,6 +83,9 @@ public class VCFWriter { * @param record the record to output */ public void addRecord(VCFRecord record) { + if ( mHeader == null ) + throw new IllegalStateException("The VCF Header must be written before records can be added"); + String vcfString = record.toStringEncoding(mHeader); try { mWriter.write(vcfString + "\n"); @@ -96,7 +95,6 @@ public class VCFWriter { } - /** * attempt to close the VCF file */ diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java index 3afb76fbe..df49b57a7 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java @@ -5,10 +5,8 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.genotype.BasicGenotype; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.LikelihoodsBacked; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -96,7 +94,8 @@ public class GLFWriterTest extends BaseTest { File writeTo = new File("testGLF.glf"); writeTo.deleteOnExit(); - rec = new GLFWriter(header, writeTo); + rec = new GLFWriter(writeTo); + ((GLFWriter)rec).writeHeader(header); for (int x = 0; x < 100; x++) { GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); Genotype type = createGenotype(x % 10, loc, 'A'); @@ -112,7 +111,8 @@ public class GLFWriterTest extends BaseTest { File writeTo = new File("testGLF2.glf"); writeTo.deleteOnExit(); List types = new ArrayList(); - rec = new GLFWriter(header, writeTo); + rec = new GLFWriter(writeTo); + ((GLFWriter)rec).writeHeader(header); for (int x = 0; x < 100; x++) { GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); FakeGenotype type = createGenotype(x % 10, loc, 'A'); @@ -167,7 +167,6 @@ class FakeGenotype extends GLFGenotypeCall implements Comparable { } - @Override public int compareTo(FakeGenotype that) { if (this.getLocation().compareTo(that.getLocation()) != 0) { System.err.println("Location's aren't equal; this = " + this.getLocation() + " that = " + that.getLocation()); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java index 38f2dfc3d..a1592a131 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java @@ -39,7 +39,8 @@ public class VCFWriterTest extends BaseTest { @Test public void testBasicWriteAndRead() { VCFHeader header = createFakeHeader(metaData,additionalColumns); - VCFWriter writer = new VCFWriter(header,fakeVCFFile); + VCFWriter writer = new VCFWriter(fakeVCFFile); + writer.writeHeader(header); writer.addRecord(createVCFRecord(header)); writer.addRecord(createVCFRecord(header)); writer.close();