From 4f59bfd51374a86579a979404336852e07be7c9b Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 18 Dec 2009 07:20:23 +0000 Subject: [PATCH] Updates to the various GenotypeWriters to make them do simple things like write records (plus allow GLFReader to close). Adding first pass of stub and storage classes for the GenotypeWriters so that UG can be parallelizable. Not hooked up yet, so UG is unchanged. The mergeInto() code in the storage class is ugly, but it's all Tribble's fault. We can clean it up later if this whole thing works. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2400 348d0f76-0448-11de-a6fe-93d51630548a --- .../io/storage/GenotypeWriterStorage.java | 126 ++++++++++++ .../gatk/io/stubs/GenotypeWriterStub.java | 187 ++++++++++++++++++ .../utils/genotype/geli/GeliAdapter.java | 4 + .../utils/genotype/geli/GeliTextWriter.java | 6 + .../sting/utils/genotype/glf/GLFReader.java | 7 +- .../vcf/VCFGenotypeWriterAdapter.java | 4 + 6 files changed, 331 insertions(+), 3 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java create mode 100755 java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java new file mode 100755 index 000000000..1d125d9a6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is 
furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.io.storage; + +import java.io.*; +import java.util.List; + +import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; +import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.utils.genotype.glf.*; +import org.broadinstitute.sting.utils.genotype.geli.*; +import org.broadinstitute.sting.utils.genotype.vcf.*; +import edu.mit.broad.picard.genotype.geli.GeliFileReader; + +/** + * Provides temporary storage for GenotypeWriters. 
+ * + * @author ebanks + * @version 0.1 + */ +public class GenotypeWriterStorage implements GenotypeWriter, Storage { + private final File file; + private final GenotypeWriter writer; + + public GenotypeWriterStorage( GenotypeWriterStub stub ) { + this(stub, stub.getFile()); + } + + public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) { /* builds the backing writer from the format, SAM header, sample names and header info of the stub, writing to the given file */ + this.file = file; + writer = GenotypeWriterFactory.create(stub.getFormat(), + stub.getSAMFileHeader(), + file, + stub.getSampleNames(), + stub.getHeaderInfo()); + } + + public void mergeInto( GenotypeWriter targetStream ) { /* copies every record of the temporary file into targetStream, choosing the reader by the concrete type of targetStream, then deletes the temporary file */ + + // TODO -- This is ugly, but there is no GenotypeWriter interface since + // TODO -- VCFReaders need to be separated out for compatibility with Tribble + // TODO -- and the adapters don't all implement a common interface. Fix me. Please. + + // VCF + if ( targetStream instanceof VCFGenotypeWriterAdapter ) { + VCFReader reader = new VCFReader(file); + while ( reader.hasNext() ) + ((VCFGenotypeWriterAdapter)targetStream).addRecord(reader.next()); + reader.close(); + } + + // GELI TEXT -- NOTE(review): uses the same GeliFileReader as the GELI BINARY branch below; confirm it can parse the temporary file written for the text format + else if ( targetStream instanceof GeliTextWriter ) { + GeliFileReader reader = new GeliFileReader(file); + while ( reader.hasNext() ) + ((GeliTextWriter)targetStream).addGenotypeLikelihoods(reader.next()); + reader.close(); + } + + // GELI BINARY + else if ( targetStream instanceof GeliAdapter ) { + GeliFileReader reader = new GeliFileReader(file); + while ( reader.hasNext() ) + ((GeliAdapter)targetStream).addGenotypeLikelihoods(reader.next()); + reader.close(); + } + + // GLF + else if ( targetStream instanceof GLFWriter ) { + GLFReader reader = new GLFReader(file); + while ( reader.hasNext() ) { + // TODO -- Find out from Aaron if this is correct. 
Looking through the code, + // TODO -- it looks like this will exhibit the correct behavior - but it feels + // TODO -- wrong that we get the contig/length of the record before we call next() (Java evaluates arguments left to right, so both getters run before next()) + ((GLFWriter)targetStream).addGLFRecord(reader.getReferenceName(), reader.getReferenceLength(), reader.next()); + } + reader.close(); + } + + file.delete(); /* runs even when targetStream matched none of the branches above, in which case nothing was copied; the boolean result is ignored */ + } + + public void addGenotypeCall(Genotype call) { + writer.addGenotypeCall(call); + } + + public void addNoCall(int position) { + writer.addNoCall(position); + } + + public void addMultiSampleCall(List genotypes, VariationCall variation) { + writer.addMultiSampleCall(genotypes, variation); + } + + public boolean supportsMultiSample() { + return writer.supportsMultiSample(); + } + + public void close() { + writer.close(); + } + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java new file mode 100755 index 000000000..febdf6db9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.io.stubs; + +import java.io.File; +import java.util.List; +import java.util.Set; + +import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.VariationCall; +import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; +import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; +import net.sf.samtools.SAMFileHeader; + +/** + * A stub for routing and management of genotype writing; every GenotypeWriter call is forwarded to the storage that the registered OutputTracker returns for this stub. + * + * @author ebanks + * @version 0.1 + */ +public class GenotypeWriterStub implements Stub, GenotypeWriter { + + /** + * Engine to use for collecting attributes for the output file; this class only asks it for the SAM file header. + */ + private final GenomeAnalysisEngine engine; + + /** + * The file that this stub should write to. Should be passed along to + * whatever happens to create the StreamConnector. 
+ */ + private final File genotypeFile; + + /** + * The file format for the output + */ + private final GenotypeWriterFactory.GENOTYPE_FORMAT format; + + /** + * The sample names for the output file + */ + private final Set sampleNames; + + + /** + * The header info for the output file (NOTE(review): presumably VCFHeaderLine entries, since that import is otherwise unused here -- confirm) + */ + private final Set headerInfo; + + + + /** + * Connects this stub with an external stream capable of serving the + * requests of the consumer of this stub. Null until register() is called. + */ + private OutputTracker outputTracker = null; + + /** + * Create a new stub given the requested file. + * @param engine GATK engine. + * @param genotypeFile file to (ultimately) create. + * @param format file format. + * @param sampleNames sample names to use for creating writer. + * @param headerInfo header info to use for creating writer. + */ + public GenotypeWriterStub( GenomeAnalysisEngine engine, + File genotypeFile, + GenotypeWriterFactory.GENOTYPE_FORMAT format, + Set sampleNames, + Set headerInfo) { + this.engine = engine; + this.genotypeFile = genotypeFile; + this.format = format; + this.sampleNames = sampleNames; + this.headerInfo = headerInfo; + } + + /** + * Retrieves the file to (ultimately) be created. + * @return The file. Must not be null. + */ + public File getFile() { + return genotypeFile; + } + + /** + * Retrieves the SAM file header, as reported by the engine, to use when creating the new file. + * @return SAM file header to use when creating the new file. + */ + public SAMFileHeader getSAMFileHeader() { + return engine.getSAMFileHeader(); + } + + /** + * Retrieves the format to use when creating the new file. + * @return format to use when creating the new file. + */ + public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() { + return format; + } + + /** + * Retrieves the sample names to use when creating the new file. + * @return sample names to use when creating the new file. + */ + public Set getSampleNames() { + return sampleNames; + } + + /** + * Retrieves the header info to use when creating the new file. 
+ * @return header info to use when creating the new file. + */ + public Set getHeaderInfo() { + return headerInfo; + } + + /** + * Registers the given outputTracker with this stub. + * @param outputTracker The tracker used to look up the storage that serves this stub. + */ + public void register( OutputTracker outputTracker ) { + this.outputTracker = outputTracker; + } + + /** + * {@inheritDoc} + */ + public void addGenotypeCall(Genotype call) { + outputTracker.getStorage(this).addGenotypeCall(call); + } + + /** + * {@inheritDoc} + */ + public void addNoCall(int position) { + outputTracker.getStorage(this).addNoCall(position); + } + + /** + * {@inheritDoc} + */ + public void addMultiSampleCall(List genotypes, VariationCall variation) { + outputTracker.getStorage(this).addMultiSampleCall(genotypes, variation); + } + + /** + * {@inheritDoc} + */ + public boolean supportsMultiSample() { + return outputTracker.getStorage(this).supportsMultiSample(); + } + + /** + * {@inheritDoc} + */ + public void close() { + outputTracker.getStorage(this).close(); + } + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 2f101973f..aba286aee 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -98,6 +98,10 @@ public class GeliAdapter implements GenotypeWriter { throw new UnsupportedOperationException("Geli format does not support variable length allele calls"); } + public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { + writer.addGenotypeLikelihoods(gl); + } + /** * Add a genotype, given a genotype call * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index a88b9a278..1f249955b 100644 --- 
a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -11,6 +11,8 @@ import java.io.PrintWriter; import java.util.Arrays; import java.util.List; +import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; + /** * @author aaron @@ -94,6 +96,10 @@ public class GeliTextWriter implements GenotypeWriter { posteriors[9])); } + public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { // NOTE(review): prints gl.toString() verbatim as one line; confirm that matches the row layout the other add methods of this writer print + mWriter.println(gl.toString()); + } + /** * add a no call to the genotype file, if supported. * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java index 434352850..691f1409d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java @@ -146,12 +146,10 @@ public class GLFReader implements Iterator { return new VariableLengthCall(refBase, offset, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2); } - @Override public boolean hasNext() { return (nextRecord != null); } - @Override public GLFRecord next() { GLFRecord ret = nextRecord; short firstBase = protectedByteReadForFile(); @@ -220,11 +218,14 @@ public class GLFReader implements Iterator { return false; } - @Override public void remove() { throw new StingException("GLFReader doesn't support remove()"); } + public void close() { + inputBinaryCodec.close(); + } + /** * getter methods */ diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 84caedfdf..ba3745871 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -172,6 +172,10 @@ public 
class VCFGenotypeWriterAdapter implements GenotypeWriter { mWriter.addRecord(vcfRecord); } + public void addRecord(VCFRecord vcfRecord) { + mWriter.addRecord(vcfRecord); + } + /** * get the information fields of the VCF record, given the meta data and parameters *