diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java new file mode 100755 index 000000000..1d125d9a6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.io.storage; + +import java.io.*; +import java.util.List; + +import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; +import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.utils.genotype.glf.*; +import org.broadinstitute.sting.utils.genotype.geli.*; +import org.broadinstitute.sting.utils.genotype.vcf.*; +import edu.mit.broad.picard.genotype.geli.GeliFileReader; + +/** + * Provides temporary storage for GenotypeWriters. Calls are forwarded to a writer created on a backing file; mergeInto() reads that file back into a target writer and then deletes it. 
+ * + * @author ebanks + * @version 0.1 + */ +public class GenotypeWriterStorage implements GenotypeWriter, Storage { + private final File file; + private final GenotypeWriter writer; + /** Creates storage backed by the file that the stub reports via getFile(). */ + public GenotypeWriterStorage( GenotypeWriterStub stub ) { + this(stub, stub.getFile()); + } + /** Creates storage backed by the given file; the wrapped writer is built by GenotypeWriterFactory from the format, SAM file header, sample names and header info of the stub. */ + public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) { + this.file = file; + writer = GenotypeWriterFactory.create(stub.getFormat(), + stub.getSAMFileHeader(), + file, + stub.getSampleNames(), + stub.getHeaderInfo()); + } + /** Reads the backing file record by record into targetStream, choosing the reader and the add-method from the concrete type of the target, then deletes the file. */ + public void mergeInto( GenotypeWriter targetStream ) { + + // TODO -- This is ugly, but there is no GenotypeWriter interface since + // TODO -- VCFReaders need to be separated out for compatibility with Tribble + // TODO -- and the adapters don't all implement a common interface. Fix me. Please. + + // VCF + if ( targetStream instanceof VCFGenotypeWriterAdapter ) { + VCFReader reader = new VCFReader(file); + while ( reader.hasNext() ) + ((VCFGenotypeWriterAdapter)targetStream).addRecord(reader.next()); + reader.close(); + } + + // GELI TEXT + else if ( targetStream instanceof GeliTextWriter ) { + GeliFileReader reader = new GeliFileReader(file); + while ( reader.hasNext() ) + ((GeliTextWriter)targetStream).addGenotypeLikelihoods(reader.next()); + reader.close(); + } + + // GELI BINARY + else if ( targetStream instanceof GeliAdapter ) { + GeliFileReader reader = new GeliFileReader(file); + while ( reader.hasNext() ) + ((GeliAdapter)targetStream).addGenotypeLikelihoods(reader.next()); + reader.close(); + } + + // GLF + else if ( targetStream instanceof GLFWriter ) { + GLFReader reader = new GLFReader(file); + while ( reader.hasNext() ) { + // TODO -- Find out from Aaron if this is correct. 
Looking through the code, + // TODO -- it looks like this will exhibit the correct behavior - but it feels + // TODO -- wrong that we get the contig/length of the record before we call next() (Java evaluates arguments left to right, so both getters do run before next()) + ((GLFWriter)targetStream).addGLFRecord(reader.getReferenceName(), reader.getReferenceLength(), reader.next()); + } + reader.close(); + } + // NOTE(review): the file is deleted even when targetStream matched none of the branches above, and the result of delete() is ignored + file.delete(); + } + /** Forwards the genotype call to the wrapped writer. */ + public void addGenotypeCall(Genotype call) { + writer.addGenotypeCall(call); + } + /** Forwards the no-call at the given position to the wrapped writer. */ + public void addNoCall(int position) { + writer.addNoCall(position); + } + /** Forwards the multi-sample call to the wrapped writer. */ + public void addMultiSampleCall(List genotypes, VariationCall variation) { + writer.addMultiSampleCall(genotypes, variation); + } + /** @return whatever the wrapped writer reports for supportsMultiSample(). */ + public boolean supportsMultiSample() { + return writer.supportsMultiSample(); + } + /** Closes the wrapped writer; the backing file itself is left in place. */ + public void close() { + writer.close(); + } + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java new file mode 100755 index 000000000..febdf6db9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.io.stubs; + +import java.io.File; +import java.util.List; +import java.util.Set; + +import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.VariationCall; +import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; +import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; +import net.sf.samtools.SAMFileHeader; + +/** + * A stub for routing and management of genotype reading and writing. Each GenotypeWriter call is forwarded to the storage that the registered OutputTracker returns for this stub. + * + * @author ebanks + * @version 0.1 + */ +public class GenotypeWriterStub implements Stub, GenotypeWriter { + + /** + * Engine that supplies the SAM file header returned by getSAMFileHeader(). + */ + private final GenomeAnalysisEngine engine; + + /** + * The file that this stub should write to. Should be passed along to + * whatever happens to create the StreamConnector. 
+ */ + private final File genotypeFile; + + /** + * The file format for the output + */ + private final GenotypeWriterFactory.GENOTYPE_FORMAT format; + + /** + * The sample names for the output file + */ + private final Set sampleNames; + + + /** + * The header info for the output file (element type is not visible here; presumably VCFHeaderLine, which this file imports -- confirm) + */ + private final Set headerInfo; + + + + /** + * Connects this stub with an external stream capable of serving the + * requests of the consumer of this stub. Null until register() is called. + */ + private OutputTracker outputTracker = null; + + /** + * Create a new stub given the requested file. + * @param engine GATK engine. + * @param genotypeFile file to (ultimately) create. + * @param format file format. + * @param sampleNames sample names to use for creating writer. + * @param headerInfo header info to use for creating writer. + */ + public GenotypeWriterStub( GenomeAnalysisEngine engine, + File genotypeFile, + GenotypeWriterFactory.GENOTYPE_FORMAT format, + Set sampleNames, + Set headerInfo) { + this.engine = engine; + this.genotypeFile = genotypeFile; + this.format = format; + this.sampleNames = sampleNames; + this.headerInfo = headerInfo; + } + + /** + * Retrieves the file to (ultimately) be created. + * @return The file. Must not be null. + */ + public File getFile() { + return genotypeFile; + } + + /** + * Retrieves the SAM file header to use when creating the new file, as reported by the engine. + * @return header to use when creating the new file. + */ + public SAMFileHeader getSAMFileHeader() { + return engine.getSAMFileHeader(); + } + + /** + * Retrieves the format to use when creating the new file. + * @return format to use when creating the new file. + */ + public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() { + return format; + } + + /** + * Retrieves the sample names to use when creating the new file. + * @return sample names to use when creating the new file. + */ + public Set getSampleNames() { + return sampleNames; + } + + /** + * Retrieves the header info to use when creating the new file. 
+ * @return header info to use when creating the new file. + */ + public Set getHeaderInfo() { + return headerInfo; + } + + /** + * Registers the given output tracker with this stub. + * @param outputTracker The tracker used to look up the storage that backs this stub. + */ + public void register( OutputTracker outputTracker ) { + this.outputTracker = outputTracker; + } + + /** + * {@inheritDoc} Forwarded to the storage that the output tracker returns for this stub. + */ + public void addGenotypeCall(Genotype call) { + outputTracker.getStorage(this).addGenotypeCall(call); + } + + /** + * {@inheritDoc} Forwarded to the storage that the output tracker returns for this stub. + */ + public void addNoCall(int position) { + outputTracker.getStorage(this).addNoCall(position); + } + + /** + * {@inheritDoc} Forwarded to the storage that the output tracker returns for this stub. + */ + public void addMultiSampleCall(List genotypes, VariationCall variation) { + outputTracker.getStorage(this).addMultiSampleCall(genotypes, variation); + } + + /** + * {@inheritDoc} Forwarded to the storage that the output tracker returns for this stub. + */ + public boolean supportsMultiSample() { + return outputTracker.getStorage(this).supportsMultiSample(); + } + + /** + * {@inheritDoc} Forwarded to the storage that the output tracker returns for this stub. + */ + public void close() { + outputTracker.getStorage(this).close(); + } + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 2f101973f..aba286aee 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -98,6 +98,10 @@ public class GeliAdapter implements GenotypeWriter { throw new UnsupportedOperationException("Geli format does not support variable length allele calls"); } + public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { + writer.addGenotypeLikelihoods(gl); + } + /** * Add a genotype, given a genotype call * * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index a88b9a278..1f249955b 100644 --- 
a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -11,6 +11,8 @@ import java.io.PrintWriter; import java.util.Arrays; import java.util.List; +import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; + /** * @author aaron @@ -94,6 +96,10 @@ public class GeliTextWriter implements GenotypeWriter { posteriors[9])); } + public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { + mWriter.println(gl.toString()); + } + /** * add a no call to the genotype file, if supported. * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java index 434352850..691f1409d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java @@ -146,12 +146,10 @@ public class GLFReader implements Iterator { return new VariableLengthCall(refBase, offset, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2); } - @Override public boolean hasNext() { return (nextRecord != null); } - @Override public GLFRecord next() { GLFRecord ret = nextRecord; short firstBase = protectedByteReadForFile(); @@ -220,11 +218,14 @@ public class GLFReader implements Iterator { return false; } - @Override public void remove() { throw new StingException("GLFReader doesn't support remove()"); } + public void close() { + inputBinaryCodec.close(); + } + /** * getter methods */ diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 84caedfdf..ba3745871 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -172,6 +172,10 @@ public 
class VCFGenotypeWriterAdapter implements GenotypeWriter { mWriter.addRecord(vcfRecord); } + public void addRecord(VCFRecord vcfRecord) { + mWriter.addRecord(vcfRecord); + } + /** * get the information fields of the VCF record, given the meta data and parameters *