Updates to the various GenotypeWriters to make them do simple things like write records (plus allow GLFReader to close).

Adding a first pass of stub and storage classes for the GenotypeWriters so that UG can be parallelized.  Not hooked up yet, so UG is unchanged.
The mergeInto() code in the storage class is ugly, but it's all Tribble's fault.  We can clean it up later if this whole thing works.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2400 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-18 07:20:23 +00:00
parent 1cde4161b7
commit 4f59bfd513
6 changed files with 331 additions and 3 deletions

View File

@ -0,0 +1,126 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.io.storage;
import java.io.*;
import java.util.List;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.glf.*;
import org.broadinstitute.sting.utils.genotype.geli.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
/**
 * Provides temporary storage for GenotypeWriters.
 *
 * Calls made through the GenotypeWriter interface are forwarded to a writer
 * created by GenotypeWriterFactory for the backing file.  mergeInto() later
 * reads that file back, replays its records into a target writer and then
 * deletes the file.
 *
 * @author ebanks
 * @version 0.1
 */
public class GenotypeWriterStorage implements GenotypeWriter, Storage<GenotypeWriter> {
    /** File that backs this storage; read back and deleted by mergeInto(). */
    private final File file;

    /** Writer that every GenotypeWriter call on this storage is forwarded to. */
    private final GenotypeWriter writer;

    /**
     * Creates storage that writes directly to the file named by the stub.
     *
     * @param stub stub supplying the output file and the writer settings.
     */
    public GenotypeWriterStorage( GenotypeWriterStub stub ) {
        this(stub, stub.getFile());
    }

    /**
     * Creates storage that writes to the given file, using the stub's settings
     * (format, SAM header, sample names, header info) to build the writer.
     *
     * @param stub stub supplying the writer settings.
     * @param file file the records are written to.
     */
    public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) {
        this.file = file;
        writer = GenotypeWriterFactory.create(stub.getFormat(),
                                              stub.getSAMFileHeader(),
                                              file,
                                              stub.getSampleNames(),
                                              stub.getHeaderInfo());
    }

    /**
     * Replays every record stored in the backing file into the target writer,
     * then deletes the backing file.
     *
     * The reader used for the replay is closed even when reading or writing a
     * record fails; in that case the exception propagates and the backing file
     * is left in place.
     *
     * NOTE(review): a target of any type other than the four handled below gets
     * no records, yet the backing file is still deleted -- confirm that this is
     * the intended behavior.
     *
     * @param targetStream writer that receives the stored records.
     */
    public void mergeInto( GenotypeWriter targetStream ) {
        // TODO -- This is ugly, but there is no GenotypeWriter interface since
        // TODO -- VCFReaders need to be separated out for compatibility with Tribble
        // TODO -- and the adapters don't all implement a common interface.  Fix me.  Please.

        // VCF
        if ( targetStream instanceof VCFGenotypeWriterAdapter ) {
            VCFGenotypeWriterAdapter vcfTarget = (VCFGenotypeWriterAdapter)targetStream;
            VCFReader reader = new VCFReader(file);
            try {
                while ( reader.hasNext() )
                    vcfTarget.addRecord(reader.next());
            } finally {
                reader.close();
            }
        }
        // GELI TEXT
        else if ( targetStream instanceof GeliTextWriter ) {
            GeliTextWriter geliTextTarget = (GeliTextWriter)targetStream;
            GeliFileReader reader = new GeliFileReader(file);
            try {
                while ( reader.hasNext() )
                    geliTextTarget.addGenotypeLikelihoods(reader.next());
            } finally {
                reader.close();
            }
        }
        // GELI BINARY
        else if ( targetStream instanceof GeliAdapter ) {
            GeliAdapter geliTarget = (GeliAdapter)targetStream;
            GeliFileReader reader = new GeliFileReader(file);
            try {
                while ( reader.hasNext() )
                    geliTarget.addGenotypeLikelihoods(reader.next());
            } finally {
                reader.close();
            }
        }
        // GLF
        else if ( targetStream instanceof GLFWriter ) {
            GLFWriter glfTarget = (GLFWriter)targetStream;
            GLFReader reader = new GLFReader(file);
            try {
                while ( reader.hasNext() ) {
                    // TODO -- Find out from Aaron if this is correct.  Looking through the code,
                    // TODO -- it looks like this will exhibit the correct behavior - but it feels
                    // TODO -- wrong that we get the contig/length of the record before we call next()
                    // Arguments are evaluated left to right, so name and length are read before next() advances.
                    glfTarget.addGLFRecord(reader.getReferenceName(), reader.getReferenceLength(), reader.next());
                }
            } finally {
                reader.close();
            }
        }

        // Best-effort cleanup: the result of delete() is deliberately not checked.
        file.delete();
    }

    /** Forwards the genotype call to the underlying writer. */
    public void addGenotypeCall(Genotype call) {
        writer.addGenotypeCall(call);
    }

    /** Forwards the no-call at the given position to the underlying writer. */
    public void addNoCall(int position) {
        writer.addNoCall(position);
    }

    /** Forwards the multi-sample call to the underlying writer. */
    public void addMultiSampleCall(List<Genotype> genotypes, VariationCall variation) {
        writer.addMultiSampleCall(genotypes, variation);
    }

    /** @return whatever the underlying writer reports for multi-sample support. */
    public boolean supportsMultiSample() {
        return writer.supportsMultiSample();
    }

    /** Closes the underlying writer. */
    public void close() {
        writer.close();
    }
}

View File

@ -0,0 +1,187 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.io.stubs;
import java.io.File;
import java.util.List;
import java.util.Set;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariationCall;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import net.sf.samtools.SAMFileHeader;
/**
 * A stand-in for a genotype writer.  The stub records the settings needed to
 * create the real writer (file, format, sample names, header info) and hands
 * every GenotypeWriter call to the storage that the registered OutputTracker
 * returns for this stub.
 *
 * @author ebanks
 * @version 0.1
 */
public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter {

    /** Engine that is asked for the SAM file header. */
    private final GenomeAnalysisEngine engine;

    /** File that should (ultimately) be written. */
    private final File genotypeFile;

    /** Output file format. */
    private final GenotypeWriterFactory.GENOTYPE_FORMAT format;

    /** Sample names for the output file. */
    private final Set<String> sampleNames;

    /** Header info for the output file. */
    private final Set<VCFHeaderLine> headerInfo;

    /**
     * Tracker that supplies the storage serving this stub's requests.
     * Remains null until register() is called.
     */
    private OutputTracker outputTracker = null;

    /**
     * Builds a stub for the requested output file.
     *
     * @param engine       GATK engine.
     * @param genotypeFile file to (ultimately) create.
     * @param format       file format.
     * @param sampleNames  sample names to use for creating the writer.
     * @param headerInfo   header info to use for creating the writer.
     */
    public GenotypeWriterStub( GenomeAnalysisEngine engine,
                               File genotypeFile,
                               GenotypeWriterFactory.GENOTYPE_FORMAT format,
                               Set<String> sampleNames,
                               Set<VCFHeaderLine> headerInfo) {
        this.headerInfo = headerInfo;
        this.sampleNames = sampleNames;
        this.format = format;
        this.genotypeFile = genotypeFile;
        this.engine = engine;
    }

    /**
     * @return the file to (ultimately) be created.
     */
    public File getFile() {
        return this.genotypeFile;
    }

    /**
     * @return the SAM file header reported by the engine.
     */
    public SAMFileHeader getSAMFileHeader() {
        return this.engine.getSAMFileHeader();
    }

    /**
     * @return the format to use when creating the new file.
     */
    public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
        return this.format;
    }

    /**
     * @return the sample names to use when creating the new file.
     */
    public Set<String> getSampleNames() {
        return this.sampleNames;
    }

    /**
     * @return the header info to use when creating the new file.
     */
    public Set<VCFHeaderLine> getHeaderInfo() {
        return this.headerInfo;
    }

    /**
     * Attaches the tracker that will supply storage for this stub.
     *
     * @param outputTracker the tracker to route subsequent calls through.
     */
    public void register( OutputTracker outputTracker ) {
        this.outputTracker = outputTracker;
    }

    /**
     * Looks up the storage the registered tracker currently provides for this stub.
     */
    private GenotypeWriter currentStorage() {
        return outputTracker.getStorage(this);
    }

    /**
     * {@inheritDoc}
     */
    public void addGenotypeCall(Genotype call) {
        currentStorage().addGenotypeCall(call);
    }

    /**
     * {@inheritDoc}
     */
    public void addNoCall(int position) {
        currentStorage().addNoCall(position);
    }

    /**
     * {@inheritDoc}
     */
    public void addMultiSampleCall(List<Genotype> genotypes, VariationCall variation) {
        currentStorage().addMultiSampleCall(genotypes, variation);
    }

    /**
     * {@inheritDoc}
     */
    public boolean supportsMultiSample() {
        return currentStorage().supportsMultiSample();
    }

    /**
     * {@inheritDoc}
     */
    public void close() {
        currentStorage().close();
    }
}

View File

@ -98,6 +98,10 @@ public class GeliAdapter implements GenotypeWriter {
throw new UnsupportedOperationException("Geli format does not support variable length allele calls");
}
/**
 * Passes an already-built genotype likelihoods object straight to the
 * underlying writer.
 *
 * @param gl the genotype likelihoods to write.
 */
public void addGenotypeLikelihoods(final GenotypeLikelihoods gl) {
    this.writer.addGenotypeLikelihoods(gl);
}
/**
* Add a genotype, given a genotype call
*

View File

@ -11,6 +11,8 @@ import java.io.PrintWriter;
import java.util.Arrays;
import java.util.List;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
* @author aaron
@ -94,6 +96,10 @@ public class GeliTextWriter implements GenotypeWriter {
posteriors[9]));
}
/**
 * Writes the string form of the given genotype likelihoods as one line of output.
 *
 * @param gl the genotype likelihoods to write.
 */
public void addGenotypeLikelihoods(final GenotypeLikelihoods gl) {
    final String line = gl.toString();
    mWriter.println(line);
}
/**
* add a no call to the genotype file, if supported.
*

View File

@ -146,12 +146,10 @@ public class GLFReader implements Iterator<GLFRecord> {
return new VariableLengthCall(refBase, offset, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2);
}
@Override
/**
 * Reports whether a record is waiting to be handed out by next().
 *
 * @return true when a pending record is held, false otherwise.
 */
public boolean hasNext() {
    if ( nextRecord == null ) {
        return false;
    }
    return true;
}
@Override
public GLFRecord next() {
GLFRecord ret = nextRecord;
short firstBase = protectedByteReadForFile();
@ -220,11 +218,14 @@ public class GLFReader implements Iterator<GLFRecord> {
return false;
}
@Override
/**
 * Removal is not supported by this reader; this method always throws.
 */
public void remove() {
    final String message = "GLFReader doesn't support remove()";
    throw new StingException(message);
}
/**
 * Closes the binary codec held by this reader.
 */
public void close() {
    this.inputBinaryCodec.close();
}
/**
* getter methods
*/

View File

@ -172,6 +172,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
mWriter.addRecord(vcfRecord);
}
/**
 * Hands an existing VCF record to the wrapped writer.
 *
 * @param vcfRecord the record to write.
 */
public void addRecord(final VCFRecord vcfRecord) {
    this.mWriter.addRecord(vcfRecord);
}
/**
* get the information fields of the VCF record, given the meta data and parameters
*