Rework Eric's output management code, given that the behavior of the UG (UnifiedGenotyper) changes
drastically depending on its output format.  The current implementation is probably a bit overkill-ish,
and we can whittle it down to what's absolutely necessary.
Writing VCFs to the 'out' protected PrintStream may not work at the moment.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2425 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-12-22 00:33:43 +00:00
parent f448a263e9
commit 0d890e1bf0
23 changed files with 675 additions and 170 deletions

View File

@ -0,0 +1,68 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
/**
 * Provides temporary and permanent storage for genotypes in GLF format.
 *
 * All write operations are forwarded to the inherited {@code writer}; the
 * base class declares that field as a plain GenotypeWriter, hence the casts
 * to GLFGenotypeWriter below.
 *
 * @author mhanna
 * @version 0.1
 */
public class GLFGenotypeWriterStorage extends GenotypeWriterStorage<GLFGenotypeWriter> implements GLFGenotypeWriter {
    /**
     * Creates new (permanent) storage for GLF genotype writers.
     * @param stub Stub containing appropriate input parameters.
     */
    public GLFGenotypeWriterStorage(GenotypeWriterStub stub) {
        super(stub);
    }

    /**
     * Creates new (temporary) storage for GLF genotype writers.
     * @param stub Stub containing appropriate input parameters.
     * @param target Target file for output data.
     */
    public GLFGenotypeWriterStorage(GenotypeWriterStub stub, File target) {
        super(stub,target);
    }

    /**
     * Write the GLF header to the target file.
     * @param headerText The header to write.
     */
    public void writeHeader(String headerText) {
        ((GLFGenotypeWriter)writer).writeHeader(headerText);
    }

    /**
     * Add a GLF record to the output file.
     *
     * @param contigName the contig name
     * @param contigLength the contig length
     * @param rec the GLF record to write.
     */
    public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) {
        ((GLFGenotypeWriter)writer).addGLFRecord(contigName,contigLength,rec);
    }

    /**
     * Merges the file backing this temporary storage into the target, then
     * deletes the backing file.  The reader is always closed, even if the
     * merge fails part-way; the backing file is only deleted after every
     * record has been copied successfully.
     * @param target Target stream for the temporary storage.  May not be null.
     */
    @Override
    public void mergeInto(GLFGenotypeWriter target) {
        GLFReader reader = new GLFReader(file);
        try {
            while ( reader.hasNext() ) {
                GLFRecord rec = reader.next();
                // NOTE(review): the record's position is passed where addGLFRecord
                // documents a contig length -- behavior kept as-is; confirm intent.
                target.addGLFRecord(rec.getContig(),(int)rec.getPosition(),rec);
            }
        }
        finally {
            // Release the file handle regardless of whether the copy succeeded.
            reader.close();
        }
        // Result intentionally ignored, as before: a leftover temp file is not fatal.
        file.delete();
    }
}

View File

@ -0,0 +1,66 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
/**
 * Provides temporary and permanent storage for genotypes in Geli binary format.
 *
 * All write operations are forwarded to the inherited {@code writer}; the
 * base class declares that field as a plain GenotypeWriter, hence the casts
 * to GeliGenotypeWriter below.
 *
 * @author mhanna
 * @version 0.1
 */
public class GeliBinaryGenotypeWriterStorage extends GenotypeWriterStorage<GeliGenotypeWriter> implements GeliGenotypeWriter {
    /**
     * Creates new (permanent) storage for geli binary genotype writers.
     * @param stub Stub containing appropriate input parameters.
     */
    public GeliBinaryGenotypeWriterStorage(GenotypeWriterStub stub) {
        super(stub);
    }

    /**
     * Creates new (temporary) storage for geli binary genotype writers.
     * @param stub Stub containing appropriate input parameters.
     * @param target Target file for output data.
     */
    public GeliBinaryGenotypeWriterStorage(GenotypeWriterStub stub, File target) {
        super(stub,target);
    }

    /**
     * Write the geli header to the target file.
     * @param header The header to write.
     */
    public void writeHeader(SAMFileHeader header) {
        ((GeliGenotypeWriter)writer).writeHeader(header);
    }

    /**
     * Writes the genotype likelihoods to the output.
     * @param gl genotype likelihoods to write.
     */
    public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
        ((GeliGenotypeWriter)writer).addGenotypeLikelihoods(gl);
    }

    /**
     * Merges the file backing this temporary storage into the target, then
     * deletes the backing file.  The reader is always closed, even if the
     * merge fails part-way; the backing file is only deleted after every
     * record has been copied successfully.
     * @param target Target stream for the temporary storage.  May not be null.
     */
    @Override
    public void mergeInto(GeliGenotypeWriter target) {
        GeliFileReader reader = new GeliFileReader(file);
        try {
            while ( reader.hasNext() )
                target.addGenotypeLikelihoods(reader.next());
        }
        finally {
            // Release the file handle regardless of whether the copy succeeded.
            reader.close();
        }
        // Result intentionally ignored, as before: a leftover temp file is not fatal.
        file.delete();
    }
}

View File

@ -0,0 +1,65 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
/**
 * Provides temporary and permanent storage for genotypes in Geli text format.
 *
 * All write operations are forwarded to the inherited {@code writer}; the
 * base class declares that field as a plain GenotypeWriter, hence the casts
 * to GeliGenotypeWriter below.
 *
 * @author mhanna
 * @version 0.1
 */
public class GeliTextGenotypeWriterStorage extends GenotypeWriterStorage<GeliGenotypeWriter> implements GeliGenotypeWriter {
    /**
     * Creates new (permanent) storage for geli text genotype writers.
     * @param stub Stub containing appropriate input parameters.
     */
    public GeliTextGenotypeWriterStorage(GenotypeWriterStub stub) {
        super(stub);
    }

    /**
     * Creates new (temporary) storage for geli text genotype writers.
     * @param stub Stub containing appropriate input parameters.
     * @param target Target file for output data.
     */
    public GeliTextGenotypeWriterStorage(GenotypeWriterStub stub, File target) {
        super(stub,target);
    }

    /**
     * Write the geli header to the target file.
     * @param header The header to write.
     */
    public void writeHeader(SAMFileHeader header) {
        ((GeliGenotypeWriter)writer).writeHeader(header);
    }

    /**
     * Writes the genotype likelihoods to the output.
     * @param gl genotype likelihoods to write.
     */
    public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
        ((GeliGenotypeWriter)writer).addGenotypeLikelihoods(gl);
    }

    /**
     * Merges the file backing this temporary storage into the target, then
     * deletes the backing file.  The reader is always closed, even if the
     * merge fails part-way; the backing file is only deleted after every
     * record has been copied successfully.
     * @param target Target stream for the temporary storage.  May not be null.
     */
    @Override
    public void mergeInto(GeliGenotypeWriter target) {
        // NOTE(review): the same GeliFileReader is used for the binary geli storage;
        // confirm it can actually parse the text-format file written here.
        GeliFileReader reader = new GeliFileReader(file);
        try {
            while ( reader.hasNext() )
                target.addGenotypeLikelihoods(reader.next());
        }
        finally {
            // Release the file handle regardless of whether the copy succeeded.
            reader.close();
        }
        // Result intentionally ignored, as before: a leftover temp file is not fatal.
        file.delete();
    }
}

View File

@ -32,11 +32,8 @@ import java.util.HashSet;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.glf.*;
import org.broadinstitute.sting.utils.genotype.geli.*;
import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
/** /**
* Provides temporary storage for GenotypeWriters. * Provides temporary storage for GenotypeWriters.
@ -44,76 +41,32 @@ import edu.mit.broad.picard.genotype.geli.GeliFileReader;
* @author ebanks * @author ebanks
* @version 0.1 * @version 0.1
*/ */
public class GenotypeWriterStorage implements GenotypeWriter, Storage<GenotypeWriter> { public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements GenotypeWriter, Storage<T> {
private final GenotypeWriterFactory.GENOTYPE_FORMAT format; protected final File file;
private final File file; protected final GenotypeWriter writer;
private final GenotypeWriter writer;
/**
* Constructs an object which will write directly into the output file provided by the stub.
* Intentionally delaying the writing of the header -- this should be filled in by the walker.
* @param stub Stub to use when constructing the output file.
*/
public GenotypeWriterStorage( GenotypeWriterStub stub ) { public GenotypeWriterStorage( GenotypeWriterStub stub ) {
this(stub, stub.getFile()); this.file = stub.getFile();
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
} }
/**
* Constructs an object which will redirect into a different file.
* @param stub Stub to use when synthesizing file / header info.
* @param file File into which to direct the output data.
*/
public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) { public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) {
this.format = stub.getFormat();
this.file = file; this.file = file;
writer = GenotypeWriterFactory.create(stub.getFormat(), file); writer = GenotypeWriterFactory.create(stub.getFormat(), file);
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader()); Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>()); GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
} }
/**
* Reports the format of the given genotyping data, taken directly from the stub.
* @return The format of the genotyping file.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return format;
}
public void mergeInto( GenotypeWriter targetStream ) {
// TODO -- This is ugly, but there is no GenotypeWriter interface since
// TODO -- VCFReaders need to be separated out for compatability with Tribble
// TODO -- and the adapters don't all implement a common interface. Fix me. Please.
// VCF
if ( targetStream instanceof VCFGenotypeWriterAdapter ) {
VCFReader reader = new VCFReader(file);
while ( reader.hasNext() )
((VCFGenotypeWriterAdapter)targetStream).addRecord(reader.next());
reader.close();
}
// GELI TEXT
else if ( targetStream instanceof GeliTextWriter ) {
GeliFileReader reader = new GeliFileReader(file);
while ( reader.hasNext() )
((GeliTextWriter)targetStream).addGenotypeLikelihoods(reader.next());
reader.close();
}
// GELI BINARY
else if ( targetStream instanceof GeliAdapter ) {
GeliFileReader reader = new GeliFileReader(file);
while ( reader.hasNext() )
((GeliAdapter)targetStream).addGenotypeLikelihoods(reader.next());
reader.close();
}
// GLF
else if ( targetStream instanceof GLFWriter ) {
GLFReader reader = new GLFReader(file);
while ( reader.hasNext() ) {
GLFRecord rec = reader.next();
((GLFWriter)targetStream).addGLFRecord(rec.getContig(),(int)rec.getPosition(),rec);
}
reader.close();
}
file.delete();
}
public void addGenotypeCall(Genotype call) { public void addGenotypeCall(Genotype call) {
writer.addGenotypeCall(call); writer.addGenotypeCall(call);
} }

View File

@ -28,9 +28,7 @@ package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub;
import org.broadinstitute.sting.gatk.io.storage.SAMFileWriterStorage; import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.gatk.io.storage.Storage;
import org.broadinstitute.sting.gatk.io.storage.OutputStreamStorage;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import java.io.File; import java.io.File;
@ -79,6 +77,45 @@ public class StorageFactory {
else else
storage = new SAMFileWriterStorage((SAMFileWriterStub)stub); storage = new SAMFileWriterStorage((SAMFileWriterStub)stub);
} }
else if(stub instanceof GenotypeWriterStub) {
GenotypeWriterStub genotypeWriterStub = (GenotypeWriterStub)stub;
if( file != null ) {
switch(genotypeWriterStub.getFormat()) {
case GELI:
storage = new GeliTextGenotypeWriterStorage(genotypeWriterStub,file);
break;
case GELI_BINARY:
storage = new GeliBinaryGenotypeWriterStorage(genotypeWriterStub,file);
break;
case GLF:
storage = new GLFGenotypeWriterStorage(genotypeWriterStub,file);
break;
case VCF:
storage = new VCFGenotypeWriterStorage(genotypeWriterStub,file);
break;
default:
throw new StingException("Unsupported genotype file format: " + genotypeWriterStub.getFormat());
}
}
else {
switch(genotypeWriterStub.getFormat()) {
case GELI:
storage = new GeliTextGenotypeWriterStorage(genotypeWriterStub);
break;
case GELI_BINARY:
storage = new GeliBinaryGenotypeWriterStorage(genotypeWriterStub);
break;
case GLF:
storage = new GLFGenotypeWriterStorage(genotypeWriterStub);
break;
case VCF:
storage = new VCFGenotypeWriterStorage(genotypeWriterStub);
break;
default:
throw new StingException("Unsupported genotype file format: " + genotypeWriterStub.getFormat());
}
}
}
else else
throw new StingException("Unsupported stub type: " + stub.getClass().getName()); throw new StingException("Unsupported stub type: " + stub.getClass().getName());

View File

@ -0,0 +1,67 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
import java.util.Set;
/**
 * Provides temporary and permanent storage for genotypes in VCF format.
 *
 * All write operations are forwarded to the inherited {@code writer}; the
 * base class declares that field as a plain GenotypeWriter, hence the casts
 * to VCFGenotypeWriter below.
 *
 * @author mhanna
 * @version 0.1
 */
public class VCFGenotypeWriterStorage extends GenotypeWriterStorage<VCFGenotypeWriter> implements VCFGenotypeWriter {
    /**
     * Creates new (permanent) storage for VCF genotype writers.
     * @param stub Stub containing appropriate input parameters.
     */
    public VCFGenotypeWriterStorage(GenotypeWriterStub stub) {
        super(stub);
    }

    /**
     * Creates new (temporary) storage for VCF genotype writers.
     * @param stub Stub containing appropriate input parameters.
     * @param target Target file for output data.
     */
    public VCFGenotypeWriterStorage(GenotypeWriterStub stub,File target) {
        super(stub,target);
    }

    /**
     * Initialize this VCF header.
     *
     * @param sampleNames the sample names
     * @param headerInfo the optional header fields
     */
    public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
        ((VCFGenotypeWriter)writer).writeHeader(sampleNames,headerInfo);
    }

    /**
     * Add a given VCF record to the given output.
     * @param vcfRecord Record to add.
     */
    public void addRecord(VCFRecord vcfRecord) {
        ((VCFGenotypeWriter)writer).addRecord(vcfRecord);
    }

    /**
     * Merges the file backing this temporary storage into the target, then
     * deletes the backing file.  The reader is always closed, even if the
     * merge fails part-way; the backing file is only deleted after every
     * record has been copied successfully.
     * @param target Target stream for the temporary storage.  May not be null.
     */
    @Override
    public void mergeInto(VCFGenotypeWriter target) {
        VCFReader reader = new VCFReader(file);
        try {
            while ( reader.hasNext() )
                target.addRecord(reader.next());
        }
        finally {
            // Release the file handle regardless of whether the copy succeeded.
            reader.close();
        }
        // Result intentionally ignored, as before: a leftover temp file is not fatal.
        file.delete();
    }
}

View File

@ -0,0 +1,53 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
/**
 * Pass-through stub for GLF output: every GLF-specific write is handed to
 * the storage that the output tracker associates with this stub.
 *
 * @author mhanna
 * @version 0.1
 */
public class GLFGenotypeWriterStub extends GenotypeWriterStub<GLFGenotypeWriter> implements GLFGenotypeWriter {
    /**
     * Builds a stub bound to the given engine and output file.
     * @param engine The engine, for extracting command-line arguments, etc.
     * @param genotypeFile Target file into which to write genotyping data.
     */
    public GLFGenotypeWriterStub(final GenomeAnalysisEngine engine, final File genotypeFile) {
        super(engine, genotypeFile);
    }

    /**
     * Reports the format handled by this stub.  This accessor may eventually
     * be dropped in favor of instanceof comparisons.
     * @return GLF always.
     */
    public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
        return GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
    }

    /**
     * Forwards the GLF header to the storage backing this stub.
     * @param headerText The header to write.
     */
    public void writeHeader(final String headerText) {
        final GLFGenotypeWriter storage = outputTracker.getStorage(this);
        storage.writeHeader(headerText);
    }

    /**
     * Forwards a GLF record to the storage backing this stub.
     *
     * @param contigName the contig name
     * @param contigLength the contig length
     * @param rec the GLF record to write.
     */
    public void addGLFRecord(final String contigName, final int contigLength, final GLFRecord rec) {
        final GLFGenotypeWriter storage = outputTracker.getStorage(this);
        storage.addGLFRecord(contigName, contigLength, rec);
    }
}

View File

@ -0,0 +1,51 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import net.sf.samtools.SAMFileHeader;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
 * Pass-through stub for geli binary output: every geli-specific write is
 * handed to the storage that the output tracker associates with this stub.
 *
 * @author mhanna
 * @version 0.1
 */
public class GeliBinaryGenotypeWriterStub extends GenotypeWriterStub<GeliGenotypeWriter> implements GeliGenotypeWriter {
    /**
     * Builds a stub bound to the given engine and output file.
     * @param engine The engine, for extracting command-line arguments, etc.
     * @param genotypeFile Target file into which to write genotyping data.
     */
    public GeliBinaryGenotypeWriterStub(final GenomeAnalysisEngine engine, final File genotypeFile) {
        super(engine, genotypeFile);
    }

    /**
     * Reports the format handled by this stub.  This accessor may eventually
     * be dropped in favor of instanceof comparisons.
     * @return GELI_BINARY always.
     */
    public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
        return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY;
    }

    /**
     * Forwards the geli header to the storage backing this stub.
     * @param header The header to write.
     */
    public void writeHeader(final SAMFileHeader header) {
        final GeliGenotypeWriter storage = outputTracker.getStorage(this);
        storage.writeHeader(header);
    }

    /**
     * Forwards the genotype likelihoods to the storage backing this stub.
     * @param gl genotype likelihoods to write.
     */
    public void addGenotypeLikelihoods(final GenotypeLikelihoods gl) {
        final GeliGenotypeWriter storage = outputTracker.getStorage(this);
        storage.addGenotypeLikelihoods(gl);
    }
}

View File

@ -0,0 +1,51 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
import net.sf.samtools.SAMFileHeader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
 * Pass-through stub for geli text output: every geli-specific write is
 * handed to the storage that the output tracker associates with this stub.
 *
 * @author mhanna
 * @version 0.1
 */
public class GeliTextGenotypeWriterStub extends GenotypeWriterStub<GeliGenotypeWriter> implements GeliGenotypeWriter {
    /**
     * Builds a stub bound to the given engine and output file.
     * @param engine The engine, for extracting command-line arguments, etc.
     * @param genotypeFile Target file into which to write genotyping data.
     */
    public GeliTextGenotypeWriterStub(final GenomeAnalysisEngine engine, final File genotypeFile) {
        super(engine, genotypeFile);
    }

    /**
     * Reports the format handled by this stub.  This accessor may eventually
     * be dropped in favor of instanceof comparisons.
     * @return GELI always.
     */
    public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
        return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
    }

    /**
     * Forwards the geli header to the storage backing this stub.
     * @param header The header to write.
     */
    public void writeHeader(final SAMFileHeader header) {
        final GeliGenotypeWriter storage = outputTracker.getStorage(this);
        storage.writeHeader(header);
    }

    /**
     * Forwards the genotype likelihoods to the storage backing this stub.
     * @param gl genotype likelihoods to write.
     */
    public void addGenotypeLikelihoods(final GenotypeLikelihoods gl) {
        final GeliGenotypeWriter storage = outputTracker.getStorage(this);
        storage.addGenotypeLikelihoods(gl);
    }
}

View File

@ -82,7 +82,23 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
} }
} }
GenotypeWriterStub stub = new GenotypeWriterStub(engine, new File(writerFileName),genotypeFormat); GenotypeWriterStub stub = null;
switch(genotypeFormat) {
case GELI:
stub = new GeliTextGenotypeWriterStub(engine, new File(writerFileName));
break;
case GELI_BINARY:
stub = new GeliBinaryGenotypeWriterStub(engine, new File(writerFileName));
break;
case GLF:
stub = new GLFGenotypeWriterStub(engine, new File(writerFileName));
break;
case VCF:
stub = new VCFGenotypeWriterStub(engine, new File(writerFileName));
break;
default:
throw new StingException("Unable to create stub for file format " + genotypeFormat);
}
engine.addOutput(stub); engine.addOutput(stub);

View File

@ -42,7 +42,7 @@ import net.sf.samtools.SAMFileHeader;
* @author ebanks * @author ebanks
* @version 0.1 * @version 0.1
*/ */
public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter { public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements Stub<T>, GenotypeWriter {
/** /**
* Engine to use for collecting attributes for the output SAM file. * Engine to use for collecting attributes for the output SAM file.
@ -55,29 +55,20 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
*/ */
private final File genotypeFile; private final File genotypeFile;
/**
* The file format for the output
*/
private final GenotypeWriterFactory.GENOTYPE_FORMAT format;
/** /**
* Connects this stub with an external stream capable of serving the * Connects this stub with an external stream capable of serving the
* requests of the consumer of this stub. * requests of the consumer of this stub.
*/ */
private OutputTracker outputTracker = null; protected OutputTracker outputTracker = null;
/** /**
* Create a new stub given the requested file. * Create a new stub given the requested file.
* @param engine GATK engine. * @param engine GATK engine.
* @param genotypeFile file to (ultimately) create. * @param genotypeFile file to (ultimately) create.
* @param format file format.
*/ */
public GenotypeWriterStub( GenomeAnalysisEngine engine, public GenotypeWriterStub(GenomeAnalysisEngine engine,File genotypeFile) {
File genotypeFile,
GenotypeWriterFactory.GENOTYPE_FORMAT format) {
this.engine = engine; this.engine = engine;
this.genotypeFile = genotypeFile; this.genotypeFile = genotypeFile;
this.format = format;
} }
/** /**
@ -100,9 +91,7 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
* Retrieves the format to use when creating the new file. * Retrieves the format to use when creating the new file.
* @return format to use when creating the new file. * @return format to use when creating the new file.
*/ */
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() { public abstract GenotypeWriterFactory.GENOTYPE_FORMAT getFormat();
return format;
}
/** /**
* Registers the given streamConnector with this stub. * Registers the given streamConnector with this stub.

View File

@ -0,0 +1,53 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
import java.util.Set;
/**
 * Pass-through stub for VCF output: every VCF-specific write is handed to
 * the storage that the output tracker associates with this stub.
 *
 * @author mhanna
 * @version 0.1
 */
public class VCFGenotypeWriterStub extends GenotypeWriterStub<VCFGenotypeWriter> implements VCFGenotypeWriter {
    /**
     * Builds a stub bound to the given engine and output file.
     * @param engine The engine, for extracting command-line arguments, etc.
     * @param genotypeFile Target file into which to write genotyping data.
     */
    public VCFGenotypeWriterStub(final GenomeAnalysisEngine engine, final File genotypeFile) {
        super(engine, genotypeFile);
    }

    /**
     * Reports the format handled by this stub.  This accessor may eventually
     * be dropped in favor of instanceof comparisons.
     * @return VCF always.
     */
    public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
        return GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
    }

    /**
     * Forwards the VCF header contents to the storage backing this stub.
     *
     * @param sampleNames the sample names
     * @param headerInfo the optional header fields
     */
    public void writeHeader(final Set<String> sampleNames, final Set<VCFHeaderLine> headerInfo) {
        final VCFGenotypeWriter storage = outputTracker.getStorage(this);
        storage.writeHeader(sampleNames, headerInfo);
    }

    /**
     * Forwards a VCF record to the storage backing this stub.
     * @param vcfRecord Record to add.
     */
    public void addRecord(final VCFRecord vcfRecord) {
        final VCFGenotypeWriter storage = outputTracker.getStorage(this);
        storage.addRecord(vcfRecord);
    }
}

View File

@ -35,9 +35,10 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.cmdLine.*; import org.broadinstitute.sting.utils.cmdLine.*;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.File;
import java.util.*; import java.util.*;
@ -52,30 +53,21 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
// control the output // control the output
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false) @Argument(doc = "File to which variants should be written", required = false)
public File VARIANTS_FILE = null; public GenotypeWriter writer = null;
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
// the model used for calculating genotypes // the model used for calculating genotypes
private GenotypeCalculationModel gcm; private GenotypeCalculationModel gcm;
// output writer
private GenotypeWriter writer;
// samples in input // samples in input
private Set<String> samples; private Set<String> samples;
// keep track of some metrics about our calls // keep track of some metrics about our calls
private CallMetrics callsMetrics; private CallMetrics callsMetrics;
/** Enable deletions in the pileup **/ /** Enable deletions in the pileup **/
public boolean includeReadsWithDeletionAtLoci() { return true; } public boolean includeReadsWithDeletionAtLoci() { return true; }
/** /**
* Sets the argument collection for the UnifiedGenotyper. * Sets the argument collection for the UnifiedGenotyper.
* To be used with walkers that call the UnifiedGenotyper's map function * To be used with walkers that call the UnifiedGenotyper's map function
@ -121,11 +113,23 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// for ( String sample : samples ) // for ( String sample : samples )
// logger.debug("SAMPLE: " + sample); // logger.debug("SAMPLE: " + sample);
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, VAR_FORMAT); GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if(writer != null) {
if(writer instanceof VCFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if(writer instanceof GLFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if(writer instanceof GeliGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format);
// *** If we were called by another walker, then we don't *** // *** If we were called by another walker, then we don't ***
// *** want to do any of the other initialization steps. *** // *** want to do any of the other initialization steps. ***
if ( VARIANTS_FILE == null && out == null ) if ( writer == null )
return; return;
// *** If we got here, then we were instantiated by the GATK engine *** // *** If we got here, then we were instantiated by the GATK engine ***
@ -138,11 +142,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// get the optional header fields // get the optional header fields
Set<VCFHeaderLine> headerInfo = getHeaderInfo(); Set<VCFHeaderLine> headerInfo = getHeaderInfo();
// create the output writer stream and initialize the header // initialize the header
if ( VARIANTS_FILE != null )
writer = GenotypeWriterFactory.create(VAR_FORMAT, VARIANTS_FILE);
else
writer = GenotypeWriterFactory.create(VAR_FORMAT, out);
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, headerInfo); GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, headerInfo);
callsMetrics = new CallMetrics(); callsMetrics = new CallMetrics();
@ -152,7 +152,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>(); Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// this is only applicable to VCF // this is only applicable to VCF
if ( VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF ) if ( !(writer instanceof VCFGenotypeWriter) )
return headerInfo; return headerInfo;
// first, the basic info // first, the basic info
@ -285,7 +285,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// Close any file writers // Close any file writers
public void onTraversalDone(Integer sum) { public void onTraversalDone(Integer sum) {
writer.close();
gcm.close(); gcm.close();
logger.info("Processed " + sum + " loci that are callable for SNPs"); logger.info("Processed " + sum + " loci that are callable for SNPs");
} }

View File

@ -35,14 +35,6 @@ import java.util.List;
* The interface for writing genotype calls. * The interface for writing genotype calls.
*/ */
public interface GenotypeWriter { public interface GenotypeWriter {
/**
* Gets the file format of this genotype writer, to disambiguate
* between different forms of data required.
* @return Type of this GenotypeWriter.
*/
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat();
/** /**
* Add a genotype, given a genotype locus * Add a genotype, given a genotype locus
* @param call the locus to add * @param call the locus to add

View File

@ -22,7 +22,7 @@ import java.util.Set;
public class GenotypeWriterFactory { public class GenotypeWriterFactory {
/** available genotype writers */ /** available genotype writers */
public enum GENOTYPE_FORMAT { public enum GENOTYPE_FORMAT {
GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF; GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF
} }
/** /**
@ -64,16 +64,16 @@ public class GenotypeWriterFactory {
Set<String> sampleNames, Set<String> sampleNames,
Set<VCFHeaderLine> headerInfo) { Set<VCFHeaderLine> headerInfo) {
// VCF // VCF
if ( writer instanceof VCFGenotypeWriterAdapter ) { if ( writer instanceof VCFGenotypeWriter ) {
((VCFGenotypeWriterAdapter)writer).writeHeader(sampleNames, headerInfo); ((VCFGenotypeWriter)writer).writeHeader(sampleNames, headerInfo);
} }
// GELI BINARY // GELI
else if ( writer instanceof GeliAdapter ) { else if ( writer instanceof GeliGenotypeWriter ) {
((GeliAdapter)writer).writeHeader(header); ((GeliGenotypeWriter)writer).writeHeader(header);
} }
// GLF // GLF
else if ( writer instanceof GLFWriter ) { else if ( writer instanceof GLFGenotypeWriter ) {
((GLFWriter)writer).writeHeader(header.toString()); ((GLFGenotypeWriter)writer).writeHeader(header.toString());
} }
// nothing to do for GELI TEXT // nothing to do for GELI TEXT
} }

View File

@ -44,7 +44,7 @@ import java.util.List;
* Class GeliAdapter * Class GeliAdapter
* Adapts the Geli file writer to the Genotype writer interface * Adapts the Geli file writer to the Genotype writer interface
*/ */
public class GeliAdapter implements GenotypeWriter { public class GeliAdapter implements GeliGenotypeWriter {
// the file we're writing to // the file we're writing to
private File writeTo = null; private File writeTo = null;
@ -61,20 +61,12 @@ public class GeliAdapter implements GenotypeWriter {
this.writeTo = writeTo; this.writeTo = writeTo;
} }
/**
* Indicates that this is a binary-format geli writer.
* @return GENOTYPE_FORMAT.GELI_BINARY always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY;
}
/** /**
* wrap a GeliFileWriter in the Genotype writer interface * wrap a GeliFileWriter in the Genotype writer interface
* *
* @param fileHeader the file header to write out * @param fileHeader the file header to write out
*/ */
@Override
public void writeHeader(final SAMFileHeader fileHeader) { public void writeHeader(final SAMFileHeader fileHeader) {
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader); this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
} }

View File

@ -0,0 +1,26 @@
package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import net.sf.samtools.SAMFileHeader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
 * An extension of the GenotypeWriter interface for geli output, adding
 * support for writing a file header and raw genotype likelihoods.
 *
 * @author mhanna
 * @version 0.1
 */
public interface GeliGenotypeWriter extends GenotypeWriter {

    /**
     * Write the file header.
     *
     * @param fileHeader SAM file header from which to derive the geli header.
     */
    void writeHeader(SAMFileHeader fileHeader);

    /**
     * Writes the genotype likelihoods to the output.
     *
     * @param gl genotype likelihoods to write.
     */
    void addGenotypeLikelihoods(GenotypeLikelihoods gl);
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.geli; package org.broadinstitute.sting.utils.genotype.geli;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
@ -21,7 +22,7 @@ import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
* <p/> * <p/>
* write out the geli text file format containing genotype information * write out the geli text file format containing genotype information
*/ */
public class GeliTextWriter implements GenotypeWriter { public class GeliTextWriter implements GeliGenotypeWriter {
// where we write to // where we write to
PrintWriter mWriter; PrintWriter mWriter;
@ -36,25 +37,23 @@ public class GeliTextWriter implements GenotypeWriter {
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new StingException("Unable to open file " + file.toURI()); throw new StingException("Unable to open file " + file.toURI());
} }
mWriter.println(headerLine);
} }
public GeliTextWriter(PrintStream out) { public GeliTextWriter(PrintStream out) {
mWriter = new PrintWriter(out); mWriter = new PrintWriter(out);
mWriter.println(headerLine);
}
/**
* Indicates that this is a geli writer.
* @return GENOTYPE_FORMAT.GELI always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
} }
public final static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT"; public final static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";
/**
 * Write the file header, which for geli text output is the fixed
 * column listing held in {@code headerLine}.
 *
 * @param fileHeader SAM file header; not consulted by this writer.
 */
public void writeHeader(final SAMFileHeader fileHeader) {
    // The geli text header does not depend on the SAM header, so the argument is unused.
    this.mWriter.println(headerLine);
}
/** /**
* Add a genotype, given a call * Add a genotype, given a call
* *

View File

@ -0,0 +1,27 @@
package org.broadinstitute.sting.utils.genotype.glf;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
/**
 * An extension of the GenotypeWriter interface for GLF output, adding
 * support for writing header text and individual GLF records.
 *
 * @author mhanna
 * @version 0.1
 */
public interface GLFGenotypeWriter extends GenotypeWriter {

    /**
     * Append the given header text to the GLF file.
     *
     * @param headerText the file header to write out
     */
    void writeHeader(String headerText);

    /**
     * Add a GLF record to the output file.
     *
     * @param contigName   the contig name
     * @param contigLength the contig length
     * @param rec          the GLF record to write.
     */
    void addGLFRecord(String contigName, int contigLength, GLFRecord rec);
}

View File

@ -45,7 +45,7 @@ import java.util.List;
* single and variable length genotype calls using the provided functions. When you've finished * single and variable length genotype calls using the provided functions. When you've finished
* generating GLF records, make sure you close the file. * generating GLF records, make sure you close the file.
*/ */
public class GLFWriter implements GenotypeWriter { public class GLFWriter implements GLFGenotypeWriter {
// our output codec // our output codec
private final BinaryCodec outputBinaryCodec; private final BinaryCodec outputBinaryCodec;
@ -83,15 +83,6 @@ public class GLFWriter implements GenotypeWriter {
outputBinaryCodec.setOutputFileName(writeTo.toString()); outputBinaryCodec.setOutputFileName(writeTo.toString());
} }
/**
* Indicates that this is a GLF writer.
* @return GENOTYPE_FORMAT.GLF always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
}
/** /**
* Write out the header information for the GLF file. The header contains * Write out the header information for the GLF file. The header contains
* the magic number, the length of the header text, the text itself, the reference * the magic number, the length of the header text, the text itself, the reference

View File

@ -38,16 +38,6 @@ public class TabularLFWriter implements GenotypeWriter {
outStream.println("location sample_name ref alt genotype qhat qstar lodVsRef lodVsNextBest depth bases"); outStream.println("location sample_name ref alt genotype qhat qstar lodVsRef lodVsNextBest depth bases");
} }
/**
* Indicates that this is a tabular genotype writer.
* @return GENOTYPE_FORMAT.TABULAR always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR;
}
/** /**
* Add a genotype, given a genotype locus * Add a genotype, given a genotype locus
* *

View File

@ -0,0 +1,28 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.util.Set;
/**
 * An extension of the GenotypeWriter interface for VCF output, adding
 * support for writing the header and individual VCF records.
 *
 * @author mhanna
 * @version 0.1
 */
public interface VCFGenotypeWriter extends GenotypeWriter {

    /**
     * Initialize this VCF header.
     *
     * @param sampleNames the sample names
     * @param headerInfo  the optional header fields
     */
    void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo);

    /**
     * Add a given VCF record to the given output.
     *
     * @param vcfRecord Record to add.
     */
    void addRecord(VCFRecord vcfRecord);
}

View File

@ -15,7 +15,7 @@ import java.util.*;
* <p/> * <p/>
* Adapt the VCF writer to the genotype output system * Adapt the VCF writer to the genotype output system
*/ */
public class VCFGenotypeWriterAdapter implements GenotypeWriter { public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
// our VCF objects // our VCF objects
private VCFWriter mWriter = null; private VCFWriter mWriter = null;
private VCFHeader mHeader = null; private VCFHeader mHeader = null;
@ -35,21 +35,13 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
mWriter = new VCFWriter(writeTo); mWriter = new VCFWriter(writeTo);
} }
/**
* Indicates that this is a VCF writer.
* @return GENOTYPE_FORMAT.VCF always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
}
/** /**
* initialize this VCF header * initialize this VCF header
* *
* @param sampleNames the sample names * @param sampleNames the sample names
* @param headerInfo the optional header fields * @param headerInfo the optional header fields
*/ */
@Override
public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) { public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
mSampleNames.addAll(sampleNames); mSampleNames.addAll(sampleNames);