Rework Eric's output management code given that the behavior of the UG changes drastically

depending on its output format.  The current implementation is probably a bit overkill, and
we can whittle it down to what's absolutely necessary.
Writing VCFs to the protected 'out' PrintStream may not work at the moment.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2425 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-12-22 00:33:43 +00:00
parent f448a263e9
commit 0d890e1bf0
23 changed files with 675 additions and 170 deletions

View File

@ -0,0 +1,68 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
/**
* Provides temporary and permanent storage for genotypes in GLF format.
*
* @author mhanna
* @version 0.1
*/
public class GLFGenotypeWriterStorage extends GenotypeWriterStorage<GLFGenotypeWriter> implements GLFGenotypeWriter {
    /**
     * Creates new (permanent) storage for GLF genotype writers.
     * @param stub Stub containing appropriate input parameters.
     */
    public GLFGenotypeWriterStorage(GenotypeWriterStub stub) {
        super(stub);
    }

    /**
     * Creates new (temporary) storage for GLF genotype writers.
     * @param stub Stub containing appropriate input parameters.
     * @param target Target file for output data.
     */
    public GLFGenotypeWriterStorage(GenotypeWriterStub stub, File target) {
        super(stub,target);
    }

    /**
     * Write the GLF header to the target file by delegating to the wrapped writer.
     * @param headerText The header to write.
     */
    public void writeHeader(String headerText) {
        // The base class holds the writer as a plain GenotypeWriter, hence the cast.
        ((GLFGenotypeWriter)writer).writeHeader(headerText);
    }

    /**
     * Add a GLF record to the output file by delegating to the wrapped writer.
     *
     * @param contigName the contig name
     * @param contigLength the contig length
     * @param rec the GLF record to write.
     */
    public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) {
        ((GLFGenotypeWriter)writer).addGLFRecord(contigName,contigLength,rec);
    }

    /**
     * Merges the file backing this temporary storage into the target, then deletes the file.
     * The reader is closed even when copying fails; the backing file is only deleted
     * after every record has been handed to the target.
     * @param target Target stream for the temporary storage. May not be null.
     */
    @Override
    public void mergeInto(GLFGenotypeWriter target) {
        GLFReader reader = new GLFReader(file);
        try {
            while ( reader.hasNext() ) {
                GLFRecord rec = reader.next();
                // NOTE(review): the second parameter of addGLFRecord is documented as the contig
                // length, but the record's position is passed here -- confirm this is intended.
                target.addGLFRecord(rec.getContig(),(int)rec.getPosition(),rec);
            }
        }
        finally {
            // Release the reader regardless of whether the copy succeeded.
            reader.close();
        }
        file.delete();
    }
}

View File

@ -0,0 +1,66 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
/**
* Provides temporary and permanent storage for genotypes in Geli binary format.
*
* @author mhanna
* @version 0.1
*/
public class GeliBinaryGenotypeWriterStorage extends GenotypeWriterStorage<GeliGenotypeWriter> implements GeliGenotypeWriter {
    /**
     * Creates new (permanent) storage for geli binary genotype writers.
     * @param stub Stub containing appropriate input parameters.
     */
    public GeliBinaryGenotypeWriterStorage(GenotypeWriterStub stub) {
        super(stub);
    }

    /**
     * Creates new (temporary) storage for geli binary genotype writers.
     * @param stub Stub containing appropriate input parameters.
     * @param target Target file for output data.
     */
    public GeliBinaryGenotypeWriterStorage(GenotypeWriterStub stub, File target) {
        super(stub,target);
    }

    /**
     * Write the geli header to the target file by delegating to the wrapped writer.
     * @param header The header to write.
     */
    public void writeHeader(SAMFileHeader header) {
        // The base class holds the writer as a plain GenotypeWriter, hence the cast.
        ((GeliGenotypeWriter)writer).writeHeader(header);
    }

    /**
     * Writes the genotype likelihoods to the output by delegating to the wrapped writer.
     * @param gl genotype likelihoods to write.
     */
    public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
        ((GeliGenotypeWriter)writer).addGenotypeLikelihoods(gl);
    }

    /**
     * Merges the file backing this temporary storage into the target, then deletes the file.
     * The reader is closed even when copying fails; the backing file is only deleted
     * after every record has been handed to the target.
     * @param target Target stream for the temporary storage. May not be null.
     */
    @Override
    public void mergeInto(GeliGenotypeWriter target) {
        GeliFileReader reader = new GeliFileReader(file);
        try {
            while ( reader.hasNext() )
                target.addGenotypeLikelihoods(reader.next());
        }
        finally {
            // Release the reader regardless of whether the copy succeeded.
            reader.close();
        }
        file.delete();
    }
}

View File

@ -0,0 +1,65 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
/**
* Provides temporary and permanent storage for genotypes in Geli text format.
*
* @author mhanna
* @version 0.1
*/
public class GeliTextGenotypeWriterStorage extends GenotypeWriterStorage<GeliGenotypeWriter> implements GeliGenotypeWriter {
    /**
     * Creates new (permanent) storage for geli text genotype writers.
     * @param stub Stub containing appropriate input parameters.
     */
    public GeliTextGenotypeWriterStorage(GenotypeWriterStub stub) {
        super(stub);
    }

    /**
     * Creates new (temporary) storage for geli text genotype writers.
     * @param stub Stub containing appropriate input parameters.
     * @param target Target file for output data.
     */
    public GeliTextGenotypeWriterStorage(GenotypeWriterStub stub, File target) {
        super(stub,target);
    }

    /**
     * Write the geli header to the target file by delegating to the wrapped writer.
     * @param header The header to write.
     */
    public void writeHeader(SAMFileHeader header) {
        // The base class holds the writer as a plain GenotypeWriter, hence the cast.
        ((GeliGenotypeWriter)writer).writeHeader(header);
    }

    /**
     * Writes the genotype likelihoods to the output by delegating to the wrapped writer.
     * @param gl genotype likelihoods to write.
     */
    public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
        ((GeliGenotypeWriter)writer).addGenotypeLikelihoods(gl);
    }

    /**
     * Merges the file backing this temporary storage into the target, then deletes the file.
     * The reader is closed even when copying fails; the backing file is only deleted
     * after every record has been handed to the target.
     * @param target Target stream for the temporary storage. May not be null.
     */
    @Override
    public void mergeInto(GeliGenotypeWriter target) {
        // NOTE(review): the same GeliFileReader is used by the geli binary storage;
        // confirm that it can also parse the geli text output written here.
        GeliFileReader reader = new GeliFileReader(file);
        try {
            while ( reader.hasNext() )
                target.addGenotypeLikelihoods(reader.next());
        }
        finally {
            // Release the reader regardless of whether the copy succeeded.
            reader.close();
        }
        file.delete();
    }
}

View File

@ -32,11 +32,8 @@ import java.util.HashSet;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.glf.*;
import org.broadinstitute.sting.utils.genotype.geli.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.SampleUtils;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
/**
* Provides temporary storage for GenotypeWriters.
@ -44,76 +41,32 @@ import edu.mit.broad.picard.genotype.geli.GeliFileReader;
* @author ebanks
* @version 0.1
*/
public class GenotypeWriterStorage implements GenotypeWriter, Storage<GenotypeWriter> {
private final GenotypeWriterFactory.GENOTYPE_FORMAT format;
private final File file;
private final GenotypeWriter writer;
public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements GenotypeWriter, Storage<T> {
protected final File file;
protected final GenotypeWriter writer;
/**
* Constructs an object which will write directly into the output file provided by the stub.
* Intentionally delaying the writing of the header -- this should be filled in by the walker.
* @param stub Stub to use when constructing the output file.
*/
public GenotypeWriterStorage( GenotypeWriterStub stub ) {
this(stub, stub.getFile());
this.file = stub.getFile();
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
}
/**
* Constructs an object which will redirect into a different file.
* @param stub Stub to use when synthesizing file / header info.
* @param file File into which to direct the output data.
*/
public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) {
this.format = stub.getFormat();
this.file = file;
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
}
/**
* Reports the format of the given genotyping data, taken directly from the stub.
* @return The format of the genotyping file.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return format;
}
public void mergeInto( GenotypeWriter targetStream ) {
// TODO -- This is ugly, but there is no GenotypeWriter interface since
// TODO -- VCFReaders need to be separated out for compatability with Tribble
// TODO -- and the adapters don't all implement a common interface. Fix me. Please.
// VCF
if ( targetStream instanceof VCFGenotypeWriterAdapter ) {
VCFReader reader = new VCFReader(file);
while ( reader.hasNext() )
((VCFGenotypeWriterAdapter)targetStream).addRecord(reader.next());
reader.close();
}
// GELI TEXT
else if ( targetStream instanceof GeliTextWriter ) {
GeliFileReader reader = new GeliFileReader(file);
while ( reader.hasNext() )
((GeliTextWriter)targetStream).addGenotypeLikelihoods(reader.next());
reader.close();
}
// GELI BINARY
else if ( targetStream instanceof GeliAdapter ) {
GeliFileReader reader = new GeliFileReader(file);
while ( reader.hasNext() )
((GeliAdapter)targetStream).addGenotypeLikelihoods(reader.next());
reader.close();
}
// GLF
else if ( targetStream instanceof GLFWriter ) {
GLFReader reader = new GLFReader(file);
while ( reader.hasNext() ) {
GLFRecord rec = reader.next();
((GLFWriter)targetStream).addGLFRecord(rec.getContig(),(int)rec.getPosition(),rec);
}
reader.close();
}
file.delete();
}
public void addGenotypeCall(Genotype call) {
writer.addGenotypeCall(call);
}

View File

@ -28,9 +28,7 @@ package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub;
import org.broadinstitute.sting.gatk.io.storage.SAMFileWriterStorage;
import org.broadinstitute.sting.gatk.io.storage.Storage;
import org.broadinstitute.sting.gatk.io.storage.OutputStreamStorage;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
@ -79,6 +77,45 @@ public class StorageFactory {
else
storage = new SAMFileWriterStorage((SAMFileWriterStub)stub);
}
else if(stub instanceof GenotypeWriterStub) {
GenotypeWriterStub genotypeWriterStub = (GenotypeWriterStub)stub;
if( file != null ) {
switch(genotypeWriterStub.getFormat()) {
case GELI:
storage = new GeliTextGenotypeWriterStorage(genotypeWriterStub,file);
break;
case GELI_BINARY:
storage = new GeliBinaryGenotypeWriterStorage(genotypeWriterStub,file);
break;
case GLF:
storage = new GLFGenotypeWriterStorage(genotypeWriterStub,file);
break;
case VCF:
storage = new VCFGenotypeWriterStorage(genotypeWriterStub,file);
break;
default:
throw new StingException("Unsupported genotype file format: " + genotypeWriterStub.getFormat());
}
}
else {
switch(genotypeWriterStub.getFormat()) {
case GELI:
storage = new GeliTextGenotypeWriterStorage(genotypeWriterStub);
break;
case GELI_BINARY:
storage = new GeliBinaryGenotypeWriterStorage(genotypeWriterStub);
break;
case GLF:
storage = new GLFGenotypeWriterStorage(genotypeWriterStub);
break;
case VCF:
storage = new VCFGenotypeWriterStorage(genotypeWriterStub);
break;
default:
throw new StingException("Unsupported genotype file format: " + genotypeWriterStub.getFormat());
}
}
}
else
throw new StingException("Unsupported stub type: " + stub.getClass().getName());

View File

@ -0,0 +1,67 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
import java.util.Set;
/**
* Provides temporary and permanent storage for genotypes in VCF format.
*
* @author mhanna
* @version 0.1
*/
public class VCFGenotypeWriterStorage extends GenotypeWriterStorage<VCFGenotypeWriter> implements VCFGenotypeWriter {
    /**
     * Creates new (permanent) storage for VCF genotype writers.
     * @param stub Stub containing appropriate input parameters.
     */
    public VCFGenotypeWriterStorage(GenotypeWriterStub stub) {
        super(stub);
    }

    /**
     * Creates new (temporary) storage for VCF genotype writers.
     * @param stub Stub containing appropriate input parameters.
     * @param target Target file for output data.
     */
    public VCFGenotypeWriterStorage(GenotypeWriterStub stub,File target) {
        super(stub,target);
    }

    /**
     * Initialize the VCF header by delegating to the wrapped writer.
     *
     * @param sampleNames the sample names
     * @param headerInfo the optional header fields
     */
    public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
        // The base class holds the writer as a plain GenotypeWriter, hence the cast.
        ((VCFGenotypeWriter)writer).writeHeader(sampleNames,headerInfo);
    }

    /**
     * Add a given VCF record to the output by delegating to the wrapped writer.
     * @param vcfRecord Record to add.
     */
    public void addRecord(VCFRecord vcfRecord) {
        ((VCFGenotypeWriter)writer).addRecord(vcfRecord);
    }

    /**
     * Merges the file backing this temporary storage into the target, then deletes the file.
     * The reader is closed even when copying fails; the backing file is only deleted
     * after every record has been handed to the target.
     * @param target Target stream for the temporary storage. May not be null.
     */
    @Override
    public void mergeInto(VCFGenotypeWriter target) {
        VCFReader reader = new VCFReader(file);
        try {
            while ( reader.hasNext() )
                target.addRecord(reader.next());
        }
        finally {
            // Release the reader regardless of whether the copy succeeded.
            reader.close();
        }
        file.delete();
    }
}

View File

@ -0,0 +1,53 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
/**
* Stub providing a passthrough for GLF files.
*
* @author mhanna
* @version 0.1
*/
public class GLFGenotypeWriterStub extends GenotypeWriterStub<GLFGenotypeWriter> implements GLFGenotypeWriter {
    /**
     * Builds a GLF stub from the given engine and destination file.
     * @param engine The engine, for extracting command-line arguments, etc.
     * @param genotypeFile Target file into which to write genotyping data.
     */
    public GLFGenotypeWriterStub(final GenomeAnalysisEngine engine, final File genotypeFile) {
        super(engine, genotypeFile);
    }

    /**
     * Reports the format handled by this stub.  This accessor may eventually be
     * dropped in favor of instanceof comparisons.
     * @return GLF always.
     */
    public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
        return GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
    }

    /**
     * Forwards the GLF header to the storage the output tracker returns for this stub.
     * @param headerText The header to write.
     */
    public void writeHeader(final String headerText) {
        final GLFGenotypeWriter storage = outputTracker.getStorage(this);
        storage.writeHeader(headerText);
    }

    /**
     * Forwards a GLF record to the storage the output tracker returns for this stub.
     *
     * @param contigName the contig name
     * @param contigLength the contig length
     * @param rec the GLF record to write.
     */
    public void addGLFRecord(final String contigName, final int contigLength, final GLFRecord rec) {
        final GLFGenotypeWriter storage = outputTracker.getStorage(this);
        storage.addGLFRecord(contigName, contigLength, rec);
    }
}

View File

@ -0,0 +1,51 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import net.sf.samtools.SAMFileHeader;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
* Stub providing a passthrough for geli binary files.
*
* @author mhanna
* @version 0.1
*/
public class GeliBinaryGenotypeWriterStub extends GenotypeWriterStub<GeliGenotypeWriter> implements GeliGenotypeWriter {
    /**
     * Builds a geli binary stub from the given engine and destination file.
     * @param engine The engine, for extracting command-line arguments, etc.
     * @param genotypeFile Target file into which to write genotyping data.
     */
    public GeliBinaryGenotypeWriterStub(final GenomeAnalysisEngine engine, final File genotypeFile) {
        super(engine, genotypeFile);
    }

    /**
     * Reports the format handled by this stub.  This accessor may eventually be
     * dropped in favor of instanceof comparisons.
     * @return GELI_BINARY always.
     */
    public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
        return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY;
    }

    /**
     * Forwards the geli header to the storage the output tracker returns for this stub.
     * @param header The header to write.
     */
    public void writeHeader(final SAMFileHeader header) {
        final GeliGenotypeWriter storage = outputTracker.getStorage(this);
        storage.writeHeader(header);
    }

    /**
     * Forwards the genotype likelihoods to the storage the output tracker returns for this stub.
     * @param gl genotype likelihoods to write.
     */
    public void addGenotypeLikelihoods(final GenotypeLikelihoods gl) {
        final GeliGenotypeWriter storage = outputTracker.getStorage(this);
        storage.addGenotypeLikelihoods(gl);
    }
}

View File

@ -0,0 +1,51 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
import net.sf.samtools.SAMFileHeader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
* Stub providing a passthrough for geli text files.
*
* @author mhanna
* @version 0.1
*/
public class GeliTextGenotypeWriterStub extends GenotypeWriterStub<GeliGenotypeWriter> implements GeliGenotypeWriter {
    /**
     * Builds a geli text stub from the given engine and destination file.
     * @param engine The engine, for extracting command-line arguments, etc.
     * @param genotypeFile Target file into which to write genotyping data.
     */
    public GeliTextGenotypeWriterStub(final GenomeAnalysisEngine engine, final File genotypeFile) {
        super(engine, genotypeFile);
    }

    /**
     * Reports the format handled by this stub.  This accessor may eventually be
     * dropped in favor of instanceof comparisons.
     * @return GELI always.
     */
    public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
        return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
    }

    /**
     * Forwards the geli header to the storage the output tracker returns for this stub.
     * @param header The header to write.
     */
    public void writeHeader(final SAMFileHeader header) {
        final GeliGenotypeWriter storage = outputTracker.getStorage(this);
        storage.writeHeader(header);
    }

    /**
     * Forwards the genotype likelihoods to the storage the output tracker returns for this stub.
     * @param gl genotype likelihoods to write.
     */
    public void addGenotypeLikelihoods(final GenotypeLikelihoods gl) {
        final GeliGenotypeWriter storage = outputTracker.getStorage(this);
        storage.addGenotypeLikelihoods(gl);
    }
}

View File

@ -82,7 +82,23 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
}
}
GenotypeWriterStub stub = new GenotypeWriterStub(engine, new File(writerFileName),genotypeFormat);
GenotypeWriterStub stub = null;
switch(genotypeFormat) {
case GELI:
stub = new GeliTextGenotypeWriterStub(engine, new File(writerFileName));
break;
case GELI_BINARY:
stub = new GeliBinaryGenotypeWriterStub(engine, new File(writerFileName));
break;
case GLF:
stub = new GLFGenotypeWriterStub(engine, new File(writerFileName));
break;
case VCF:
stub = new VCFGenotypeWriterStub(engine, new File(writerFileName));
break;
default:
throw new StingException("Unable to create stub for file format " + genotypeFormat);
}
engine.addOutput(stub);

View File

@ -42,7 +42,7 @@ import net.sf.samtools.SAMFileHeader;
* @author ebanks
* @version 0.1
*/
public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter {
public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements Stub<T>, GenotypeWriter {
/**
* Engine to use for collecting attributes for the output SAM file.
@ -55,29 +55,20 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
*/
private final File genotypeFile;
/**
* The file format for the output
*/
private final GenotypeWriterFactory.GENOTYPE_FORMAT format;
/**
* Connects this stub with an external stream capable of serving the
* requests of the consumer of this stub.
*/
private OutputTracker outputTracker = null;
protected OutputTracker outputTracker = null;
/**
* Create a new stub given the requested file.
* @param engine GATK engine.
* @param genotypeFile file to (ultimately) create.
* @param format file format.
*/
public GenotypeWriterStub( GenomeAnalysisEngine engine,
File genotypeFile,
GenotypeWriterFactory.GENOTYPE_FORMAT format) {
public GenotypeWriterStub(GenomeAnalysisEngine engine,File genotypeFile) {
this.engine = engine;
this.genotypeFile = genotypeFile;
this.format = format;
}
/**
@ -100,9 +91,7 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
* Retrieves the format to use when creating the new file.
* @return format to use when creating the new file.
*/
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return format;
}
public abstract GenotypeWriterFactory.GENOTYPE_FORMAT getFormat();
/**
* Registers the given streamConnector with this stub.

View File

@ -0,0 +1,53 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
import java.util.Set;
/**
* Stub providing a passthrough for VCF files.
*
* @author mhanna
* @version 0.1
*/
public class VCFGenotypeWriterStub extends GenotypeWriterStub<VCFGenotypeWriter> implements VCFGenotypeWriter {
    /**
     * Builds a VCF stub from the given engine and destination file.
     * @param engine The engine, for extracting command-line arguments, etc.
     * @param genotypeFile Target file into which to write genotyping data.
     */
    public VCFGenotypeWriterStub(final GenomeAnalysisEngine engine, final File genotypeFile) {
        super(engine, genotypeFile);
    }

    /**
     * Reports the format handled by this stub.  This accessor may eventually be
     * dropped in favor of instanceof comparisons.
     * @return VCF always.
     */
    public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
        return GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
    }

    /**
     * Forwards the VCF header information to the storage the output tracker returns for this stub.
     *
     * @param sampleNames the sample names
     * @param headerInfo the optional header fields
     */
    public void writeHeader(final Set<String> sampleNames, final Set<VCFHeaderLine> headerInfo) {
        final VCFGenotypeWriter storage = outputTracker.getStorage(this);
        storage.writeHeader(sampleNames, headerInfo);
    }

    /**
     * Forwards a VCF record to the storage the output tracker returns for this stub.
     * @param vcfRecord Record to add.
     */
    public void addRecord(final VCFRecord vcfRecord) {
        final VCFGenotypeWriter storage = outputTracker.getStorage(this);
        storage.addRecord(vcfRecord);
    }
}

View File

@ -35,9 +35,10 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.cmdLine.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.File;
import java.util.*;
@ -52,30 +53,21 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
// control the output
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false)
public File VARIANTS_FILE = null;
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
@Argument(doc = "File to which variants should be written", required = false)
public GenotypeWriter writer = null;
// the model used for calculating genotypes
private GenotypeCalculationModel gcm;
// output writer
private GenotypeWriter writer;
// samples in input
private Set<String> samples;
// keep track of some metrics about our calls
private CallMetrics callsMetrics;
/** Enable deletions in the pileup **/
public boolean includeReadsWithDeletionAtLoci() { return true; }
/**
* Sets the argument collection for the UnifiedGenotyper.
* To be used with walkers that call the UnifiedGenotyper's map function
@ -121,11 +113,23 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// for ( String sample : samples )
// logger.debug("SAMPLE: " + sample);
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, VAR_FORMAT);
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if(writer != null) {
if(writer instanceof VCFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if(writer instanceof GLFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if(writer instanceof GeliGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format);
// *** If we were called by another walker, then we don't ***
// *** want to do any of the other initialization steps. ***
if ( VARIANTS_FILE == null && out == null )
if ( writer == null )
return;
// *** If we got here, then we were instantiated by the GATK engine ***
@ -138,11 +142,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// get the optional header fields
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
// create the output writer stream and initialize the header
if ( VARIANTS_FILE != null )
writer = GenotypeWriterFactory.create(VAR_FORMAT, VARIANTS_FILE);
else
writer = GenotypeWriterFactory.create(VAR_FORMAT, out);
// initialize the header
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, headerInfo);
callsMetrics = new CallMetrics();
@ -152,7 +152,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// this is only applicable to VCF
if ( VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF )
if ( !(writer instanceof VCFGenotypeWriter) )
return headerInfo;
// first, the basic info
@ -285,7 +285,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// Close any file writers
public void onTraversalDone(Integer sum) {
writer.close();
gcm.close();
logger.info("Processed " + sum + " loci that are callable for SNPs");
}

View File

@ -35,14 +35,6 @@ import java.util.List;
* The interface for writing genotype calls.
*/
public interface GenotypeWriter {
/**
* Gets the file format of this genotype writer, to disambiguate
* between different forms of data required.
* @return Type of this GenotypeWriter.
*/
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat();
/**
* Add a genotype, given a genotype locus
* @param call the locus to add

View File

@ -22,7 +22,7 @@ import java.util.Set;
public class GenotypeWriterFactory {
/** available genotype writers */
public enum GENOTYPE_FORMAT {
GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF;
GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF
}
/**
@ -64,16 +64,16 @@ public class GenotypeWriterFactory {
Set<String> sampleNames,
Set<VCFHeaderLine> headerInfo) {
// VCF
if ( writer instanceof VCFGenotypeWriterAdapter ) {
((VCFGenotypeWriterAdapter)writer).writeHeader(sampleNames, headerInfo);
if ( writer instanceof VCFGenotypeWriter ) {
((VCFGenotypeWriter)writer).writeHeader(sampleNames, headerInfo);
}
// GELI BINARY
else if ( writer instanceof GeliAdapter ) {
((GeliAdapter)writer).writeHeader(header);
// GELI
else if ( writer instanceof GeliGenotypeWriter ) {
((GeliGenotypeWriter)writer).writeHeader(header);
}
// GLF
else if ( writer instanceof GLFWriter ) {
((GLFWriter)writer).writeHeader(header.toString());
else if ( writer instanceof GLFGenotypeWriter ) {
((GLFGenotypeWriter)writer).writeHeader(header.toString());
}
// nothing to do for GELI TEXT
}

View File

@ -44,7 +44,7 @@ import java.util.List;
* Class GeliAdapter
* Adapts the Geli file writer to the Genotype writer interface
*/
public class GeliAdapter implements GenotypeWriter {
public class GeliAdapter implements GeliGenotypeWriter {
// the file we're writing to
private File writeTo = null;
@ -61,20 +61,12 @@ public class GeliAdapter implements GenotypeWriter {
this.writeTo = writeTo;
}
/**
* Indicates that this is a binary-format geli writer.
* @return GENOTYPE_FORMAT.GELI_BINARY always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY;
}
/**
* wrap a GeliFileWriter in the Genotype writer interface
*
* @param fileHeader the file header to write out
*/
@Override
public void writeHeader(final SAMFileHeader fileHeader) {
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
}

View File

@ -0,0 +1,26 @@
package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import net.sf.samtools.SAMFileHeader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
* An extension of eth GenotypeWriter interface with support
* for adding a header.
*
* @author mhanna
* @version 0.1
*/
public interface GeliGenotypeWriter extends GenotypeWriter {
    /**
     * Writes the header of the geli file.
     * @param fileHeader SAM file header from which to derive the geli header.
     */
    void writeHeader(SAMFileHeader fileHeader);

    /**
     * Appends a set of genotype likelihoods to the output.
     * @param gl genotype likelihoods to write.
     */
    void addGenotypeLikelihoods(GenotypeLikelihoods gl);
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.geli;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.*;
@ -21,7 +22,7 @@ import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
* <p/>
* write out the geli text file format containing genotype information
*/
public class GeliTextWriter implements GenotypeWriter {
public class GeliTextWriter implements GeliGenotypeWriter {
// where we write to
PrintWriter mWriter;
@ -36,25 +37,23 @@ public class GeliTextWriter implements GenotypeWriter {
} catch (FileNotFoundException e) {
throw new StingException("Unable to open file " + file.toURI());
}
mWriter.println(headerLine);
}
public GeliTextWriter(PrintStream out) {
mWriter = new PrintWriter(out);
mWriter.println(headerLine);
}
/**
* Indicates that this is a geli writer.
* @return GENOTYPE_FORMAT.GELI always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
}
public final static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";
/**
* Write the file header.
* @param fileHeader SAM file header from which to derive the geli header.
*/
public void writeHeader(final SAMFileHeader fileHeader) {
// ignore the SAM header; the geli text header is fixed.
mWriter.println(headerLine);
}
/**
* Add a genotype, given a call
*

View File

@ -0,0 +1,27 @@
package org.broadinstitute.sting.utils.genotype.glf;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
/**
* An extension of eth GenotypeWriter interface with support
* for adding header lines.
*
* @author mhanna
* @version 0.1
*/
public interface GLFGenotypeWriter extends GenotypeWriter {
    /**
     * Writes the given header text to the GLF file.
     * @param headerText the file header to write out
     */
    void writeHeader(String headerText);

    /**
     * Adds a GLF record to the output file.
     *
     * @param contigName the contig name
     * @param contigLength the contig length
     * @param rec the GLF record to write.
     */
    void addGLFRecord(String contigName, int contigLength, GLFRecord rec);
}

View File

@ -45,7 +45,7 @@ import java.util.List;
* single and variable length genotype calls using the provided functions. When you've finished
* generating GLF records, make sure you close the file.
*/
public class GLFWriter implements GenotypeWriter {
public class GLFWriter implements GLFGenotypeWriter {
// our output codec
private final BinaryCodec outputBinaryCodec;
@ -83,15 +83,6 @@ public class GLFWriter implements GenotypeWriter {
outputBinaryCodec.setOutputFileName(writeTo.toString());
}
/**
* Indicates that this is a GLF writer.
* @return GENOTYPE_FORMAT.GLF always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
}
/**
* Write out the header information for the GLF file. The header contains
* the magic number, the length of the header text, the text itself, the reference

View File

@ -38,16 +38,6 @@ public class TabularLFWriter implements GenotypeWriter {
outStream.println("location sample_name ref alt genotype qhat qstar lodVsRef lodVsNextBest depth bases");
}
/**
* Indicates that this is a tabular genotype writer.
* @return GENOTYPE_FORMAT.TABULAR always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR;
}
/**
* Add a genotype, given a genotype locus
*

View File

@ -0,0 +1,28 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.util.Set;
/**
* An extension of eth GenotypeWriter interface with support
* for adding header lines.
*
* @author mhanna
* @version 0.1
*/
public interface VCFGenotypeWriter extends GenotypeWriter {
    /**
     * Initializes the VCF header.
     *
     * @param sampleNames the sample names
     * @param headerInfo the optional header fields
     */
    void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo);

    /**
     * Adds the given VCF record to the output.
     * @param vcfRecord Record to add.
     */
    void addRecord(VCFRecord vcfRecord);
}

View File

@ -15,7 +15,7 @@ import java.util.*;
* <p/>
* Adapt the VCF writer to the genotype output system
*/
public class VCFGenotypeWriterAdapter implements GenotypeWriter {
public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
// our VCF objects
private VCFWriter mWriter = null;
private VCFHeader mHeader = null;
@ -35,21 +35,13 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
mWriter = new VCFWriter(writeTo);
}
/**
* Indicates that this is a VCF writer.
* @return GENOTYPE_FORMAT.VCF always.
*/
@Override
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
}
/**
* initialize this VCF header
*
* @param sampleNames the sample names
* @param headerInfo the optional header fields
*/
@Override
public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
mSampleNames.addAll(sampleNames);