Pushed header initialization out of the GenotypeWriter constructors and into a writeHeader method, in preparation for parallelization.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2406 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-18 19:16:41 +00:00
parent eeddf0d08e
commit 4ea31fd949
16 changed files with 146 additions and 170 deletions

View File

@ -27,12 +27,15 @@ package org.broadinstitute.sting.gatk.io.storage;
import java.io.*; import java.io.*;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.HashSet;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.glf.*; import org.broadinstitute.sting.utils.genotype.glf.*;
import org.broadinstitute.sting.utils.genotype.geli.*; import org.broadinstitute.sting.utils.genotype.geli.*;
import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.SampleUtils;
import edu.mit.broad.picard.genotype.geli.GeliFileReader; import edu.mit.broad.picard.genotype.geli.GeliFileReader;
/** /**
@ -51,11 +54,9 @@ public class GenotypeWriterStorage implements GenotypeWriter, Storage<GenotypeWr
public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) { public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) {
this.file = file; this.file = file;
writer = GenotypeWriterFactory.create(stub.getFormat(), writer = GenotypeWriterFactory.create(stub.getFormat(), file);
stub.getSAMFileHeader(), Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
file, GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
stub.getSampleNames(),
stub.getHeaderInfo());
} }
public void mergeInto( GenotypeWriter targetStream ) { public void mergeInto( GenotypeWriter targetStream ) {

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.io.stubs;
import java.io.File; import java.io.File;
import java.util.List; import java.util.List;
import java.util.Set;
import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -35,7 +34,6 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariationCall; import org.broadinstitute.sting.utils.genotype.VariationCall;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
/** /**
@ -62,19 +60,6 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
*/ */
private final GenotypeWriterFactory.GENOTYPE_FORMAT format; private final GenotypeWriterFactory.GENOTYPE_FORMAT format;
/**
* The sample names for the output file
*/
private final Set<String> sampleNames;
/**
* The header info for the output file
*/
private final Set<VCFHeaderLine> headerInfo;
/** /**
* Connects this stub with an external stream capable of serving the * Connects this stub with an external stream capable of serving the
* requests of the consumer of this stub. * requests of the consumer of this stub.
@ -86,19 +71,13 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
* @param engine GATK engine. * @param engine GATK engine.
* @param genotypeFile file to (ultimately) create. * @param genotypeFile file to (ultimately) create.
* @param format file format. * @param format file format.
* @param sampleNames sample names to use for creating writer.
* @param headerInfo header info to use for creating writer.
*/ */
public GenotypeWriterStub( GenomeAnalysisEngine engine, public GenotypeWriterStub( GenomeAnalysisEngine engine,
File genotypeFile, File genotypeFile,
GenotypeWriterFactory.GENOTYPE_FORMAT format, GenotypeWriterFactory.GENOTYPE_FORMAT format) {
Set<String> sampleNames,
Set<VCFHeaderLine> headerInfo) {
this.engine = engine; this.engine = engine;
this.genotypeFile = genotypeFile; this.genotypeFile = genotypeFile;
this.format = format; this.format = format;
this.sampleNames = sampleNames;
this.headerInfo = headerInfo;
} }
/** /**
@ -125,22 +104,6 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
return format; return format;
} }
/**
* Retrieves the sample names to use when creating the new file.
* @return sample names to use when creating the new file.
*/
public Set<String> getSampleNames() {
return sampleNames;
}
/**
* Retrieves the header info to use when creating the new file.
* @return header info to use when creating the new file.
*/
public Set<VCFHeaderLine> getHeaderInfo() {
return headerInfo;
}
/** /**
* Registers the given streamConnector with this stub. * Registers the given streamConnector with this stub.
* @param outputTracker The connector used to provide an appropriate stream. * @param outputTracker The connector used to provide an appropriate stream.

View File

@ -122,8 +122,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName())); hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations)); hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations));
vcfWriter = new VCFWriter(VCF_OUT);
vcfHeader = new VCFHeader(hInfo, samples); vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter = new VCFWriter(vcfHeader, VCF_OUT); vcfWriter.writeHeader(vcfHeader);
} }
/** /**

View File

@ -108,9 +108,9 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
hInfo.add(new VCFHeaderLine("source", "CallsetConcordance")); hInfo.add(new VCFHeaderLine("source", "CallsetConcordance"));
hInfo.add(new VCFHeaderLine("note", "\"This file represents a concordance test of various call sets - NOT the output from a multi-sample caller\"")); hInfo.add(new VCFHeaderLine("note", "\"This file represents a concordance test of various call sets - NOT the output from a multi-sample caller\""));
hInfo.addAll(getVCFAnnotationDescriptions(requestedTypes)); hInfo.addAll(getVCFAnnotationDescriptions(requestedTypes));
VCFHeader header = new VCFHeader(hInfo, samples);
vcfWriter = new VCFWriter(header, OUTPUT); vcfWriter = new VCFWriter(OUTPUT);
vcfWriter.writeHeader(new VCFHeader(hInfo, samples));
} }
public static Set<VCFHeaderLine> getVCFAnnotationDescriptions(Collection<ConcordanceType> types) { public static Set<VCFHeaderLine> getVCFAnnotationDescriptions(Collection<ConcordanceType> types) {

View File

@ -64,8 +64,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
} }
} }
VCFHeader header = new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()); writer = new VCFWriter(out);
writer = new VCFWriter(header, out); writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()));
} }
public void initialize() { public void initialize() {

View File

@ -37,8 +37,6 @@ import org.broadinstitute.sting.utils.cmdLine.*;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.genotype.vcf.*;
import net.sf.samtools.SAMReadGroupRecord;
import java.io.File; import java.io.File;
import java.util.*; import java.util.*;
@ -68,7 +66,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
private GenotypeWriter writer; private GenotypeWriter writer;
// samples in input // samples in input
private TreeSet<String> samples; private Set<String> samples;
// keep track of some metrics about our calls // keep track of some metrics about our calls
private CallMetrics callsMetrics; private CallMetrics callsMetrics;
@ -113,15 +111,11 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
} }
// get all of the unique sample names // get all of the unique sample names
samples = new TreeSet<String>();
// if we're supposed to assume a single sample // if we're supposed to assume a single sample
if ( UAC.ASSUME_SINGLE_SAMPLE != null ) { if ( UAC.ASSUME_SINGLE_SAMPLE != null )
samples.add(UAC.ASSUME_SINGLE_SAMPLE); samples.add(UAC.ASSUME_SINGLE_SAMPLE);
} else { else
List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups(); samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
for ( SAMReadGroupRecord readGroup : readGroups )
samples.add(readGroup.getSample());
}
// print them out for debugging (need separate loop to ensure uniqueness) // print them out for debugging (need separate loop to ensure uniqueness)
// for ( String sample : samples ) // for ( String sample : samples )
@ -144,15 +138,12 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// get the optional header fields // get the optional header fields
Set<VCFHeaderLine> headerInfo = getHeaderInfo(); Set<VCFHeaderLine> headerInfo = getHeaderInfo();
// create the output writer stream // create the output writer stream and initialize the header
if ( VARIANTS_FILE != null ) if ( VARIANTS_FILE != null )
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, writer = GenotypeWriterFactory.create(VAR_FORMAT, VARIANTS_FILE);
samples,
headerInfo);
else else
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, writer = GenotypeWriterFactory.create(VAR_FORMAT, out);
samples, GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, headerInfo);
headerInfo);
callsMetrics = new CallMetrics(); callsMetrics = new CallMetrics();
} }

View File

@ -49,7 +49,8 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
} }
vcfheader = getHeader(args, sampleNames.keySet()); vcfheader = getHeader(args, sampleNames.keySet());
vcfwriter = new VCFWriter(vcfheader, VCF_OUT); vcfwriter = new VCFWriter(VCF_OUT);
vcfwriter.writeHeader(vcfheader);
} }
public static VCFHeader getHeader(GATKArgumentCollection args, Set<String> sampleNames) { public static VCFHeader getHeader(GATKArgumentCollection args, Set<String> sampleNames) {

View File

@ -20,7 +20,7 @@ public class VCFSubsetWalker extends RefWalker<ArrayList<VCFRecord>, VCFWriter>
private HashSet<String> SAMPLES; private HashSet<String> SAMPLES;
@Argument(fullName="vcfsubset", shortName="O", doc="File to write VCF subset to", required=false) @Argument(fullName="vcfsubset", shortName="O", doc="File to write VCF subset to", required=false)
private File VPATH; private File VPATH = null;
@Argument(fullName="includeNonVariants", shortName="INV", doc="Include non-variant loci", required=false) @Argument(fullName="includeNonVariants", shortName="INV", doc="Include non-variant loci", required=false)
private boolean INCLUDE_NON_VARIANTS = false; private boolean INCLUDE_NON_VARIANTS = false;
@ -43,7 +43,8 @@ public class VCFSubsetWalker extends RefWalker<ArrayList<VCFRecord>, VCFWriter>
vheader = new VCFHeader(metaData, additionalColumns); vheader = new VCFHeader(metaData, additionalColumns);
if (VPATH != null) { if (VPATH != null) {
vwriter = new VCFWriter(vheader, VPATH); vwriter = new VCFWriter(VPATH);
vwriter.writeHeader(vheader);
} }
} }

View File

@ -342,9 +342,9 @@ class VCFGrep extends CommandLineProgram
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); } if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); }
else { reader = new VCFReader(new File(in_filename)); } else { reader = new VCFReader(new File(in_filename)); }
VCFHeader header = reader.getHeader();
writer = new VCFWriter(header, new File(out_filename)); writer = new VCFWriter(new File(out_filename));
writer.writeHeader(reader.getHeader());
while(reader.hasNext()) while(reader.hasNext())
{ {

View File

@ -28,48 +28,56 @@ public class GenotypeWriterFactory {
/** /**
* create a genotype writer * create a genotype writer
* @param format the format * @param format the format
* @param header the sam file header
* @param destination the destination file * @param destination the destination file
* @param sampleNames the sample names
* @param headerInfo the optional header info fields
* @return the genotype writer object * @return the genotype writer object
*/ */
public static GenotypeWriter create(GENOTYPE_FORMAT format, public static GenotypeWriter create(GENOTYPE_FORMAT format, File destination) {
SAMFileHeader header,
File destination,
Set<String> sampleNames,
Set<VCFHeaderLine> headerInfo) {
switch (format) { switch (format) {
case GLF: case GLF:
return new GLFWriter(header.toString(), destination); return new GLFWriter(destination);
case GELI: case GELI:
return new GeliTextWriter(destination); return new GeliTextWriter(destination);
case GELI_BINARY: case GELI_BINARY:
return new GeliAdapter(destination, header); return new GeliAdapter(destination);
case VCF: case VCF:
return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo); return new VCFGenotypeWriterAdapter(destination);
default: default:
throw new StingException("Genotype writer " + format.toString() + " is not implemented"); throw new StingException("Genotype writer " + format.toString() + " is not implemented");
} }
} }
public static GenotypeWriter create(GENOTYPE_FORMAT format, public static GenotypeWriter create(GENOTYPE_FORMAT format, PrintStream destination) {
SAMFileHeader header,
PrintStream destination,
Set<String> sampleNames,
Set<VCFHeaderLine> headerInfo) {
switch (format) { switch (format) {
case GELI: case GELI:
return new GeliTextWriter(destination); return new GeliTextWriter(destination);
case GLF: case GLF:
return new GLFWriter(header.toString(), destination); return new GLFWriter(destination);
case VCF: case VCF:
return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo); return new VCFGenotypeWriterAdapter(destination);
default: default:
throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented"); throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented");
} }
} }
/**
 * Writes the format-specific header for a genotype writer that has already been created.
 * The concrete type of the writer decides which of the remaining arguments is used.
 * Any other writer type (e.g. GELI text) is left untouched.
 *
 * @param writer      the writer whose header should be written
 * @param header      the sam file header (used by the GELI binary and GLF writers)
 * @param sampleNames the sample names (used by the VCF writer)
 * @param headerInfo  the optional header info fields (used by the VCF writer)
 */
public static void writeHeader(GenotypeWriter writer,
                               SAMFileHeader header,
                               Set<String> sampleNames,
                               Set<VCFHeaderLine> headerInfo) {
    // VCF: the header is built from the sample names plus the optional header lines
    if ( writer instanceof VCFGenotypeWriterAdapter ) {
        final VCFGenotypeWriterAdapter vcfWriter = (VCFGenotypeWriterAdapter)writer;
        vcfWriter.writeHeader(sampleNames, headerInfo);
        return;
    }

    // GELI BINARY: the SAM file header is handed over as-is
    if ( writer instanceof GeliAdapter ) {
        final GeliAdapter geliWriter = (GeliAdapter)writer;
        geliWriter.writeHeader(header);
        return;
    }

    // GLF: the header text is the string form of the SAM file header
    if ( writer instanceof GLFWriter ) {
        final GLFWriter glfWriter = (GLFWriter)writer;
        glfWriter.writeHeader(header.toString());
    }

    // nothing to do for GELI TEXT
}
/** /**
* create a genotype call * create a genotype call
* @param format the format * @param format the format

View File

@ -46,17 +46,27 @@ import java.util.List;
*/ */
public class GeliAdapter implements GenotypeWriter { public class GeliAdapter implements GenotypeWriter {
// the file we're writing to
private File writeTo = null;
// the geli file writer we're adapting // the geli file writer we're adapting
private final GeliFileWriter writer; private GeliFileWriter writer = null;
/** /**
* wrap a GeliFileWriter in the Genotype writer interface * wrap a GeliFileWriter in the Genotype writer interface
* *
* @param writeTo where to write to * @param writeTo where to write to
*/
public GeliAdapter(File writeTo) {
// only remember the destination here; the underlying GeliFileWriter stays null until writeHeader() creates it
this.writeTo = writeTo;
}
/**
* wrap a GeliFileWriter in the Genotype writer interface
*
* @param fileHeader the file header to write out * @param fileHeader the file header to write out
*/ */
public GeliAdapter(File writeTo, final SAMFileHeader fileHeader) { public void writeHeader(final SAMFileHeader fileHeader) {
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader); this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
} }
@ -67,6 +77,8 @@ public class GeliAdapter implements GenotypeWriter {
* @param contig the contig you're calling in * @param contig the contig you're calling in
* @param position the position on the contig * @param position the position on the contig
* @param referenceBase the reference base * @param referenceBase the reference base
* @param maxMappingQuality the max MQ
* @param readCount the read count
* @param likelihoods the likelihoods of each of the possible alleles * @param likelihoods the likelihoods of each of the possible alleles
*/ */
private void addGenotypeCall(SAMSequenceRecord contig, private void addGenotypeCall(SAMSequenceRecord contig,
@ -99,6 +111,9 @@ public class GeliAdapter implements GenotypeWriter {
} }
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
if ( writer == null )
throw new IllegalStateException("The Geli Header must be written before records can be added");
writer.addGenotypeLikelihoods(gl); writer.addGenotypeLikelihoods(gl);
} }
@ -108,6 +123,9 @@ public class GeliAdapter implements GenotypeWriter {
* @param call the call to add * @param call the call to add
*/ */
public void addGenotypeCall(Genotype call) { public void addGenotypeCall(Genotype call) {
if ( writer == null )
throw new IllegalStateException("The Geli Header must be written before calls can be added");
if ( !(call instanceof GeliGenotypeCall) ) if ( !(call instanceof GeliGenotypeCall) )
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
GeliGenotypeCall gCall = (GeliGenotypeCall)call; GeliGenotypeCall gCall = (GeliGenotypeCall)call;
@ -135,7 +153,7 @@ public class GeliAdapter implements GenotypeWriter {
/** /**
* add a no call to the genotype file, if supported. * add a no call to the genotype file, if supported.
* *
* @param position * @param position the position
*/ */
public void addNoCall(int position) { public void addNoCall(int position) {
throw new UnsupportedOperationException("Geli format does not support no-calls"); throw new UnsupportedOperationException("Geli format does not support no-calls");

View File

@ -53,7 +53,7 @@ public class GLFWriter implements GenotypeWriter {
public static final short[] glfMagic = {'G', 'L', 'F', '\3'}; public static final short[] glfMagic = {'G', 'L', 'F', '\3'};
// our header text, reference sequence name (i.e. chr1), and its length // our header text, reference sequence name (i.e. chr1), and its length
private String headerText = ""; private String headerText = null;
private String referenceSequenceName = null; private String referenceSequenceName = null;
private long referenceSequenceLength = 0; private long referenceSequenceLength = 0;
@ -63,29 +63,42 @@ public class GLFWriter implements GenotypeWriter {
/** /**
* The public constructor for creating a GLF object * The public constructor for creating a GLF object
* *
* @param headerText the header text (currently unclear what the contents are)
* @param writeTo the location to write to * @param writeTo the location to write to
*/ */
public GLFWriter(String headerText, File writeTo) { public GLFWriter(File writeTo) {
this.headerText = headerText;
outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo))); outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo)));
outputBinaryCodec.setOutputFileName(writeTo.toString()); outputBinaryCodec.setOutputFileName(writeTo.toString());
this.writeHeader();
} }
/** /**
* The public constructor for creating a GLF object * The public constructor for creating a GLF object
* *
* @param headerText the header text (currently unclear what the contents are)
* @param writeTo the location to write to * @param writeTo the location to write to
*/ */
public GLFWriter(String headerText, OutputStream writeTo) { public GLFWriter(OutputStream writeTo) {
this.headerText = headerText;
outputBinaryCodec = new BinaryCodec(writeTo); outputBinaryCodec = new BinaryCodec(writeTo);
outputBinaryCodec.setOutputFileName(writeTo.toString()); outputBinaryCodec.setOutputFileName(writeTo.toString());
this.writeHeader();
} }
/**
 * Write out the header information for the GLF file: the magic number followed by
 * the header text (or an integer zero when the header text is empty). The reference
 * sequence name and its length are not written here; addSequence() writes them.
 *
 * Storing the header text also marks the header as written: the add* methods refuse
 * to write while the stored header text is still null.
 *
 * @param headerText the header text to write; may be empty but must not be null
 * @throws IllegalArgumentException if headerText is null
 */
public void writeHeader(String headerText) {
    // reject null before touching the stream, so a bad call cannot leave a partly written header behind
    if (headerText == null) {
        throw new IllegalArgumentException("The GLF header text must not be null");
    }
    this.headerText = headerText;

    // the magic number always comes first
    for (short magicByte : glfMagic) {
        outputBinaryCodec.writeUByte(magicByte);
    }

    if (headerText.equals("")) {
        // no header text: write a zero in place of the string
        outputBinaryCodec.writeInt(0);
    } else {
        outputBinaryCodec.writeString(headerText, true, true);
    }
}
/** /**
* add a point genotype to the GLF writer * add a point genotype to the GLF writer
@ -103,6 +116,8 @@ public class GLFWriter implements GenotypeWriter {
char refBase, char refBase,
int readDepth, int readDepth,
LikelihoodObject lhValues) { LikelihoodObject lhValues) {
if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before calls can be added");
// check if we've jumped to a new contig // check if we've jumped to a new contig
checkSequence(contig.getSequenceName(), contig.getSequenceLength()); checkSequence(contig.getSequenceName(), contig.getSequenceLength());
@ -122,6 +137,9 @@ public class GLFWriter implements GenotypeWriter {
* @param call the genotype call * @param call the genotype call
*/ */
public void addGenotypeCall(Genotype call) { public void addGenotypeCall(Genotype call) {
if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before calls can be added");
if ( !(call instanceof GLFGenotypeCall) ) if ( !(call instanceof GLFGenotypeCall) )
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
GLFGenotypeCall gCall = (GLFGenotypeCall)call; GLFGenotypeCall gCall = (GLFGenotypeCall)call;
@ -176,6 +194,9 @@ public class GLFWriter implements GenotypeWriter {
IndelLikelihood secondHomZyg, IndelLikelihood secondHomZyg,
byte hetLikelihood) { byte hetLikelihood) {
if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before calls can be added");
// check if we've jumped to a new contig // check if we've jumped to a new contig
checkSequence(contig.getSequenceName(), contig.getSequenceLength()); checkSequence(contig.getSequenceName(), contig.getSequenceLength());
@ -213,27 +234,13 @@ public class GLFWriter implements GenotypeWriter {
* @param rec the GLF record to write. * @param rec the GLF record to write.
*/ */
public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) { public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) {
if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before records can be added");
checkSequence(contigName, contigLength); checkSequence(contigName, contigLength);
rec.write(this.outputBinaryCodec); rec.write(this.outputBinaryCodec);
} }
/**
* Write out the header information for the GLF file. The header contains
* the magic number, the length of the header text, the text itself, the reference
* sequence (null terminated) preceeded by it's length, and the the genomic
* length of the reference sequence.
*/
private void writeHeader() {
for (int x = 0; x < glfMagic.length; x++) {
outputBinaryCodec.writeUByte(glfMagic[x]);
}
if (!(headerText.equals(""))) {
outputBinaryCodec.writeString(headerText, true, true);
} else {
outputBinaryCodec.writeInt(0);
}
}
/** /**
* check to see if we've jumped to a new contig * check to see if we've jumped to a new contig
* *
@ -255,12 +262,18 @@ public class GLFWriter implements GenotypeWriter {
/** add a sequence definition to the glf */ /** add a sequence definition to the glf */
private void addSequence() { private void addSequence() {
if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before sequences can be added");
outputBinaryCodec.writeString(referenceSequenceName, true, true); outputBinaryCodec.writeString(referenceSequenceName, true, true);
outputBinaryCodec.writeUInt(referenceSequenceLength); outputBinaryCodec.writeUInt(referenceSequenceLength);
} }
/** write end record */ /** write end record */
private void writeEndRecord() { private void writeEndRecord() {
if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before records can be added");
outputBinaryCodec.writeUByte((short) 0); outputBinaryCodec.writeUByte((short) 0);
} }

View File

@ -25,55 +25,33 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class); protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class);
public VCFGenotypeWriterAdapter(File writeTo, Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) { public VCFGenotypeWriterAdapter(File writeTo) {
mSampleNames.addAll(sampleNames);
initializeHeader(headerInfo);
if (writeTo == null) throw new RuntimeException("VCF output file must not be null"); if (writeTo == null) throw new RuntimeException("VCF output file must not be null");
mWriter = new VCFWriter(mHeader, writeTo); mWriter = new VCFWriter(writeTo);
} }
public VCFGenotypeWriterAdapter(OutputStream writeTo, Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) { public VCFGenotypeWriterAdapter(OutputStream writeTo) {
mSampleNames.addAll(sampleNames);
initializeHeader(headerInfo);
if (writeTo == null) throw new RuntimeException("VCF output stream must not be null"); if (writeTo == null) throw new RuntimeException("VCF output stream must not be null");
mWriter = new VCFWriter(mHeader, writeTo); mWriter = new VCFWriter(writeTo);
} }
/** /**
* initialize this VCF header * initialize this VCF header
* *
* @param optionalHeaderInfo the optional header fields * @param sampleNames the sample names
* @param headerInfo the optional header fields
*/ */
private void initializeHeader(Set<VCFHeaderLine> optionalHeaderInfo) { public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
Set<VCFHeaderLine> hInfo = new TreeSet<VCFHeaderLine>(); mSampleNames.addAll(sampleNames);
// setup the header fields // setup the header fields
Set<VCFHeaderLine> hInfo = new TreeSet<VCFHeaderLine>();
hInfo.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION)); hInfo.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
hInfo.addAll(optionalHeaderInfo); hInfo.addAll(headerInfo);
// setup the sample names // setup the sample names
mHeader = new VCFHeader(hInfo, mSampleNames); mHeader = new VCFHeader(hInfo, mSampleNames);
} mWriter.writeHeader(mHeader);
/**
* get the samples names from genotype objects
*
* @param genotypes the genotype list
*
* @return a list of strings representing the sample names
*/
private static List<String> getSampleNames(List<Genotype> genotypes) {
List<String> strings = new ArrayList<String>();
for (Genotype genotype : genotypes) {
if (!(genotype instanceof VCFGenotypeCall))
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
strings.add(((VCFGenotypeCall) genotype).getSampleName());
}
return strings;
} }
/** /**
@ -105,6 +83,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
* @param genotypes the list of genotypes * @param genotypes the list of genotypes
*/ */
public void addMultiSampleCall(List<Genotype> genotypes, VariationCall locusdata) { public void addMultiSampleCall(List<Genotype> genotypes, VariationCall locusdata) {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added");
if ( locusdata != null && !(locusdata instanceof VCFVariationCall) ) if ( locusdata != null && !(locusdata instanceof VCFVariationCall) )
throw new IllegalArgumentException("Only VCFVariationCall objects should be passed in to the VCF writers"); throw new IllegalArgumentException("Only VCFVariationCall objects should be passed in to the VCF writers");

View File

@ -11,43 +11,39 @@ public class VCFWriter {
// the VCF header we're storing // the VCF header we're storing
private VCFHeader mHeader; private VCFHeader mHeader = null;
// the print stream we're writing to // the print stream we're writing to
BufferedWriter mWriter; BufferedWriter mWriter;
private final String FIELD_SEPERATOR = "\t"; private final String FIELD_SEPERATOR = "\t";
/** /**
* create a VCF writer, given a VCF header and a file to write to * create a VCF writer, given a file to write to
* *
* @param header the VCF header
* @param location the file location to write to * @param location the file location to write to
*/ */
public VCFWriter(VCFHeader header, File location) { public VCFWriter(File location) {
FileOutputStream output; FileOutputStream output;
try { try {
output = new FileOutputStream(location); output = new FileOutputStream(location);
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new RuntimeException("Unable to create VCF file at location: " + location); throw new RuntimeException("Unable to create VCF file at location: " + location);
} }
initialize(header, output); mWriter = new BufferedWriter(new OutputStreamWriter(output));
} }
/** /**
* create a VCF writer, given a VCF header and a file to write to * create a VCF writer, given a stream to write to
* *
* @param header the VCF header * @param output the file location to write to
* @param location the file location to write to
*/ */
public VCFWriter(VCFHeader header, OutputStream location) { public VCFWriter(OutputStream output) {
initialize(header, location); mWriter = new BufferedWriter(new OutputStreamWriter(output));
} }
private void initialize(VCFHeader header, OutputStream location) { public void writeHeader(VCFHeader header) {
this.mHeader = header; this.mHeader = header;
mWriter = new BufferedWriter(
new OutputStreamWriter(location));
try { try {
// the fileformat field needs to be written first // the fileformat field needs to be written first
TreeSet<VCFHeaderLine> nonFormatMetaData = new TreeSet<VCFHeaderLine>(); TreeSet<VCFHeaderLine> nonFormatMetaData = new TreeSet<VCFHeaderLine>();
@ -87,6 +83,9 @@ public class VCFWriter {
* @param record the record to output * @param record the record to output
*/ */
public void addRecord(VCFRecord record) { public void addRecord(VCFRecord record) {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added");
String vcfString = record.toStringEncoding(mHeader); String vcfString = record.toStringEncoding(mHeader);
try { try {
mWriter.write(vcfString + "\n"); mWriter.write(vcfString + "\n");
@ -96,7 +95,6 @@ public class VCFWriter {
} }
/** /**
* attempt to close the VCF file * attempt to close the VCF file
*/ */

View File

@ -5,10 +5,8 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.LikelihoodsBacked;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -96,7 +94,8 @@ public class GLFWriterTest extends BaseTest {
File writeTo = new File("testGLF.glf"); File writeTo = new File("testGLF.glf");
writeTo.deleteOnExit(); writeTo.deleteOnExit();
rec = new GLFWriter(header, writeTo); rec = new GLFWriter(writeTo);
((GLFWriter)rec).writeHeader(header);
for (int x = 0; x < 100; x++) { for (int x = 0; x < 100; x++) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1);
Genotype type = createGenotype(x % 10, loc, 'A'); Genotype type = createGenotype(x % 10, loc, 'A');
@ -112,7 +111,8 @@ public class GLFWriterTest extends BaseTest {
File writeTo = new File("testGLF2.glf"); File writeTo = new File("testGLF2.glf");
writeTo.deleteOnExit(); writeTo.deleteOnExit();
List<FakeGenotype> types = new ArrayList<FakeGenotype>(); List<FakeGenotype> types = new ArrayList<FakeGenotype>();
rec = new GLFWriter(header, writeTo); rec = new GLFWriter(writeTo);
((GLFWriter)rec).writeHeader(header);
for (int x = 0; x < 100; x++) { for (int x = 0; x < 100; x++) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1);
FakeGenotype type = createGenotype(x % 10, loc, 'A'); FakeGenotype type = createGenotype(x % 10, loc, 'A');
@ -167,7 +167,6 @@ class FakeGenotype extends GLFGenotypeCall implements Comparable<FakeGenotype> {
} }
@Override
public int compareTo(FakeGenotype that) { public int compareTo(FakeGenotype that) {
if (this.getLocation().compareTo(that.getLocation()) != 0) { if (this.getLocation().compareTo(that.getLocation()) != 0) {
System.err.println("Location's aren't equal; this = " + this.getLocation() + " that = " + that.getLocation()); System.err.println("Location's aren't equal; this = " + this.getLocation() + " that = " + that.getLocation());

View File

@ -39,7 +39,8 @@ public class VCFWriterTest extends BaseTest {
@Test @Test
public void testBasicWriteAndRead() { public void testBasicWriteAndRead() {
VCFHeader header = createFakeHeader(metaData,additionalColumns); VCFHeader header = createFakeHeader(metaData,additionalColumns);
VCFWriter writer = new VCFWriter(header,fakeVCFFile); VCFWriter writer = new VCFWriter(fakeVCFFile);
writer.writeHeader(header);
writer.addRecord(createVCFRecord(header)); writer.addRecord(createVCFRecord(header));
writer.addRecord(createVCFRecord(header)); writer.addRecord(createVCFRecord(header));
writer.close(); writer.close();