Pushed header initialization out of the GenotypeWriter constructors and into a writeHeader method, in preparation for parallelization.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2406 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
eeddf0d08e
commit
4ea31fd949
|
|
@ -27,12 +27,15 @@ package org.broadinstitute.sting.gatk.io.storage;
|
|||
|
||||
import java.io.*;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.*;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
|
||||
|
||||
/**
|
||||
|
|
@ -51,11 +54,9 @@ public class GenotypeWriterStorage implements GenotypeWriter, Storage<GenotypeWr
|
|||
|
||||
public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) {
|
||||
this.file = file;
|
||||
writer = GenotypeWriterFactory.create(stub.getFormat(),
|
||||
stub.getSAMFileHeader(),
|
||||
file,
|
||||
stub.getSampleNames(),
|
||||
stub.getHeaderInfo());
|
||||
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
|
||||
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
|
||||
}
|
||||
|
||||
public void mergeInto( GenotypeWriter targetStream ) {
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.io.stubs;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
|
@ -35,7 +34,6 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
|||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariationCall;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
/**
|
||||
|
|
@ -62,19 +60,6 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
|
|||
*/
|
||||
private final GenotypeWriterFactory.GENOTYPE_FORMAT format;
|
||||
|
||||
/**
|
||||
* The sample names for the output file
|
||||
*/
|
||||
private final Set<String> sampleNames;
|
||||
|
||||
|
||||
/**
|
||||
* The header info for the output file
|
||||
*/
|
||||
private final Set<VCFHeaderLine> headerInfo;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Connects this stub with an external stream capable of serving the
|
||||
* requests of the consumer of this stub.
|
||||
|
|
@ -86,19 +71,13 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
|
|||
* @param engine GATK engine.
|
||||
* @param genotypeFile file to (ultimately) create.
|
||||
* @param format file format.
|
||||
* @param sampleNames sample names to use for creating writer.
|
||||
* @param headerInfo header info to use for creating writer.
|
||||
*/
|
||||
public GenotypeWriterStub( GenomeAnalysisEngine engine,
|
||||
File genotypeFile,
|
||||
GenotypeWriterFactory.GENOTYPE_FORMAT format,
|
||||
Set<String> sampleNames,
|
||||
Set<VCFHeaderLine> headerInfo) {
|
||||
GenotypeWriterFactory.GENOTYPE_FORMAT format) {
|
||||
this.engine = engine;
|
||||
this.genotypeFile = genotypeFile;
|
||||
this.format = format;
|
||||
this.sampleNames = sampleNames;
|
||||
this.headerInfo = headerInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -125,22 +104,6 @@ public class GenotypeWriterStub implements Stub<GenotypeWriter>, GenotypeWriter
|
|||
return format;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the sample names to use when creating the new file.
|
||||
* @return sample names to use when creating the new file.
|
||||
*/
|
||||
public Set<String> getSampleNames() {
|
||||
return sampleNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the header info to use when creating the new file.
|
||||
* @return header info to use when creating the new file.
|
||||
*/
|
||||
public Set<VCFHeaderLine> getHeaderInfo() {
|
||||
return headerInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers the given streamConnector with this stub.
|
||||
* @param outputTracker The connector used to provide an appropriate stream.
|
||||
|
|
|
|||
|
|
@ -122,8 +122,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
||||
hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations));
|
||||
|
||||
vcfWriter = new VCFWriter(VCF_OUT);
|
||||
vcfHeader = new VCFHeader(hInfo, samples);
|
||||
vcfWriter = new VCFWriter(vcfHeader, VCF_OUT);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -108,9 +108,9 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFHeaderLine("source", "CallsetConcordance"));
|
||||
hInfo.add(new VCFHeaderLine("note", "\"This file represents a concordance test of various call sets - NOT the output from a multi-sample caller\""));
|
||||
hInfo.addAll(getVCFAnnotationDescriptions(requestedTypes));
|
||||
VCFHeader header = new VCFHeader(hInfo, samples);
|
||||
|
||||
vcfWriter = new VCFWriter(header, OUTPUT);
|
||||
vcfWriter = new VCFWriter(OUTPUT);
|
||||
vcfWriter.writeHeader(new VCFHeader(hInfo, samples));
|
||||
}
|
||||
|
||||
public static Set<VCFHeaderLine> getVCFAnnotationDescriptions(Collection<ConcordanceType> types) {
|
||||
|
|
|
|||
|
|
@ -64,8 +64,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
VCFHeader header = new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples());
|
||||
writer = new VCFWriter(header, out);
|
||||
writer = new VCFWriter(out);
|
||||
writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()));
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
|
|
|
|||
|
|
@ -37,8 +37,6 @@ import org.broadinstitute.sting.utils.cmdLine.*;
|
|||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -68,7 +66,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
private GenotypeWriter writer;
|
||||
|
||||
// samples in input
|
||||
private TreeSet<String> samples;
|
||||
private Set<String> samples;
|
||||
|
||||
// keep track of some metrics about our calls
|
||||
private CallMetrics callsMetrics;
|
||||
|
|
@ -113,15 +111,11 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
}
|
||||
|
||||
// get all of the unique sample names
|
||||
samples = new TreeSet<String>();
|
||||
// if we're supposed to assume a single sample
|
||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null ) {
|
||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||
} else {
|
||||
List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups();
|
||||
for ( SAMReadGroupRecord readGroup : readGroups )
|
||||
samples.add(readGroup.getSample());
|
||||
}
|
||||
else
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
// print them out for debugging (need separate loop to ensure uniqueness)
|
||||
// for ( String sample : samples )
|
||||
|
|
@ -144,15 +138,12 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
// get the optional header fields
|
||||
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
|
||||
|
||||
// create the output writer stream
|
||||
// create the output writer stream and initialize the header
|
||||
if ( VARIANTS_FILE != null )
|
||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
|
||||
samples,
|
||||
headerInfo);
|
||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, VARIANTS_FILE);
|
||||
else
|
||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out,
|
||||
samples,
|
||||
headerInfo);
|
||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, out);
|
||||
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, headerInfo);
|
||||
|
||||
callsMetrics = new CallMetrics();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,7 +49,8 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
vcfheader = getHeader(args, sampleNames.keySet());
|
||||
vcfwriter = new VCFWriter(vcfheader, VCF_OUT);
|
||||
vcfwriter = new VCFWriter(VCF_OUT);
|
||||
vcfwriter.writeHeader(vcfheader);
|
||||
}
|
||||
|
||||
public static VCFHeader getHeader(GATKArgumentCollection args, Set<String> sampleNames) {
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ public class VCFSubsetWalker extends RefWalker<ArrayList<VCFRecord>, VCFWriter>
|
|||
private HashSet<String> SAMPLES;
|
||||
|
||||
@Argument(fullName="vcfsubset", shortName="O", doc="File to write VCF subset to", required=false)
|
||||
private File VPATH;
|
||||
private File VPATH = null;
|
||||
|
||||
@Argument(fullName="includeNonVariants", shortName="INV", doc="Include non-variant loci", required=false)
|
||||
private boolean INCLUDE_NON_VARIANTS = false;
|
||||
|
|
@ -43,7 +43,8 @@ public class VCFSubsetWalker extends RefWalker<ArrayList<VCFRecord>, VCFWriter>
|
|||
|
||||
vheader = new VCFHeader(metaData, additionalColumns);
|
||||
if (VPATH != null) {
|
||||
vwriter = new VCFWriter(vheader, VPATH);
|
||||
vwriter = new VCFWriter(VPATH);
|
||||
vwriter.writeHeader(vheader);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -342,9 +342,9 @@ class VCFGrep extends CommandLineProgram
|
|||
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); }
|
||||
else { reader = new VCFReader(new File(in_filename)); }
|
||||
|
||||
VCFHeader header = reader.getHeader();
|
||||
|
||||
writer = new VCFWriter(header, new File(out_filename));
|
||||
writer = new VCFWriter(new File(out_filename));
|
||||
writer.writeHeader(reader.getHeader());
|
||||
|
||||
while(reader.hasNext())
|
||||
{
|
||||
|
|
|
|||
|
|
@ -28,48 +28,56 @@ public class GenotypeWriterFactory {
|
|||
/**
|
||||
* create a genotype writer
|
||||
* @param format the format
|
||||
* @param header the sam file header
|
||||
* @param destination the destination file
|
||||
* @param sampleNames the sample names
|
||||
* @param headerInfo the optional header info fields
|
||||
* @return the genotype writer object
|
||||
*/
|
||||
public static GenotypeWriter create(GENOTYPE_FORMAT format,
|
||||
SAMFileHeader header,
|
||||
File destination,
|
||||
Set<String> sampleNames,
|
||||
Set<VCFHeaderLine> headerInfo) {
|
||||
public static GenotypeWriter create(GENOTYPE_FORMAT format, File destination) {
|
||||
switch (format) {
|
||||
case GLF:
|
||||
return new GLFWriter(header.toString(), destination);
|
||||
return new GLFWriter(destination);
|
||||
case GELI:
|
||||
return new GeliTextWriter(destination);
|
||||
case GELI_BINARY:
|
||||
return new GeliAdapter(destination, header);
|
||||
return new GeliAdapter(destination);
|
||||
case VCF:
|
||||
return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo);
|
||||
return new VCFGenotypeWriterAdapter(destination);
|
||||
default:
|
||||
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
|
||||
}
|
||||
}
|
||||
|
||||
public static GenotypeWriter create(GENOTYPE_FORMAT format,
|
||||
SAMFileHeader header,
|
||||
PrintStream destination,
|
||||
Set<String> sampleNames,
|
||||
Set<VCFHeaderLine> headerInfo) {
|
||||
public static GenotypeWriter create(GENOTYPE_FORMAT format, PrintStream destination) {
|
||||
switch (format) {
|
||||
case GELI:
|
||||
return new GeliTextWriter(destination);
|
||||
case GLF:
|
||||
return new GLFWriter(header.toString(), destination);
|
||||
return new GLFWriter(destination);
|
||||
case VCF:
|
||||
return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo);
|
||||
return new VCFGenotypeWriterAdapter(destination);
|
||||
default:
|
||||
throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented");
|
||||
}
|
||||
}
|
||||
|
||||
public static void writeHeader(GenotypeWriter writer,
|
||||
SAMFileHeader header,
|
||||
Set<String> sampleNames,
|
||||
Set<VCFHeaderLine> headerInfo) {
|
||||
// VCF
|
||||
if ( writer instanceof VCFGenotypeWriterAdapter ) {
|
||||
((VCFGenotypeWriterAdapter)writer).writeHeader(sampleNames, headerInfo);
|
||||
}
|
||||
// GELI BINARY
|
||||
else if ( writer instanceof GeliAdapter ) {
|
||||
((GeliAdapter)writer).writeHeader(header);
|
||||
}
|
||||
// GLF
|
||||
else if ( writer instanceof GLFWriter ) {
|
||||
((GLFWriter)writer).writeHeader(header.toString());
|
||||
}
|
||||
// nothing to do for GELI TEXT
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype call
|
||||
* @param format the format
|
||||
|
|
|
|||
|
|
@ -46,17 +46,27 @@ import java.util.List;
|
|||
*/
|
||||
public class GeliAdapter implements GenotypeWriter {
|
||||
|
||||
// the file we're writing to
|
||||
private File writeTo = null;
|
||||
|
||||
// the geli file writer we're adapting
|
||||
private final GeliFileWriter writer;
|
||||
private GeliFileWriter writer = null;
|
||||
|
||||
/**
|
||||
* wrap a GeliFileWriter in the Genotype writer interface
|
||||
*
|
||||
* @param writeTo where to write to
|
||||
*/
|
||||
public GeliAdapter(File writeTo) {
|
||||
this.writeTo = writeTo;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the underlying GeliFileWriter for the destination file, using the given file header
|
||||
*
|
||||
* @param fileHeader the file header to write out
|
||||
*/
|
||||
public GeliAdapter(File writeTo, final SAMFileHeader fileHeader) {
|
||||
public void writeHeader(final SAMFileHeader fileHeader) {
|
||||
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
|
||||
}
|
||||
|
||||
|
|
@ -67,6 +77,8 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
* @param contig the contig you're calling in
|
||||
* @param position the position on the contig
|
||||
* @param referenceBase the reference base
|
||||
* @param maxMappingQuality the max MQ
|
||||
* @param readCount the read count
|
||||
* @param likelihoods the likelihoods of each of the possible alleles
|
||||
*/
|
||||
private void addGenotypeCall(SAMSequenceRecord contig,
|
||||
|
|
@ -99,6 +111,9 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
}
|
||||
|
||||
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
|
||||
if ( writer == null )
|
||||
throw new IllegalStateException("The Geli Header must be written before records can be added");
|
||||
|
||||
writer.addGenotypeLikelihoods(gl);
|
||||
}
|
||||
|
||||
|
|
@ -108,6 +123,9 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
* @param call the call to add
|
||||
*/
|
||||
public void addGenotypeCall(Genotype call) {
|
||||
if ( writer == null )
|
||||
throw new IllegalStateException("The Geli Header must be written before calls can be added");
|
||||
|
||||
if ( !(call instanceof GeliGenotypeCall) )
|
||||
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
|
||||
GeliGenotypeCall gCall = (GeliGenotypeCall)call;
|
||||
|
|
@ -135,7 +153,7 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
/**
|
||||
* add a no call to the genotype file, if supported.
|
||||
*
|
||||
* @param position
|
||||
* @param position the position
|
||||
*/
|
||||
public void addNoCall(int position) {
|
||||
throw new UnsupportedOperationException("Geli format does not support no-calls");
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ public class GLFWriter implements GenotypeWriter {
|
|||
public static final short[] glfMagic = {'G', 'L', 'F', '\3'};
|
||||
|
||||
// our header text, reference sequence name (e.g. chr1), and its length
|
||||
private String headerText = "";
|
||||
private String headerText = null;
|
||||
private String referenceSequenceName = null;
|
||||
private long referenceSequenceLength = 0;
|
||||
|
||||
|
|
@ -63,29 +63,42 @@ public class GLFWriter implements GenotypeWriter {
|
|||
/**
|
||||
* The public constructor for creating a GLF object
|
||||
*
|
||||
* @param headerText the header text (currently unclear what the contents are)
|
||||
* @param writeTo the location to write to
|
||||
*/
|
||||
public GLFWriter(String headerText, File writeTo) {
|
||||
this.headerText = headerText;
|
||||
public GLFWriter(File writeTo) {
|
||||
outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo)));
|
||||
outputBinaryCodec.setOutputFileName(writeTo.toString());
|
||||
this.writeHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* The public constructor for creating a GLF object
|
||||
*
|
||||
* @param headerText the header text (currently unclear what the contents are)
|
||||
* @param writeTo the location to write to
|
||||
*/
|
||||
public GLFWriter(String headerText, OutputStream writeTo) {
|
||||
this.headerText = headerText;
|
||||
public GLFWriter(OutputStream writeTo) {
|
||||
outputBinaryCodec = new BinaryCodec(writeTo);
|
||||
outputBinaryCodec.setOutputFileName(writeTo.toString());
|
||||
this.writeHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out the header information for the GLF file. The header contains
|
||||
* the magic number, the length of the header text, the text itself, the reference
|
||||
* sequence (null terminated) preceded by its length, and the genomic
|
||||
* length of the reference sequence.
|
||||
*
|
||||
* @param headerText the header text to write
|
||||
*/
|
||||
public void writeHeader(String headerText) {
|
||||
this.headerText = headerText;
|
||||
for (int x = 0; x < glfMagic.length; x++) {
|
||||
outputBinaryCodec.writeUByte(glfMagic[x]);
|
||||
}
|
||||
if (!(headerText.equals(""))) {
|
||||
outputBinaryCodec.writeString(headerText, true, true);
|
||||
} else {
|
||||
outputBinaryCodec.writeInt(0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* add a point genotype to the GLF writer
|
||||
|
|
@ -103,6 +116,8 @@ public class GLFWriter implements GenotypeWriter {
|
|||
char refBase,
|
||||
int readDepth,
|
||||
LikelihoodObject lhValues) {
|
||||
if ( headerText == null )
|
||||
throw new IllegalStateException("The GLF Header must be written before calls can be added");
|
||||
|
||||
// check if we've jumped to a new contig
|
||||
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
|
||||
|
|
@ -122,6 +137,9 @@ public class GLFWriter implements GenotypeWriter {
|
|||
* @param call the genotype call
|
||||
*/
|
||||
public void addGenotypeCall(Genotype call) {
|
||||
if ( headerText == null )
|
||||
throw new IllegalStateException("The GLF Header must be written before calls can be added");
|
||||
|
||||
if ( !(call instanceof GLFGenotypeCall) )
|
||||
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
|
||||
GLFGenotypeCall gCall = (GLFGenotypeCall)call;
|
||||
|
|
@ -176,6 +194,9 @@ public class GLFWriter implements GenotypeWriter {
|
|||
IndelLikelihood secondHomZyg,
|
||||
byte hetLikelihood) {
|
||||
|
||||
if ( headerText == null )
|
||||
throw new IllegalStateException("The GLF Header must be written before calls can be added");
|
||||
|
||||
// check if we've jumped to a new contig
|
||||
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
|
||||
|
||||
|
|
@ -213,27 +234,13 @@ public class GLFWriter implements GenotypeWriter {
|
|||
* @param rec the GLF record to write.
|
||||
*/
|
||||
public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) {
|
||||
if ( headerText == null )
|
||||
throw new IllegalStateException("The GLF Header must be written before records can be added");
|
||||
|
||||
checkSequence(contigName, contigLength);
|
||||
rec.write(this.outputBinaryCodec);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out the header information for the GLF file. The header contains
|
||||
* the magic number, the length of the header text, the text itself, the reference
|
||||
* sequence (null terminated) preceded by its length, and the genomic
|
||||
* length of the reference sequence.
|
||||
*/
|
||||
private void writeHeader() {
|
||||
for (int x = 0; x < glfMagic.length; x++) {
|
||||
outputBinaryCodec.writeUByte(glfMagic[x]);
|
||||
}
|
||||
if (!(headerText.equals(""))) {
|
||||
outputBinaryCodec.writeString(headerText, true, true);
|
||||
} else {
|
||||
outputBinaryCodec.writeInt(0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* check to see if we've jumped to a new contig
|
||||
*
|
||||
|
|
@ -255,12 +262,18 @@ public class GLFWriter implements GenotypeWriter {
|
|||
|
||||
/** add a sequence definition to the glf */
|
||||
private void addSequence() {
|
||||
if ( headerText == null )
|
||||
throw new IllegalStateException("The GLF Header must be written before sequences can be added");
|
||||
|
||||
outputBinaryCodec.writeString(referenceSequenceName, true, true);
|
||||
outputBinaryCodec.writeUInt(referenceSequenceLength);
|
||||
}
|
||||
|
||||
/** write end record */
|
||||
private void writeEndRecord() {
|
||||
if ( headerText == null )
|
||||
throw new IllegalStateException("The GLF Header must be written before records can be added");
|
||||
|
||||
outputBinaryCodec.writeUByte((short) 0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,55 +25,33 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class);
|
||||
|
||||
|
||||
public VCFGenotypeWriterAdapter(File writeTo, Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
|
||||
mSampleNames.addAll(sampleNames);
|
||||
|
||||
initializeHeader(headerInfo);
|
||||
|
||||
public VCFGenotypeWriterAdapter(File writeTo) {
|
||||
if (writeTo == null) throw new RuntimeException("VCF output file must not be null");
|
||||
mWriter = new VCFWriter(mHeader, writeTo);
|
||||
mWriter = new VCFWriter(writeTo);
|
||||
}
|
||||
|
||||
public VCFGenotypeWriterAdapter(OutputStream writeTo, Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
|
||||
mSampleNames.addAll(sampleNames);
|
||||
|
||||
initializeHeader(headerInfo);
|
||||
|
||||
public VCFGenotypeWriterAdapter(OutputStream writeTo) {
|
||||
if (writeTo == null) throw new RuntimeException("VCF output stream must not be null");
|
||||
mWriter = new VCFWriter(mHeader, writeTo);
|
||||
mWriter = new VCFWriter(writeTo);
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize this VCF header
|
||||
*
|
||||
* @param optionalHeaderInfo the optional header fields
|
||||
* @param sampleNames the sample names
|
||||
* @param headerInfo the optional header fields
|
||||
*/
|
||||
private void initializeHeader(Set<VCFHeaderLine> optionalHeaderInfo) {
|
||||
Set<VCFHeaderLine> hInfo = new TreeSet<VCFHeaderLine>();
|
||||
public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
|
||||
mSampleNames.addAll(sampleNames);
|
||||
|
||||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new TreeSet<VCFHeaderLine>();
|
||||
hInfo.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
|
||||
hInfo.addAll(optionalHeaderInfo);
|
||||
hInfo.addAll(headerInfo);
|
||||
|
||||
// setup the sample names
|
||||
mHeader = new VCFHeader(hInfo, mSampleNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the samples names from genotype objects
|
||||
*
|
||||
* @param genotypes the genotype list
|
||||
*
|
||||
* @return a list of strings representing the sample names
|
||||
*/
|
||||
private static List<String> getSampleNames(List<Genotype> genotypes) {
|
||||
List<String> strings = new ArrayList<String>();
|
||||
for (Genotype genotype : genotypes) {
|
||||
if (!(genotype instanceof VCFGenotypeCall))
|
||||
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
|
||||
strings.add(((VCFGenotypeCall) genotype).getSampleName());
|
||||
}
|
||||
return strings;
|
||||
mWriter.writeHeader(mHeader);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -105,6 +83,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
* @param genotypes the list of genotypes
|
||||
*/
|
||||
public void addMultiSampleCall(List<Genotype> genotypes, VariationCall locusdata) {
|
||||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
||||
|
||||
if ( locusdata != null && !(locusdata instanceof VCFVariationCall) )
|
||||
throw new IllegalArgumentException("Only VCFVariationCall objects should be passed in to the VCF writers");
|
||||
|
||||
|
|
|
|||
|
|
@ -11,43 +11,39 @@ public class VCFWriter {
|
|||
|
||||
|
||||
// the VCF header we're storing
|
||||
private VCFHeader mHeader;
|
||||
private VCFHeader mHeader = null;
|
||||
|
||||
// the print stream we're writing to
|
||||
BufferedWriter mWriter;
|
||||
private final String FIELD_SEPERATOR = "\t";
|
||||
|
||||
/**
|
||||
* create a VCF writer, given a VCF header and a file to write to
|
||||
* create a VCF writer, given a file to write to
|
||||
*
|
||||
* @param header the VCF header
|
||||
* @param location the file location to write to
|
||||
*/
|
||||
public VCFWriter(VCFHeader header, File location) {
|
||||
public VCFWriter(File location) {
|
||||
FileOutputStream output;
|
||||
try {
|
||||
output = new FileOutputStream(location);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new RuntimeException("Unable to create VCF file at location: " + location);
|
||||
}
|
||||
initialize(header, output);
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF writer, given a VCF header and a file to write to
|
||||
* create a VCF writer, given a stream to write to
|
||||
*
|
||||
* @param header the VCF header
|
||||
* @param location the file location to write to
|
||||
* @param output the output stream to write to
|
||||
*/
|
||||
public VCFWriter(VCFHeader header, OutputStream location) {
|
||||
initialize(header, location);
|
||||
public VCFWriter(OutputStream output) {
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
}
|
||||
|
||||
private void initialize(VCFHeader header, OutputStream location) {
|
||||
public void writeHeader(VCFHeader header) {
|
||||
this.mHeader = header;
|
||||
mWriter = new BufferedWriter(
|
||||
new OutputStreamWriter(location));
|
||||
try {
|
||||
// the fileformat field needs to be written first
|
||||
TreeSet<VCFHeaderLine> nonFormatMetaData = new TreeSet<VCFHeaderLine>();
|
||||
|
|
@ -87,6 +83,9 @@ public class VCFWriter {
|
|||
* @param record the record to output
|
||||
*/
|
||||
public void addRecord(VCFRecord record) {
|
||||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
||||
|
||||
String vcfString = record.toStringEncoding(mHeader);
|
||||
try {
|
||||
mWriter.write(vcfString + "\n");
|
||||
|
|
@ -96,7 +95,6 @@ public class VCFWriter {
|
|||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* attempt to close the VCF file
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -5,10 +5,8 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodsBacked;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
|
|
@ -96,7 +94,8 @@ public class GLFWriterTest extends BaseTest {
|
|||
File writeTo = new File("testGLF.glf");
|
||||
writeTo.deleteOnExit();
|
||||
|
||||
rec = new GLFWriter(header, writeTo);
|
||||
rec = new GLFWriter(writeTo);
|
||||
((GLFWriter)rec).writeHeader(header);
|
||||
for (int x = 0; x < 100; x++) {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1);
|
||||
Genotype type = createGenotype(x % 10, loc, 'A');
|
||||
|
|
@ -112,7 +111,8 @@ public class GLFWriterTest extends BaseTest {
|
|||
File writeTo = new File("testGLF2.glf");
|
||||
writeTo.deleteOnExit();
|
||||
List<FakeGenotype> types = new ArrayList<FakeGenotype>();
|
||||
rec = new GLFWriter(header, writeTo);
|
||||
rec = new GLFWriter(writeTo);
|
||||
((GLFWriter)rec).writeHeader(header);
|
||||
for (int x = 0; x < 100; x++) {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1);
|
||||
FakeGenotype type = createGenotype(x % 10, loc, 'A');
|
||||
|
|
@ -167,7 +167,6 @@ class FakeGenotype extends GLFGenotypeCall implements Comparable<FakeGenotype> {
|
|||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(FakeGenotype that) {
|
||||
if (this.getLocation().compareTo(that.getLocation()) != 0) {
|
||||
System.err.println("Location's aren't equal; this = " + this.getLocation() + " that = " + that.getLocation());
|
||||
|
|
|
|||
|
|
@ -39,7 +39,8 @@ public class VCFWriterTest extends BaseTest {
|
|||
@Test
|
||||
public void testBasicWriteAndRead() {
|
||||
VCFHeader header = createFakeHeader(metaData,additionalColumns);
|
||||
VCFWriter writer = new VCFWriter(header,fakeVCFFile);
|
||||
VCFWriter writer = new VCFWriter(fakeVCFFile);
|
||||
writer.writeHeader(header);
|
||||
writer.addRecord(createVCFRecord(header));
|
||||
writer.addRecord(createVCFRecord(header));
|
||||
writer.close();
|
||||
|
|
|
|||
Loading…
Reference in New Issue