Getting rid of GenotypeWriter interface. Of note:
- GATKVCFWriter deleted, to be replaced if absolutely necessary when VCF writing goes into Tribble.
- VCFWriter is now an interface, for easier redirection.
- VCFWriterImpl fleshes out the VCFWriter interface.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4026 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
542d394e09
commit
cb144734c0
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -99,7 +99,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
* @return A collection of type descriptors generating implementation-dependent placeholders.
|
||||
*/
|
||||
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
|
||||
return Arrays.asList( new GenotypeWriterArgumentTypeDescriptor(GATKEngine),
|
||||
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(GATKEngine),
|
||||
new SAMFileReaderArgumentTypeDescriptor(GATKEngine),
|
||||
new SAMFileWriterArgumentTypeDescriptor(GATKEngine),
|
||||
new OutputStreamArgumentTypeDescriptor(GATKEngine) );
|
||||
|
|
|
|||
|
|
@ -1,86 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.io.storage;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Set;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
|
||||
/**
|
||||
* Provides temporary storage for GenotypeWriters.
|
||||
*
|
||||
* @author ebanks
|
||||
* @version 0.1
|
||||
*/
|
||||
public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements GenotypeWriter, Storage<T> {
|
||||
protected final File file;
|
||||
protected final PrintStream stream;
|
||||
protected final GenotypeWriter writer;
|
||||
|
||||
/**
|
||||
* Constructs an object which will write directly into the output file provided by the stub.
|
||||
* Intentionally delaying the writing of the header -- this should be filled in by the walker.
|
||||
* @param stub Stub to use when constructing the output file.
|
||||
*/
|
||||
public GenotypeWriterStorage( GenotypeWriterStub stub ) {
|
||||
this.file = stub.getFile();
|
||||
this.stream = stub.getOutputStream();
|
||||
if(file != null)
|
||||
writer = GenotypeWriterFactory.create(file);
|
||||
else if(stream != null)
|
||||
writer = GenotypeWriterFactory.create(stream);
|
||||
else
|
||||
throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an object which will redirect into a different file.
|
||||
* @param stub Stub to use when synthesizing file / header info.
|
||||
* @param file File into which to direct the output data.
|
||||
*/
|
||||
public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) {
|
||||
this.file = file;
|
||||
this.stream = null;
|
||||
writer = GenotypeWriterFactory.create(file);
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
|
||||
GenotypeWriterFactory.writeHeader(writer, new VCFHeader(null, samples));
|
||||
}
|
||||
|
||||
public void add(VariantContext vc, byte ref) {
|
||||
writer.add(vc, ref);
|
||||
}
|
||||
|
||||
public void close() {
|
||||
writer.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.io.storage;
|
|||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -62,7 +62,7 @@ public class StorageFactory {
|
|||
* @param <T> Type of the stream to create.
|
||||
* @return Storage object with a facade of type T.
|
||||
*/
|
||||
public static <T> Storage<T> createStorage( Stub<T> stub, File file ) {
|
||||
public static <T> Storage<T> createStorage( Stub<T> stub, File file ) {
|
||||
Storage storage;
|
||||
|
||||
if(stub instanceof OutputStreamStub) {
|
||||
|
|
@ -77,12 +77,12 @@ public class StorageFactory {
|
|||
else
|
||||
storage = new SAMFileWriterStorage((SAMFileWriterStub)stub);
|
||||
}
|
||||
else if(stub instanceof GenotypeWriterStub) {
|
||||
GenotypeWriterStub genotypeWriterStub = (GenotypeWriterStub)stub;
|
||||
else if(stub instanceof VCFWriterStub) {
|
||||
VCFWriterStub vcfWriterStub = (VCFWriterStub)stub;
|
||||
if( file != null )
|
||||
storage = new VCFGenotypeWriterStorage(genotypeWriterStub,file);
|
||||
storage = new VCFWriterStorage(vcfWriterStub,file);
|
||||
else
|
||||
storage = new VCFGenotypeWriterStorage(genotypeWriterStub);
|
||||
storage = new VCFWriterStorage(vcfWriterStub);
|
||||
}
|
||||
else
|
||||
throw new StingException("Unsupported stub type: " + stub.getClass().getName());
|
||||
|
|
|
|||
|
|
@ -1,58 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.io.storage;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Provides temporary and permanent storage for genotypes in VCF format.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class VCFGenotypeWriterStorage extends GenotypeWriterStorage<VCFGenotypeWriter> implements VCFGenotypeWriter {
|
||||
/**
|
||||
* Creates new (permanent) storage for VCF genotype writers.
|
||||
* @param stub Stub containing appropriate input parameters.
|
||||
*/
|
||||
public VCFGenotypeWriterStorage(GenotypeWriterStub stub) {
|
||||
super(stub);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates new (temporary) storage for VCF genotype writers.
|
||||
* @param stub Stub containing appropriate input parameters.
|
||||
* @param target Target file for output data.
|
||||
*/
|
||||
public VCFGenotypeWriterStorage(GenotypeWriterStub stub,File target) {
|
||||
super(stub,target);
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize this VCF header
|
||||
*
|
||||
* @param header the header
|
||||
*/
|
||||
public void writeHeader(VCFHeader header) {
|
||||
((VCFGenotypeWriter)writer).writeHeader(header);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a given VCF file to the writer.
|
||||
* @param file file from which to add records
|
||||
*/
|
||||
public void append(File file) {
|
||||
((VCFGenotypeWriter)writer).append(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the stream backing up this temporary storage into the target.
|
||||
* @param target Target stream for the temporary storage. May not be null.
|
||||
*/
|
||||
public void mergeInto(VCFGenotypeWriter target) {
|
||||
target.append(file);
|
||||
file.delete();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,109 @@
|
|||
package org.broadinstitute.sting.gatk.io.storage;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Provides temporary and permanent storage for genotypes in VCF format.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||
protected final File file;
|
||||
protected final PrintStream stream;
|
||||
protected final VCFWriter writer;
|
||||
|
||||
/**
|
||||
* Constructs an object which will write directly into the output file provided by the stub.
|
||||
* Intentionally delaying the writing of the header -- this should be filled in by the walker.
|
||||
* @param stub Stub to use when constructing the output file.
|
||||
*/
|
||||
public VCFWriterStorage( VCFWriterStub stub ) {
|
||||
if(stub.getFile() != null) {
|
||||
this.file = stub.getFile();
|
||||
try {
|
||||
this.stream = new PrintStream(stub.getFile());
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new StingException("Unable to open target output stream",ex);
|
||||
}
|
||||
}
|
||||
else if(stub.getOutputStream() != null) {
|
||||
this.file = null;
|
||||
this.stream = stub.getOutputStream();
|
||||
}
|
||||
else
|
||||
throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||
|
||||
writer = new VCFWriterImpl(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an object which will redirect into a different file.
|
||||
* @param stub Stub to use when synthesizing file / header info.
|
||||
* @param file File into which to direct the output data.
|
||||
*/
|
||||
public VCFWriterStorage(VCFWriterStub stub, File file) {
|
||||
this.file = file;
|
||||
try {
|
||||
this.stream = new PrintStream(file);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new StingException("Unable to open target output stream",ex);
|
||||
}
|
||||
writer = new VCFWriterImpl(this.stream);
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
|
||||
writer.writeHeader(new VCFHeader(null, samples));
|
||||
}
|
||||
|
||||
public void add(VariantContext vc, byte ref) {
|
||||
writer.add(vc, ref);
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize this VCF header
|
||||
*
|
||||
* @param header the header
|
||||
*/
|
||||
public void writeHeader(VCFHeader header) {
|
||||
writer.writeHeader(header);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the VCF storage object.
|
||||
*/
|
||||
public void close() {
|
||||
writer.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the stream backing up this temporary storage into the target.
|
||||
* @param target Target stream for the temporary storage. May not be null.
|
||||
*/
|
||||
public void mergeInto(VCFWriterStorage target) {
|
||||
PrintStream formattingTarget = new PrintStream(target.stream);
|
||||
try {
|
||||
BufferedReader reader = new BufferedReader(new FileReader(file));
|
||||
String line = reader.readLine();
|
||||
while ( line != null ) {
|
||||
if (!VCFHeaderLine.isHeaderLine(line))
|
||||
formattingTarget.printf("%s%n",line);
|
||||
line = reader.readLine();
|
||||
}
|
||||
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Error reading file " + file + " in GATKVCFWriter: ", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.io.stubs;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Stub providing a passthrough for VCF files.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class VCFGenotypeWriterStub extends GenotypeWriterStub<VCFGenotypeWriter> implements VCFGenotypeWriter {
|
||||
/**
|
||||
* Construct a new stub with the given engine and target file.
|
||||
* @param engine The engine, for extracting command-line arguments, etc.
|
||||
* @param genotypeFile Target file into which to write genotyping data.
|
||||
*/
|
||||
public VCFGenotypeWriterStub(GenomeAnalysisEngine engine, File genotypeFile) {
|
||||
super(engine,genotypeFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new stub with the given engine and target stream.
|
||||
* @param engine The engine, for extracting command-line arguments, etc.
|
||||
* @param genotypeStream Target stream into which to write genotyping data.
|
||||
*/
|
||||
public VCFGenotypeWriterStub(GenomeAnalysisEngine engine, PrintStream genotypeStream) {
|
||||
super(engine,genotypeStream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the format of this stub. We may want to discontinue use of this method and rely on instanceof comparisons.
|
||||
* @return VCF always.
|
||||
*/
|
||||
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
|
||||
return GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize this VCF header
|
||||
*
|
||||
* @param header the header
|
||||
*/
|
||||
public void writeHeader(VCFHeader header) {
|
||||
outputTracker.getStorage(this).writeHeader(header);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a given VCF file to the writer.
|
||||
* @param file file from which to add records
|
||||
*/
|
||||
public void append(File file) {
|
||||
outputTracker.getStorage(this).append(file);
|
||||
}
|
||||
}
|
||||
|
|
@ -26,8 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.io.stubs;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -41,7 +40,7 @@ import java.util.Arrays;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
/**
|
||||
* The engine into which output stubs should be fed.
|
||||
*/
|
||||
|
|
@ -51,7 +50,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
* Create a new GenotypeWriter argument, notifying the given engine when that argument has been created.
|
||||
* @param engine the engine to be notified.
|
||||
*/
|
||||
public GenotypeWriterArgumentTypeDescriptor(GenomeAnalysisEngine engine) {
|
||||
public VCFWriterArgumentTypeDescriptor(GenomeAnalysisEngine engine) {
|
||||
this.engine = engine;
|
||||
}
|
||||
|
||||
|
|
@ -62,7 +61,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
*/
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
return GenotypeWriter.class.equals(type);
|
||||
return VCFWriter.class.equals(type);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -73,8 +72,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
*/
|
||||
@Override
|
||||
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
|
||||
return Arrays.asList( createGenotypeFileArgumentDefinition(source),
|
||||
createGenotypeFormatArgumentDefinition(source) );
|
||||
return Arrays.asList( createGenotypeFileArgumentDefinition(source) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -92,7 +90,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
*/
|
||||
@Override
|
||||
public Object getDefault() {
|
||||
GenotypeWriterStub defaultGenotypeWriter = new VCFGenotypeWriterStub(engine,System.out);
|
||||
VCFWriterStub defaultGenotypeWriter = new VCFWriterStub(engine,System.out);
|
||||
engine.addOutput(defaultGenotypeWriter);
|
||||
return defaultGenotypeWriter;
|
||||
}
|
||||
|
|
@ -111,7 +109,7 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
File writerFile = writerFileName != null ? new File(writerFileName) : null;
|
||||
|
||||
// Create a stub for the given object.
|
||||
GenotypeWriterStub stub = (writerFile != null) ? new VCFGenotypeWriterStub(engine, writerFile) : new VCFGenotypeWriterStub(engine,System.out);
|
||||
VCFWriterStub stub = (writerFile != null) ? new VCFWriterStub(engine, writerFile) : new VCFWriterStub(engine,System.out);
|
||||
|
||||
engine.addOutput(stub);
|
||||
|
||||
|
|
@ -136,27 +134,4 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
source.isHidden(),
|
||||
null );
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the optional variant output format argument for the genotype writer.
|
||||
* @param source Argument source for the genotype writer.  Must not be null.
|
||||
* @return Argument definition for the variant output format.  Will not be null.
|
||||
*/
|
||||
private ArgumentDefinition createGenotypeFormatArgumentDefinition(ArgumentSource source) {
|
||||
Annotation annotation = this.getArgumentAnnotation(source);
|
||||
return new ArgumentDefinition( ArgumentIOType.getIOType(annotation),
|
||||
GenotypeWriterFactory.GENOTYPE_FORMAT.class,
|
||||
"variant_output_format",
|
||||
"vf",
|
||||
"Format to be used to represent variants; default is VCF",
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
null,
|
||||
source.isHidden(),
|
||||
null,
|
||||
null,
|
||||
null );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -29,10 +29,11 @@ import java.io.File;
|
|||
import java.io.PrintStream;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.storage.VCFWriterStorage;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
/**
|
||||
|
|
@ -41,7 +42,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
* @author ebanks
|
||||
* @version 0.1
|
||||
*/
|
||||
public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements Stub<T>, GenotypeWriter {
|
||||
public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
||||
|
||||
/**
|
||||
* Engine to use for collecting attributes for the output SAM file.
|
||||
|
|
@ -71,7 +72,7 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
|
|||
* @param engine GATK engine.
|
||||
* @param genotypeFile file to (ultimately) create.
|
||||
*/
|
||||
public GenotypeWriterStub(GenomeAnalysisEngine engine,File genotypeFile) {
|
||||
public VCFWriterStub(GenomeAnalysisEngine engine,File genotypeFile) {
|
||||
this.engine = engine;
|
||||
this.genotypeFile = genotypeFile;
|
||||
this.genotypeStream = null;
|
||||
|
|
@ -82,7 +83,7 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
|
|||
* @param engine GATK engine.
|
||||
* @param genotypeStream stream to (ultimately) write.
|
||||
*/
|
||||
public GenotypeWriterStub(GenomeAnalysisEngine engine,PrintStream genotypeStream) {
|
||||
public VCFWriterStub(GenomeAnalysisEngine engine,PrintStream genotypeStream) {
|
||||
this.engine = engine;
|
||||
this.genotypeFile = null;
|
||||
this.genotypeStream = genotypeStream;
|
||||
|
|
@ -112,12 +113,6 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
|
|||
return engine.getSAMFileHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the format to use when creating the new file.
|
||||
* @return format to use when creating the new file.
|
||||
*/
|
||||
public abstract GenotypeWriterFactory.GENOTYPE_FORMAT getFormat();
|
||||
|
||||
/**
|
||||
* Registers the given streamConnector with this stub.
|
||||
* @param outputTracker The connector used to provide an appropriate stream.
|
||||
|
|
@ -126,6 +121,10 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
|
|||
this.outputTracker = outputTracker;
|
||||
}
|
||||
|
||||
public void writeHeader(VCFHeader header) {
|
||||
outputTracker.getStorage(this).writeHeader(header);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
|
|
@ -139,5 +138,4 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
|
|||
public void close() {
|
||||
outputTracker.getStorage(this).close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -126,7 +126,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
vcfwriter = new VCFWriter(out);
|
||||
vcfwriter = new VCFWriterImpl(out);
|
||||
vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -153,7 +154,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFHeaderLine("VariantAnnotator", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\""));
|
||||
}
|
||||
|
||||
vcfWriter = new VCFWriter(out);
|
||||
vcfWriter = new VCFWriterImpl(out);
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -118,7 +119,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFHeaderLine("VariantFiltration", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\""));
|
||||
}
|
||||
|
||||
writer = new VCFWriter(out);
|
||||
writer = new VCFWriterImpl(out);
|
||||
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(vc.getSampleNames())));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
|
@ -56,7 +56,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
|
|||
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
|
||||
@Argument(doc = "VCF file to which variants should be written", required = false)
|
||||
public GenotypeWriter writer = null;
|
||||
public VCFWriter writer = null;
|
||||
|
||||
@Argument(fullName="rod_list", shortName="rods", doc="A comma-separated string describing the rod names representing individual call batches", required=true)
|
||||
protected String ROD_STRING = null;
|
||||
|
|
@ -91,7 +91,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
|
|||
UG_engine.samples = samples;
|
||||
|
||||
// initialize the header
|
||||
GenotypeWriterFactory.writeHeader(writer, new VCFHeader(headerLines, samples));
|
||||
writer.writeHeader(new VCFHeader(headerLines, samples));
|
||||
}
|
||||
|
||||
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
|
||||
// control the output
|
||||
@Argument(doc = "File to which variants should be written", required = false)
|
||||
public GenotypeWriter writer = null;
|
||||
public VCFWriter writer = null;
|
||||
|
||||
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
||||
protected PrintStream verboseWriter = null;
|
||||
|
|
@ -126,16 +126,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
}
|
||||
|
||||
// initialize the header
|
||||
GenotypeWriterFactory.writeHeader(writer, new VCFHeader(getHeaderInfo(), UG_engine.samples)) ;
|
||||
writer.writeHeader(new VCFHeader(getHeaderInfo(), UG_engine.samples)) ;
|
||||
}
|
||||
|
||||
private Set<VCFHeaderLine> getHeaderInfo() {
|
||||
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// this is only applicable to VCF
|
||||
if ( !(writer instanceof VCFGenotypeWriter) )
|
||||
return headerInfo;
|
||||
|
||||
// all annotation fields from VariantAnnotatorEngine
|
||||
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
|
||||
|
|
@ -65,7 +65,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the various loggers and writers
|
||||
protected Logger logger = null;
|
||||
protected GenotypeWriter genotypeWriter = null;
|
||||
protected VCFWriter vcfWriter = null;
|
||||
protected PrintStream verboseWriter = null;
|
||||
|
||||
// samples in input
|
||||
|
|
@ -76,15 +76,15 @@ public class UnifiedGenotyperEngine {
|
|||
initialize(toolkit, UAC, null, null, null, null);
|
||||
}
|
||||
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) {
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, VCFWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) {
|
||||
initialize(toolkit, UAC, logger, genotypeWriter, verboseWriter, engine);
|
||||
|
||||
}
|
||||
|
||||
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) {
|
||||
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, VCFWriter genotypeWriter, PrintStream verboseWriter, VariantAnnotatorEngine engine) {
|
||||
this.UAC = UAC;
|
||||
this.logger = logger;
|
||||
this.genotypeWriter = genotypeWriter;
|
||||
this.vcfWriter = genotypeWriter;
|
||||
this.verboseWriter = verboseWriter;
|
||||
this.annotationEngine = engine;
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -118,7 +119,7 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
|
|||
if ( sampleNames == null )
|
||||
sampleNames = new TreeSet<String>();
|
||||
|
||||
VCFWriter vcfWriter = new VCFWriter(out);
|
||||
VCFWriter vcfWriter = new VCFWriterImpl(out);
|
||||
|
||||
// set up the info and filter headers
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -336,7 +337,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
determineContextNamePartSizes();
|
||||
|
||||
if ( outputVCF != null )
|
||||
writer = new VCFWriter(new File(outputVCF));
|
||||
writer = new VCFWriterImpl(new File(outputVCF));
|
||||
|
||||
if ( rsIDFile != null ) {
|
||||
if ( maxRsIDBuild == Integer.MAX_VALUE )
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -148,7 +149,7 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
|
|||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFHeaderLineType.Float, "The original variant quality score"));
|
||||
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
|
||||
vcfWriter = new VCFWriter( new File(OUTPUT_FILENAME) );
|
||||
vcfWriter = new VCFWriterImpl( new File(OUTPUT_FILENAME) );
|
||||
final TreeSet<String> samples = new TreeSet<String>();
|
||||
samples.addAll(SampleUtils.getSampleListWithVCFHeader(getToolkit(), null));
|
||||
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
|
@ -137,7 +138,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
|
||||
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit()));
|
||||
|
||||
vcfWriter = new VCFWriter( new File(OUTPUT_PREFIX + ".vcf") );
|
||||
vcfWriter = new VCFWriterImpl( new File(OUTPUT_PREFIX + ".vcf") );
|
||||
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
private VariantAnnotatorEngine engine;
|
||||
|
||||
public void initialize() {
|
||||
vcfWriter = new VCFWriter(out);
|
||||
vcfWriter = new VCFWriterImpl(out);
|
||||
validateAnnotateUnionArguments();
|
||||
|
||||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), null);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -51,7 +52,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
|
|||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||
|
||||
writer = new VCFWriter(out);
|
||||
writer = new VCFWriterImpl(out);
|
||||
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
|
||||
writer.writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -74,7 +75,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||
|
||||
writer = new VCFWriter(out);
|
||||
writer = new VCFWriterImpl(out);
|
||||
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
|
||||
writer.writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -12,15 +12,14 @@
|
|||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
|
@ -43,6 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
|
|
@ -79,7 +79,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||
*/
|
||||
public void initialize() {
|
||||
vcfWriter = new VCFWriter(out);
|
||||
vcfWriter = new VCFWriterImpl(out);
|
||||
|
||||
ArrayList<String> rodNames = new ArrayList<String>();
|
||||
rodNames.add("variant");
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{
|
|||
anno.add(new VCFInfoHeaderLine("type",1, VCFHeaderLineType.String,"Genomic interpretation (according to RefSeq)"));
|
||||
hInfo.addAll(anno);
|
||||
|
||||
vcfWriter = new VCFWriter(out);
|
||||
vcfWriter = new VCFWriterImpl(out);
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -46,7 +47,7 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
|
|||
}
|
||||
|
||||
if ( outVCF != null ) {
|
||||
vcfWriter = new VCFWriter(outVCF);
|
||||
vcfWriter = new VCFWriterImpl(outVCF);
|
||||
Set<VCFHeaderLine> header = new HashSet<VCFHeaderLine>();
|
||||
header.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
VCFHeader vcfHeader = new VCFHeader(header, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -372,7 +373,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
*********** REDUCE INIT
|
||||
*/
|
||||
public VCFWriter reduceInit() {
|
||||
VCFWriter writer = new VCFWriter(out);
|
||||
VCFWriter writer = new VCFWriterImpl(out);
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("source", "MendelianViolationClassifier"));
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
public void initialize() {
|
||||
if ( outputVCF != null )
|
||||
writer = new VCFWriter(new File(outputVCF));
|
||||
writer = new VCFWriterImpl(new File(outputVCF));
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
|
@ -79,7 +80,7 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
|
|||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
|
||||
|
||||
vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
|
||||
vcfWriter = new VCFWriterImpl(new File(OUTPUT_FILE));
|
||||
VCFHeader header = null;
|
||||
for( final ReferenceOrderedDataSource source : dataSources ) {
|
||||
final RMDTrack rod = source.getReferenceOrderedData();
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
import org.broad.tribble.vcf.*;
|
||||
|
||||
import java.io.*;
|
||||
|
|
@ -92,7 +93,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFHeaderLine("source", "BeagleImputation"));
|
||||
|
||||
// Open output file specified by output VCF ROD
|
||||
vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
|
||||
vcfWriter = new VCFWriterImpl(new File(OUTPUT_FILE));
|
||||
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
||||
|
||||
for( final ReferenceOrderedDataSource source : dataSources ) {
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -72,7 +73,7 @@ public class ReadBackedPhasingWalker extends LocusWalker<Pair<VariantContextStat
|
|||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
writer = new VCFWriter(new File(phasedVCFFile));
|
||||
writer = new VCFWriterImpl(new File(phasedVCFFile));
|
||||
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(vc.getSampleNames())));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -86,7 +87,7 @@ public class TrioGenotyperWalker extends RefWalker<VariantContext, Integer>{
|
|||
FAMILY_MEMBERS = Arrays.asList(mom, dad, kid);
|
||||
|
||||
// initialize the writer
|
||||
writer = new VCFWriter(new File(vcfOutputFile));
|
||||
writer = new VCFWriterImpl(new File(vcfOutputFile));
|
||||
}
|
||||
|
||||
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||
|
||||
/**
|
||||
* Annotates variant calls with information from user-specified tabular files.
|
||||
|
|
@ -240,7 +241,7 @@ public class GenomicAnnotator extends RodWalker<LinkedList<VariantContext>, Link
|
|||
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
||||
hInfo.addAll(engine.getVCFAnnotationDescriptions());
|
||||
|
||||
vcfWriter = new VCFWriter(VCF_OUT);
|
||||
vcfWriter = new VCFWriterImpl(VCF_OUT);
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ public class VariantSelect extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
|
||||
}
|
||||
|
||||
writer = new VCFWriter(out);
|
||||
writer = new VCFWriterImpl(out);
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||
|
||||
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import net.sf.samtools.SAMFileWriter;
|
|||
import org.apache.commons.lang.StringEscapeUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
|
|
@ -212,7 +212,7 @@ public abstract class ArgumentField {
|
|||
if (InputStream.class.isAssignableFrom(clazz)) return File.class;
|
||||
if (SAMFileReader.class.isAssignableFrom(clazz)) return File.class;
|
||||
if (OutputStream.class.isAssignableFrom(clazz)) return File.class;
|
||||
if (GenotypeWriter.class.isAssignableFrom(clazz)) return File.class;
|
||||
if (VCFWriter.class.isAssignableFrom(clazz)) return File.class;
|
||||
if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class;
|
||||
if (PlatformUnitFilterHelper.class.isAssignableFrom(clazz)) return String.class;
|
||||
return clazz;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||
|
|
@ -83,7 +83,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
@Override
|
||||
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
|
||||
List<ArgumentTypeDescriptor> typeDescriptors = new ArrayList<ArgumentTypeDescriptor>();
|
||||
typeDescriptors.add(new GenotypeWriterArgumentTypeDescriptor(GATKEngine));
|
||||
typeDescriptors.add(new VCFWriterArgumentTypeDescriptor(GATKEngine));
|
||||
typeDescriptors.add(new SAMFileReaderArgumentTypeDescriptor(GATKEngine));
|
||||
typeDescriptors.add(new SAMFileWriterArgumentTypeDescriptor(GATKEngine));
|
||||
typeDescriptors.add(new OutputStreamArgumentTypeDescriptor(GATKEngine));
|
||||
|
|
|
|||
|
|
@ -1,49 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron, ebanks
|
||||
* <p/>
|
||||
* Class GenotypeWriter
|
||||
* <p/>
|
||||
* The interface for writing genotype calls.
|
||||
*/
|
||||
public interface GenotypeWriter {
|
||||
/**
|
||||
* Add a record, given a variant context, with the genotype fields restricted to what is defined in the header
|
||||
* @param vc the variant context representing the call to add
|
||||
* @param refBase This is required for VCF writers, as the VCF format explicitly requires (previous) ref base for an indel.
|
||||
*/
|
||||
public void add(VariantContext vc, byte refBase);
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
public void close();
|
||||
|
||||
}
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.vcf.GATKVCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenotypeWriterFactory
|
||||
* <p/>
|
||||
* Factory for genotype writers; every create() overload currently returns a GATKVCFWriter.
|
||||
*/
|
||||
public class GenotypeWriterFactory {
|
||||
/** available genotype writers */
|
||||
public enum GENOTYPE_FORMAT {
|
||||
GELI, GLF, GELI_BINARY, VCF
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype writer
|
||||
* @param destination the destination file
|
||||
* @return the genotype writer object
|
||||
*/
|
||||
public static GenotypeWriter create(File destination) {
|
||||
return new GATKVCFWriter(destination);
|
||||
}
|
||||
|
||||
public static GenotypeWriter create(PrintStream destination) {
|
||||
return new GATKVCFWriter(destination);
|
||||
}
|
||||
|
||||
public static void writeHeader(GenotypeWriter writer, VCFHeader vcfHeader) {
|
||||
((VCFGenotypeWriter)writer).writeHeader(vcfHeader);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
|
||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron, ebanks
|
||||
* @version 1.0
|
||||
* <p/>
|
||||
* Class GeliAdapter
|
||||
* Adapts the Geli file writer to the Genotype writer interface
|
||||
*/
|
||||
public class GeliAdapter implements GeliGenotypeWriter {
|
||||
|
||||
// the file we're writing to
|
||||
private File writeTo = null;
|
||||
|
||||
// the geli file writer we're adapting
|
||||
private GeliFileWriter writer = null;
|
||||
|
||||
/**
|
||||
* wrap a GeliFileWriter in the Genotype writer interface
|
||||
*
|
||||
* @param writeTo where to write to
|
||||
*/
|
||||
public GeliAdapter(File writeTo) {
|
||||
this.writeTo = writeTo;
|
||||
}
|
||||
|
||||
/**
|
||||
* wrap a GeliFileWriter in the Genotype writer interface
|
||||
*
|
||||
* @param fileHeader the file header to write out
|
||||
*/
|
||||
public void writeHeader(final SAMFileHeader fileHeader) {
|
||||
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
|
||||
}
|
||||
|
||||
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
|
||||
if ( writer == null )
|
||||
throw new IllegalStateException("The Geli Header must be written before records can be added");
|
||||
|
||||
writer.addGenotypeLikelihoods(gl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a genotype, given a variant context
|
||||
*
|
||||
* @param vc the variant context representing the call to add
|
||||
* @param refBase not used by this writer
|
||||
*/
|
||||
public void add(VariantContext vc, byte refBase) {
|
||||
throw new UnsupportedOperationException("We no longer support writing Geli");
|
||||
}
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
public void close() {
|
||||
if (this.writer != null) {
|
||||
this.writer.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
|
||||
/**
|
||||
* An extension of the GenotypeWriter interface with support
|
||||
* for adding a header.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public interface GeliGenotypeWriter extends GenotypeWriter {
|
||||
/**
|
||||
* Write the file header.
|
||||
* @param fileHeader SAM file header from which to derive the geli header.
|
||||
*/
|
||||
public void writeHeader(final SAMFileHeader fileHeader);
|
||||
|
||||
/**
|
||||
* Writes the genotype likelihoods to the output.
|
||||
* @param gl genotype likelihoods to write.
|
||||
*/
|
||||
public void addGenotypeLikelihoods(GenotypeLikelihoods gl);
|
||||
}
|
||||
|
|
@ -1,79 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.io.PrintWriter;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GeliTextWriter
|
||||
* <p/>
|
||||
* write out the geli text file format containing genotype information
|
||||
*/
|
||||
public class GeliTextWriter implements GeliGenotypeWriter {
|
||||
// where we write to
|
||||
PrintWriter mWriter;
|
||||
|
||||
// used to store the max mapping quality as a field in variant contexts
|
||||
public static final String MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY = "MAXIMUM_MAPPING_QUALITY";
|
||||
// used to store the max mapping quality as a field in variant contexts
|
||||
public static final String READ_COUNT_ATTRIBUTE_KEY = "READ_COUNT";
|
||||
|
||||
/**
|
||||
* create a geli text writer
|
||||
*
|
||||
* @param file the file to write to
|
||||
*/
|
||||
public GeliTextWriter(File file) {
|
||||
try {
|
||||
mWriter = new PrintWriter(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to open file " + file.toURI());
|
||||
}
|
||||
}
|
||||
|
||||
public GeliTextWriter(PrintStream out) {
|
||||
mWriter = new PrintWriter(out);
|
||||
}
|
||||
|
||||
public final static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";
|
||||
|
||||
/**
|
||||
* Write the file header.
|
||||
* @param fileHeader SAM file header from which to derive the geli header.
|
||||
*/
|
||||
public void writeHeader(final SAMFileHeader fileHeader) {
|
||||
// ignore the SAM header; the geli text header is fixed.
|
||||
mWriter.println(headerLine);
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a genotype, given a variant context
|
||||
*
|
||||
* @param vc the variant context representing the call to add
|
||||
* @param refBase required by the inteface; not used by this writer.
|
||||
*/
|
||||
public void add(VariantContext vc, byte refBase) {
|
||||
throw new UnsupportedOperationException("We no longer support writing Geli");
|
||||
}
|
||||
|
||||
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
|
||||
mWriter.println(gl.toString());
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
}
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
public void close() {
|
||||
mWriter.flush();
|
||||
mWriter.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.glf;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
|
||||
/**
|
||||
* An extension of the GenotypeWriter interface with support
|
||||
* for adding header lines.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public interface GLFGenotypeWriter extends GenotypeWriter {
|
||||
/**
|
||||
* Append the given header text to the GLF file.
|
||||
* @param headerText the file header to write out
|
||||
*/
|
||||
public void writeHeader(String headerText);
|
||||
|
||||
/**
|
||||
* add a GLF record to the output file
|
||||
*
|
||||
* @param contigName the contig name
|
||||
* @param contigLength the contig length
|
||||
* @param rec the GLF record to write.
|
||||
*/
|
||||
public void addGLFRecord(String contigName, int contigLength, GLFRecord rec);
|
||||
}
|
||||
|
|
@ -43,7 +43,7 @@ import java.io.OutputStream;
|
|||
* single and variable length genotype calls using the provided functions. When you've finished
|
||||
* generating GLF records, make sure you close the file.
|
||||
*/
|
||||
public class GLFWriter implements GLFGenotypeWriter {
|
||||
public class GLFWriter {
|
||||
// our output codec
|
||||
private final BinaryCodec outputBinaryCodec;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,29 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* An extension of the GenotypeWriter interface with support
|
||||
* for adding header lines.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public interface VCFGenotypeWriter extends GenotypeWriter {
|
||||
/**
|
||||
* initialize this VCF header
|
||||
*
|
||||
* @param header the header
|
||||
*/
|
||||
public void writeHeader(VCFHeader header);
|
||||
|
||||
/**
|
||||
* Add a given VCF file to the writer.
|
||||
* @param file file from which to add records
|
||||
*/
|
||||
public void append(File file);
|
||||
|
||||
}
|
||||
|
|
@ -1,407 +1,19 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* this class writes VCF files
|
||||
*/
|
||||
public class VCFWriter {
|
||||
public interface VCFWriter {
|
||||
|
||||
// the VCF header we're storing
|
||||
protected VCFHeader mHeader = null;
|
||||
|
||||
// the buffered writer we're writing to
|
||||
protected BufferedWriter mWriter;
|
||||
|
||||
// were filters applied?
|
||||
protected boolean filtersWereAppliedToContext = false;
|
||||
|
||||
/**
|
||||
* create a VCF writer, given a file to write to
|
||||
*
|
||||
* @param location the file location to write to
|
||||
*/
|
||||
public VCFWriter(File location) {
|
||||
FileOutputStream output;
|
||||
try {
|
||||
output = new FileOutputStream(location);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new RuntimeException("Unable to create VCF file at location: " + location);
|
||||
}
|
||||
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF writer, given a stream to write to
|
||||
*
|
||||
* @param output the output stream to write to
|
||||
*/
|
||||
public VCFWriter(OutputStream output) {
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
}
|
||||
|
||||
public void writeHeader(VCFHeader header) {
|
||||
this.mHeader = header;
|
||||
|
||||
try {
|
||||
// the file format field needs to be written first
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
|
||||
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
|
||||
line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) ||
|
||||
line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) )
|
||||
continue;
|
||||
|
||||
// are the records filtered (so we know what to put in the FILTER column of passing records) ?
|
||||
if ( line instanceof VCFFilterHeaderLine )
|
||||
filtersWereAppliedToContext = true;
|
||||
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR);
|
||||
mWriter.write(line.toString());
|
||||
mWriter.write("\n");
|
||||
}
|
||||
|
||||
// write out the column line
|
||||
mWriter.write(VCFHeader.HEADER_INDICATOR);
|
||||
for ( VCFHeader.HEADER_FIELDS field : header.getHeaderFields() ) {
|
||||
mWriter.write(field.toString());
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
}
|
||||
|
||||
if ( header.hasGenotypingData() ) {
|
||||
mWriter.write("FORMAT");
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
for ( String sample : header.getGenotypeSamples() ) {
|
||||
mWriter.write(sample);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
} }
|
||||
|
||||
mWriter.write("\n");
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException("IOException writing the VCF header", e);
|
||||
}
|
||||
}
|
||||
public void writeHeader(VCFHeader header);
|
||||
|
||||
/**
|
||||
* attempt to close the VCF file
|
||||
*/
|
||||
public void close() {
|
||||
try {
|
||||
mWriter.flush();
|
||||
mWriter.close();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to close VCFFile");
|
||||
}
|
||||
}
|
||||
|
||||
public void add(VariantContext vc, byte refBase) {
|
||||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
||||
|
||||
try {
|
||||
|
||||
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, refBase);
|
||||
|
||||
GenomeLoc loc = VariantContextUtils.getLocation(vc);
|
||||
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
|
||||
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
|
||||
|
||||
// CHROM
|
||||
mWriter.write(loc.getContig());
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// POS
|
||||
mWriter.write(String.valueOf(loc.getStart()));
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// ID
|
||||
String ID = vc.hasAttribute(VariantContext.ID_KEY) ? vc.getAttributeAsString(VariantContext.ID_KEY) : VCFConstants.EMPTY_ID_FIELD;
|
||||
mWriter.write(ID);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// REF
|
||||
alleleMap.put(vc.getReference(), "0");
|
||||
String refString = vc.getReference().getBaseString();
|
||||
mWriter.write(refString);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// ALT
|
||||
if ( vc.isVariant() ) {
|
||||
Allele altAllele = vc.getAlternateAllele(0);
|
||||
alleleMap.put(altAllele, "1");
|
||||
String alt = altAllele.getBaseString();
|
||||
mWriter.write(alt);
|
||||
|
||||
for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
|
||||
altAllele = vc.getAlternateAllele(i);
|
||||
alleleMap.put(altAllele, String.valueOf(i+1));
|
||||
alt = altAllele.getBaseString();
|
||||
mWriter.write(",");
|
||||
mWriter.write(alt);
|
||||
}
|
||||
} else {
|
||||
mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
|
||||
}
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// QUAL
|
||||
if ( !vc.hasNegLog10PError() )
|
||||
mWriter.write(VCFConstants.MISSING_VALUE_v4);
|
||||
else
|
||||
mWriter.write(getQualValue(vc.getPhredScaledQual()));
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// FILTER
|
||||
String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
mWriter.write(filters);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// INFO
|
||||
Map<String, String> infoFields = new TreeMap<String, String>();
|
||||
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
||||
String key = field.getKey();
|
||||
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) )
|
||||
continue;
|
||||
|
||||
String outputValue = formatVCFField(field.getValue());
|
||||
if ( outputValue != null )
|
||||
infoFields.put(key, outputValue);
|
||||
}
|
||||
writeInfoString(infoFields);
|
||||
|
||||
// FORMAT
|
||||
List<String> genotypeAttributeKeys = new ArrayList<String>();
|
||||
if ( vc.hasGenotypes() ) {
|
||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||
for ( String key : calcVCFGenotypeKeys(vc) ) {
|
||||
genotypeAttributeKeys.add(key);
|
||||
}
|
||||
} else if ( mHeader.hasGenotypingData() ) {
|
||||
// this needs to be done in case all samples are no-calls
|
||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||
}
|
||||
|
||||
if ( genotypeAttributeKeys.size() > 0 ) {
|
||||
String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
mWriter.write(genotypeFormatString);
|
||||
|
||||
addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
|
||||
}
|
||||
|
||||
mWriter.write("\n");
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to write the VCF object to a file");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private String getQualValue(double qual) {
|
||||
String s = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual);
|
||||
if ( s.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) )
|
||||
s = s.substring(0, s.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length());
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the info string; assumes that no values are null
|
||||
*
|
||||
* @param infoFields a map of info fields
|
||||
* @throws IOException for writer
|
||||
*/
|
||||
protected void writeInfoString(Map<String, String> infoFields) throws IOException {
|
||||
if ( infoFields.isEmpty() ) {
|
||||
mWriter.write(VCFConstants.EMPTY_INFO_FIELD);
|
||||
return;
|
||||
}
|
||||
|
||||
boolean isFirst = true;
|
||||
for ( Map.Entry<String, String> entry : infoFields.entrySet() ) {
|
||||
if ( isFirst )
|
||||
isFirst = false;
|
||||
else
|
||||
mWriter.write(VCFConstants.INFO_FIELD_SEPARATOR);
|
||||
|
||||
String key = entry.getKey();
|
||||
mWriter.write(key);
|
||||
|
||||
if ( !entry.getValue().equals("") ) {
|
||||
int numVals = 1;
|
||||
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
|
||||
if ( metaData != null )
|
||||
numVals = metaData.getCount();
|
||||
|
||||
// take care of unbounded encoding
|
||||
if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
|
||||
numVals = 1;
|
||||
|
||||
if ( numVals > 0 ) {
|
||||
mWriter.write("=");
|
||||
mWriter.write(entry.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* add the genotype data
|
||||
*
|
||||
* @param vc the variant context
|
||||
* @param genotypeFormatKeys Genotype formatting string
|
||||
* @param alleleMap alleles for this context
|
||||
* @throws IOException for writer
|
||||
*/
|
||||
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
|
||||
throws IOException {
|
||||
|
||||
for ( String sample : mHeader.getGenotypeSamples() ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
Genotype g = vc.getGenotype(sample);
|
||||
if ( g == null ) {
|
||||
// TODO -- The VariantContext needs to know what the general ploidy is of the samples
|
||||
// TODO -- We shouldn't be assuming diploid genotypes here!
|
||||
mWriter.write(VCFConstants.EMPTY_GENOTYPE);
|
||||
continue;
|
||||
}
|
||||
|
||||
writeAllele(g.getAllele(0), alleleMap);
|
||||
for (int i = 1; i < g.getPloidy(); i++) {
|
||||
mWriter.write(g.genotypesArePhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
|
||||
writeAllele(g.getAllele(i), alleleMap);
|
||||
}
|
||||
|
||||
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||
for ( String key : genotypeFormatKeys ) {
|
||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) )
|
||||
continue;
|
||||
|
||||
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
|
||||
|
||||
// some exceptions
|
||||
if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
|
||||
val = VCFConstants.MISSING_VALUE_v4;
|
||||
else {
|
||||
val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
|
||||
}
|
||||
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
}
|
||||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
|
||||
if ( metaData != null ) {
|
||||
int numInFormatField = metaData.getCount();
|
||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||
// For example, if Number=2, the string has to be ".,."
|
||||
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
|
||||
for ( int i = 1; i < numInFormatField; i++ ) {
|
||||
sb.append(",");
|
||||
sb.append(VCFConstants.MISSING_VALUE_v4);
|
||||
}
|
||||
val = sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// assume that if key is absent, then the given string encoding suffices
|
||||
String outputValue = formatVCFField(val);
|
||||
if ( outputValue != null )
|
||||
attrs.add(outputValue);
|
||||
}
|
||||
|
||||
// strip off trailing missing values
|
||||
for (int i = attrs.size()-1; i >= 0; i--) {
|
||||
if ( isMissingValue(attrs.get(i)) )
|
||||
attrs.remove(i);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
for (String s : attrs ) {
|
||||
mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
|
||||
mWriter.write(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isMissingValue(String s) {
|
||||
// we need to deal with the case that it's a list of missing values
|
||||
return (MathUtils.countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + MathUtils.countOccurrences(',', s) == s.length());
|
||||
}
|
||||
|
||||
private void writeAllele(Allele allele, Map<Allele, String> alleleMap) throws IOException {
|
||||
String encoding = alleleMap.get(allele);
|
||||
if ( encoding == null )
|
||||
throw new StingException("Allele " + allele + " is not an allele in the variant context");
|
||||
mWriter.write(encoding);
|
||||
}
|
||||
|
||||
private static String formatVCFField(Object val) {
|
||||
String result;
|
||||
if ( val == null )
|
||||
result = VCFConstants.MISSING_VALUE_v4;
|
||||
else if ( val instanceof Double )
|
||||
result = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, (Double)val);
|
||||
else if ( val instanceof Boolean )
|
||||
result = (Boolean)val ? "" : null; // empty string for true, null for false
|
||||
else if ( val instanceof List ) {
|
||||
result = formatVCFField(((List)val).toArray());
|
||||
} else if ( val instanceof Object[] ) {
|
||||
Object[] array = (Object[])val;
|
||||
if ( array.length == 0 )
|
||||
return formatVCFField(null);
|
||||
StringBuffer sb = new StringBuffer(formatVCFField(array[0]));
|
||||
for ( int i = 1; i < array.length; i++) {
|
||||
sb.append(",");
|
||||
sb.append(formatVCFField(array[i]));
|
||||
}
|
||||
result = sb.toString();
|
||||
} else
|
||||
result = val.toString();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
|
||||
Set<String> keys = new HashSet<String>();
|
||||
|
||||
boolean sawGoodQual = false;
|
||||
boolean sawGenotypeFilter = false;
|
||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
keys.addAll(g.getAttributes().keySet());
|
||||
if ( g.hasNegLog10PError() )
|
||||
sawGoodQual = true;
|
||||
if (g.isFiltered() && g.isCalled())
|
||||
sawGenotypeFilter = true;
|
||||
}
|
||||
|
||||
if ( sawGoodQual )
|
||||
keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||
|
||||
if (sawGenotypeFilter)
|
||||
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
|
||||
return Utils.sorted(new ArrayList<String>(keys));
|
||||
}
|
||||
|
||||
public void close();
|
||||
|
||||
public void add(VariantContext vc, byte refBase);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,431 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* this class writes VCF files
|
||||
*/
|
||||
public class VCFWriterImpl implements VCFWriter {
|
||||
|
||||
// the VCF header we're storing
|
||||
protected VCFHeader mHeader = null;
|
||||
|
||||
// the buffered writer we're writing to
|
||||
protected BufferedWriter mWriter;
|
||||
|
||||
// were filters applied?
|
||||
protected boolean filtersWereAppliedToContext = false;
|
||||
|
||||
/**
 * create a VCF writer, given a file to write to
 *
 * @param location the file location to write to; the file is opened immediately
 * @throws RuntimeException if the file cannot be opened for writing; the underlying
 *                          FileNotFoundException is attached as the cause
 */
public VCFWriterImpl(File location) {
    FileOutputStream output;
    try {
        output = new FileOutputStream(location);
    } catch (FileNotFoundException e) {
        // keep the cause: it carries the OS-level reason the file could not be opened
        throw new RuntimeException("Unable to create VCF file at location: " + location, e);
    }

    mWriter = new BufferedWriter(new OutputStreamWriter(output));
}
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF writer, given a stream to write to
|
||||
*
|
||||
* @param output the file location to write to
|
||||
*/
|
||||
public VCFWriterImpl(OutputStream output) {
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
}
|
||||
|
||||
public void writeHeader(VCFHeader header) {
|
||||
this.mHeader = header;
|
||||
|
||||
try {
|
||||
// the file format field needs to be written first
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
|
||||
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
|
||||
line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) ||
|
||||
line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) )
|
||||
continue;
|
||||
|
||||
// are the records filtered (so we know what to put in the FILTER column of passing records) ?
|
||||
if ( line instanceof VCFFilterHeaderLine)
|
||||
filtersWereAppliedToContext = true;
|
||||
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR);
|
||||
mWriter.write(line.toString());
|
||||
mWriter.write("\n");
|
||||
}
|
||||
|
||||
// write out the column line
|
||||
mWriter.write(VCFHeader.HEADER_INDICATOR);
|
||||
for ( VCFHeader.HEADER_FIELDS field : header.getHeaderFields() ) {
|
||||
mWriter.write(field.toString());
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
}
|
||||
|
||||
if ( header.hasGenotypingData() ) {
|
||||
mWriter.write("FORMAT");
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
for ( String sample : header.getGenotypeSamples() ) {
|
||||
mWriter.write(sample);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
} }
|
||||
|
||||
mWriter.write("\n");
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException("IOException writing the VCF header", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* attempt to close the VCF file
|
||||
*/
|
||||
public void close() {
|
||||
try {
|
||||
mWriter.flush();
|
||||
mWriter.close();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to close VCFFile");
|
||||
}
|
||||
}
|
||||
|
||||
public void add(VariantContext vc, byte refBase) {
|
||||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
||||
|
||||
try {
|
||||
|
||||
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, refBase);
|
||||
|
||||
GenomeLoc loc = VariantContextUtils.getLocation(vc);
|
||||
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
|
||||
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
|
||||
|
||||
// CHROM
|
||||
mWriter.write(loc.getContig());
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// POS
|
||||
mWriter.write(String.valueOf(loc.getStart()));
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// ID
|
||||
String ID = vc.hasAttribute(VariantContext.ID_KEY) ? vc.getAttributeAsString(VariantContext.ID_KEY) : VCFConstants.EMPTY_ID_FIELD;
|
||||
mWriter.write(ID);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// REF
|
||||
alleleMap.put(vc.getReference(), "0");
|
||||
String refString = vc.getReference().getBaseString();
|
||||
mWriter.write(refString);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// ALT
|
||||
if ( vc.isVariant() ) {
|
||||
Allele altAllele = vc.getAlternateAllele(0);
|
||||
alleleMap.put(altAllele, "1");
|
||||
String alt = altAllele.getBaseString();
|
||||
mWriter.write(alt);
|
||||
|
||||
for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
|
||||
altAllele = vc.getAlternateAllele(i);
|
||||
alleleMap.put(altAllele, String.valueOf(i+1));
|
||||
alt = altAllele.getBaseString();
|
||||
mWriter.write(",");
|
||||
mWriter.write(alt);
|
||||
}
|
||||
} else {
|
||||
mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
|
||||
}
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// QUAL
|
||||
if ( !vc.hasNegLog10PError() )
|
||||
mWriter.write(VCFConstants.MISSING_VALUE_v4);
|
||||
else
|
||||
mWriter.write(getQualValue(vc.getPhredScaledQual()));
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// FILTER
|
||||
String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
mWriter.write(filters);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// INFO
|
||||
Map<String, String> infoFields = new TreeMap<String, String>();
|
||||
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
||||
String key = field.getKey();
|
||||
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) )
|
||||
continue;
|
||||
|
||||
String outputValue = formatVCFField(field.getValue());
|
||||
if ( outputValue != null )
|
||||
infoFields.put(key, outputValue);
|
||||
}
|
||||
writeInfoString(infoFields);
|
||||
|
||||
// FORMAT
|
||||
List<String> genotypeAttributeKeys = new ArrayList<String>();
|
||||
if ( vc.hasGenotypes() ) {
|
||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||
for ( String key : calcVCFGenotypeKeys(vc) ) {
|
||||
genotypeAttributeKeys.add(key);
|
||||
}
|
||||
} else if ( mHeader.hasGenotypingData() ) {
|
||||
// this needs to be done in case all samples are no-calls
|
||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||
}
|
||||
|
||||
if ( genotypeAttributeKeys.size() > 0 ) {
|
||||
String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
mWriter.write(genotypeFormatString);
|
||||
|
||||
addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
|
||||
}
|
||||
|
||||
mWriter.write("\n");
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to write the VCF object to a file");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private String getQualValue(double qual) {
|
||||
String s = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual);
|
||||
if ( s.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) )
|
||||
s = s.substring(0, s.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length());
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the info string; assumes that no values are null
|
||||
*
|
||||
* @param infoFields a map of info fields
|
||||
* @throws IOException for writer
|
||||
*/
|
||||
protected void writeInfoString(Map<String, String> infoFields) throws IOException {
|
||||
if ( infoFields.isEmpty() ) {
|
||||
mWriter.write(VCFConstants.EMPTY_INFO_FIELD);
|
||||
return;
|
||||
}
|
||||
|
||||
boolean isFirst = true;
|
||||
for ( Map.Entry<String, String> entry : infoFields.entrySet() ) {
|
||||
if ( isFirst )
|
||||
isFirst = false;
|
||||
else
|
||||
mWriter.write(VCFConstants.INFO_FIELD_SEPARATOR);
|
||||
|
||||
String key = entry.getKey();
|
||||
mWriter.write(key);
|
||||
|
||||
if ( !entry.getValue().equals("") ) {
|
||||
int numVals = 1;
|
||||
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
|
||||
if ( metaData != null )
|
||||
numVals = metaData.getCount();
|
||||
|
||||
// take care of unbounded encoding
|
||||
if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
|
||||
numVals = 1;
|
||||
|
||||
if ( numVals > 0 ) {
|
||||
mWriter.write("=");
|
||||
mWriter.write(entry.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* add the genotype data
|
||||
*
|
||||
* @param vc the variant context
|
||||
* @param genotypeFormatKeys Genotype formatting string
|
||||
* @param alleleMap alleles for this context
|
||||
* @throws IOException for writer
|
||||
*/
|
||||
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
|
||||
throws IOException {
|
||||
|
||||
for ( String sample : mHeader.getGenotypeSamples() ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
Genotype g = vc.getGenotype(sample);
|
||||
if ( g == null ) {
|
||||
// TODO -- The VariantContext needs to know what the general ploidy is of the samples
|
||||
// TODO -- We shouldn't be assuming diploid genotypes here!
|
||||
mWriter.write(VCFConstants.EMPTY_GENOTYPE);
|
||||
continue;
|
||||
}
|
||||
|
||||
writeAllele(g.getAllele(0), alleleMap);
|
||||
for (int i = 1; i < g.getPloidy(); i++) {
|
||||
mWriter.write(g.genotypesArePhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
|
||||
writeAllele(g.getAllele(i), alleleMap);
|
||||
}
|
||||
|
||||
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||
for ( String key : genotypeFormatKeys ) {
|
||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) )
|
||||
continue;
|
||||
|
||||
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
|
||||
|
||||
// some exceptions
|
||||
if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
|
||||
val = VCFConstants.MISSING_VALUE_v4;
|
||||
else {
|
||||
val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
|
||||
}
|
||||
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
}
|
||||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
|
||||
if ( metaData != null ) {
|
||||
int numInFormatField = metaData.getCount();
|
||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||
// For example, if Number=2, the string has to be ".,."
|
||||
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
|
||||
for ( int i = 1; i < numInFormatField; i++ ) {
|
||||
sb.append(",");
|
||||
sb.append(VCFConstants.MISSING_VALUE_v4);
|
||||
}
|
||||
val = sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// assume that if key is absent, then the given string encoding suffices
|
||||
String outputValue = formatVCFField(val);
|
||||
if ( outputValue != null )
|
||||
attrs.add(outputValue);
|
||||
}
|
||||
|
||||
// strip off trailing missing values
|
||||
for (int i = attrs.size()-1; i >= 0; i--) {
|
||||
if ( isMissingValue(attrs.get(i)) )
|
||||
attrs.remove(i);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
for (String s : attrs ) {
|
||||
mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
|
||||
mWriter.write(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isMissingValue(String s) {
|
||||
// we need to deal with the case that it's a list of missing values
|
||||
return (MathUtils.countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + MathUtils.countOccurrences(',', s) == s.length());
|
||||
}
|
||||
|
||||
private void writeAllele(Allele allele, Map<Allele, String> alleleMap) throws IOException {
|
||||
String encoding = alleleMap.get(allele);
|
||||
if ( encoding == null )
|
||||
throw new StingException("Allele " + allele + " is not an allele in the variant context");
|
||||
mWriter.write(encoding);
|
||||
}
|
||||
|
||||
private static String formatVCFField(Object val) {
|
||||
String result;
|
||||
if ( val == null )
|
||||
result = VCFConstants.MISSING_VALUE_v4;
|
||||
else if ( val instanceof Double )
|
||||
result = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, (Double)val);
|
||||
else if ( val instanceof Boolean )
|
||||
result = (Boolean)val ? "" : null; // empty string for true, null for false
|
||||
else if ( val instanceof List ) {
|
||||
result = formatVCFField(((List)val).toArray());
|
||||
} else if ( val instanceof Object[] ) {
|
||||
Object[] array = (Object[])val;
|
||||
if ( array.length == 0 )
|
||||
return formatVCFField(null);
|
||||
StringBuffer sb = new StringBuffer(formatVCFField(array[0]));
|
||||
for ( int i = 1; i < array.length; i++) {
|
||||
sb.append(",");
|
||||
sb.append(formatVCFField(array[i]));
|
||||
}
|
||||
result = sb.toString();
|
||||
} else
|
||||
result = val.toString();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
|
||||
Set<String> keys = new HashSet<String>();
|
||||
|
||||
boolean sawGoodQual = false;
|
||||
boolean sawGenotypeFilter = false;
|
||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
keys.addAll(g.getAttributes().keySet());
|
||||
if ( g.hasNegLog10PError() )
|
||||
sawGoodQual = true;
|
||||
if (g.isFiltered() && g.isCalled())
|
||||
sawGenotypeFilter = true;
|
||||
}
|
||||
|
||||
if ( sawGoodQual )
|
||||
keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||
|
||||
if (sawGenotypeFilter)
|
||||
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
|
||||
return Utils.sorted(new ArrayList<String>(keys));
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broad.tribble.vcf.*;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class GATKVCFWriter
|
||||
* <p/>
|
||||
* GATK-specific version of the VCF Writer
|
||||
*/
|
||||
public class GATKVCFWriter extends VCFWriter implements VCFGenotypeWriter {
|
||||
|
||||
public GATKVCFWriter(File writeTo) {
|
||||
super(writeTo);
|
||||
}
|
||||
|
||||
public GATKVCFWriter(OutputStream writeTo) {
|
||||
super(writeTo);
|
||||
}
|
||||
|
||||
public void writeHeader(VCFHeader header) {
|
||||
// TODO -- put the command-line generating code for the header right here
|
||||
super.writeHeader(header);
|
||||
}
|
||||
|
||||
public void append(File file) {
|
||||
try {
|
||||
BufferedReader reader = new BufferedReader(new FileReader(file));
|
||||
String line = reader.readLine();
|
||||
while ( line != null ) {
|
||||
if ( !VCFHeaderLine.isHeaderLine(line) ) {
|
||||
mWriter.write(line);
|
||||
mWriter.write("\n");
|
||||
}
|
||||
line = reader.readLine();
|
||||
}
|
||||
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Error reading file " + file + " in GATKVCFWriter: ", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -45,7 +45,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testBasicWriteAndRead() {
|
||||
VCFHeader header = createFakeHeader(metaData,additionalColumns);
|
||||
VCFWriter writer = new VCFWriter(fakeVCFFile);
|
||||
VCFWriter writer = new VCFWriterImpl(fakeVCFFile);
|
||||
writer.writeHeader(header);
|
||||
writer.add(createVC(header),"A".getBytes()[0]);
|
||||
writer.add(createVC(header),"A".getBytes()[0]);
|
||||
|
|
|
|||
Loading…
Reference in New Issue