Refactored the indexing part of StandardVCFWriter into a new abstract superclass, IndexingVCFWriter
-- Other implementations of VCFWriter can now easily share common functionality, such as writing an index on the fly
This commit is contained in:
parent
01b6177ce1
commit
fe5724b6ea
|
|
@ -0,0 +1,116 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import org.broad.tribble.Tribble;
|
||||||
|
import org.broad.tribble.TribbleException;
|
||||||
|
import org.broad.tribble.index.DynamicIndexCreator;
|
||||||
|
import org.broad.tribble.index.Index;
|
||||||
|
import org.broad.tribble.index.IndexFactory;
|
||||||
|
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||||
|
import org.broad.tribble.util.PositionalStream;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* this class writes VCF files
|
||||||
|
*/
|
||||||
|
public abstract class IndexingVCFWriter implements VCFWriter {
|
||||||
|
final private File indexFile;
|
||||||
|
final private String name;
|
||||||
|
|
||||||
|
private PositionalStream positionalStream;
|
||||||
|
private DynamicIndexCreator indexer;
|
||||||
|
private LittleEndianOutputStream idxStream;
|
||||||
|
|
||||||
|
protected IndexingVCFWriter(String name, File location, OutputStream output, boolean enableOnTheFlyIndexing) {
|
||||||
|
this.name = name;
|
||||||
|
|
||||||
|
if ( enableOnTheFlyIndexing ) {
|
||||||
|
indexFile = Tribble.indexFile(location);
|
||||||
|
try {
|
||||||
|
idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
||||||
|
//System.out.println("Creating index on the fly for " + location);
|
||||||
|
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||||
|
indexer.initialize(location, indexer.defaultBinSize());
|
||||||
|
positionalStream = new PositionalStream(output);
|
||||||
|
} catch ( IOException ex ) {
|
||||||
|
// No matter what we keep going, since we don't care if we can't create the index file
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
idxStream = null;
|
||||||
|
indexer = null;
|
||||||
|
positionalStream = null;
|
||||||
|
indexFile = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStreamName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract void writeHeader(VCFHeader header);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* attempt to close the VCF file
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
// try to close the index stream (keep it separate to help debugging efforts)
|
||||||
|
if ( indexer != null ) {
|
||||||
|
try {
|
||||||
|
Index index = indexer.finalizeIndex(positionalStream.getPosition());
|
||||||
|
index.write(idxStream);
|
||||||
|
idxStream.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ReviewedStingException("Unable to close index for " + getStreamName(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a record to the file
|
||||||
|
*
|
||||||
|
* @param vc the Variant Context object
|
||||||
|
*/
|
||||||
|
public void add(VariantContext vc) {
|
||||||
|
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
|
||||||
|
if ( indexer != null )
|
||||||
|
indexer.addFeature(vc, positionalStream.getPosition());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static final String writerName(File location, OutputStream stream) {
|
||||||
|
return location == null ? stream.toString() : location.getAbsolutePath();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static OutputStream openOutputStream(File location) {
|
||||||
|
try {
|
||||||
|
return new FileOutputStream(location);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new ReviewedStingException("Unable to create VCF file at location: " + location, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -44,26 +44,19 @@ import java.util.*;
|
||||||
/**
|
/**
|
||||||
* this class writes VCF files
|
* this class writes VCF files
|
||||||
*/
|
*/
|
||||||
public class StandardVCFWriter implements VCFWriter {
|
public class StandardVCFWriter extends IndexingVCFWriter {
|
||||||
|
// the print stream we're writing to
|
||||||
|
final protected BufferedWriter mWriter;
|
||||||
|
|
||||||
|
// should we write genotypes or just sites?
|
||||||
|
final protected boolean doNotWriteGenotypes;
|
||||||
|
|
||||||
// the VCF header we're storing
|
// the VCF header we're storing
|
||||||
protected VCFHeader mHeader = null;
|
protected VCFHeader mHeader = null;
|
||||||
|
|
||||||
// the print stream we're writing to
|
|
||||||
protected BufferedWriter mWriter;
|
|
||||||
protected PositionalStream positionalStream = null;
|
|
||||||
|
|
||||||
// were filters applied?
|
// were filters applied?
|
||||||
protected boolean filtersWereAppliedToContext = false;
|
protected boolean filtersWereAppliedToContext = false;
|
||||||
|
|
||||||
// should we write genotypes or just sites?
|
|
||||||
protected boolean doNotWriteGenotypes = false;
|
|
||||||
|
|
||||||
protected DynamicIndexCreator indexer = null;
|
|
||||||
protected File indexFile = null;
|
|
||||||
LittleEndianOutputStream idxStream = null;
|
|
||||||
File location = null;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a VCF writer, given a file to write to
|
* create a VCF writer, given a file to write to
|
||||||
*
|
*
|
||||||
|
|
@ -93,32 +86,22 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
* @param doNotWriteGenotypes do not write genotypes
|
* @param doNotWriteGenotypes do not write genotypes
|
||||||
*/
|
*/
|
||||||
public StandardVCFWriter(OutputStream output, boolean doNotWriteGenotypes) {
|
public StandardVCFWriter(OutputStream output, boolean doNotWriteGenotypes) {
|
||||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
this(null, output, false, doNotWriteGenotypes);
|
||||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public StandardVCFWriter(File location, OutputStream output, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
public StandardVCFWriter(File location, OutputStream output, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
||||||
this.location = location;
|
super(writerName(location, output), location, output, enableOnTheFlyIndexing);
|
||||||
|
mWriter = new BufferedWriter(new OutputStreamWriter(output)); // todo -- fix buffer size
|
||||||
if ( enableOnTheFlyIndexing ) {
|
|
||||||
indexFile = Tribble.indexFile(location);
|
|
||||||
try {
|
|
||||||
idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
|
||||||
//System.out.println("Creating index on the fly for " + location);
|
|
||||||
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
|
||||||
indexer.initialize(location, indexer.defaultBinSize());
|
|
||||||
positionalStream = new PositionalStream(output);
|
|
||||||
output = positionalStream;
|
|
||||||
} catch ( IOException ex ) {
|
|
||||||
// No matter what we keep going, since we don't care if we can't create the index file
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//mWriter = new BufferedWriter(new OutputStreamWriter(new PositionalStream(output)));
|
|
||||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
|
||||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// VCFWriter interface functions
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
public void writeHeader(VCFHeader header) {
|
public void writeHeader(VCFHeader header) {
|
||||||
mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
|
mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
|
||||||
|
|
||||||
|
|
@ -158,44 +141,24 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||||
}
|
}
|
||||||
catch (IOException e) {
|
catch (IOException e) {
|
||||||
throw new TribbleException("IOException writing the VCF header to " + locationString(), e);
|
throw new ReviewedStingException("IOException writing the VCF header to " + getStreamName(), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String locationString() {
|
|
||||||
return location == null ? mWriter.toString() : location.getAbsolutePath();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* attempt to close the VCF file
|
* attempt to close the VCF file
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
// try to close the vcf stream
|
// try to close the vcf stream
|
||||||
try {
|
try {
|
||||||
mWriter.flush();
|
mWriter.flush();
|
||||||
mWriter.close();
|
mWriter.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new TribbleException("Unable to close " + locationString() + " because of " + e.getMessage());
|
throw new ReviewedStingException("Unable to close " + getStreamName(), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// try to close the index stream (keep it separate to help debugging efforts)
|
super.close();
|
||||||
if ( indexer != null ) {
|
|
||||||
try {
|
|
||||||
Index index = indexer.finalizeIndex(positionalStream.getPosition());
|
|
||||||
index.write(idxStream);
|
|
||||||
idxStream.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new TribbleException("Unable to close index for " + locationString() + " because of " + e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static OutputStream openOutputStream(File location) {
|
|
||||||
try {
|
|
||||||
return new FileOutputStream(location);
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new TribbleException("Unable to create VCF file at location: " + location);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -203,28 +166,17 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
*
|
*
|
||||||
* @param vc the Variant Context object
|
* @param vc the Variant Context object
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public void add(VariantContext vc) {
|
public void add(VariantContext vc) {
|
||||||
add(vc, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* add a record to the file
|
|
||||||
*
|
|
||||||
* @param vc the Variant Context object
|
|
||||||
* @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD)
|
|
||||||
*/
|
|
||||||
public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
|
||||||
if ( mHeader == null )
|
if ( mHeader == null )
|
||||||
throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString());
|
throw new IllegalStateException("The VCF Header must be written before records can be added: " + getStreamName());
|
||||||
|
|
||||||
if ( doNotWriteGenotypes )
|
if ( doNotWriteGenotypes )
|
||||||
vc = VariantContext.modifyGenotypes(vc, null);
|
vc = VariantContext.modifyGenotypes(vc, null);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles);
|
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, false);
|
||||||
|
super.add(vc);
|
||||||
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
|
|
||||||
if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition());
|
|
||||||
|
|
||||||
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
|
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
|
||||||
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
|
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
|
||||||
|
|
@ -317,10 +269,16 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
mWriter.write("\n");
|
mWriter.write("\n");
|
||||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("Unable to write the VCF object to " + locationString());
|
throw new RuntimeException("Unable to write the VCF object to " + getStreamName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// implementation functions
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
public static final String getFilterString(final VariantContext vc) {
|
public static final String getFilterString(final VariantContext vc) {
|
||||||
return getFilterString(vc, false);
|
return getFilterString(vc, false);
|
||||||
}
|
}
|
||||||
|
|
@ -531,12 +489,11 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static int countOccurrences(char c, String s) {
|
private static int countOccurrences(char c, String s) {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
for (int i = 0; i < s.length(); i++) {
|
for (int i = 0; i < s.length(); i++) {
|
||||||
count += s.charAt(i) == c ? 1 : 0;
|
count += s.charAt(i) == c ? 1 : 0;
|
||||||
}
|
}
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue