Refactored indexing part of StandardVCFWriter into superclass

-- Now other implementations of the VCFWriter can easily share common functions, such as writing an index on the fly
This commit is contained in:
Mark DePristo 2011-09-07 23:27:08 -04:00
parent 01b6177ce1
commit fe5724b6ea
2 changed files with 148 additions and 75 deletions

View File

@ -0,0 +1,116 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import org.broad.tribble.Tribble;
import org.broad.tribble.TribbleException;
import org.broad.tribble.index.DynamicIndexCreator;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broad.tribble.util.PositionalStream;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
/**
 * Abstract base class for {@link VCFWriter} implementations that can optionally
 * build a Tribble index on the fly while records are being written.
 *
 * <p>When on-the-fly indexing is enabled, the constructor wraps the destination
 * stream in a {@link PositionalStream}; {@link #add(VariantContext)} registers each
 * record with the index at the stream's current position, and {@link #close()}
 * finalizes the index and writes it to the file returned by
 * {@code Tribble.indexFile(location)}.
 *
 * <p><b>Subclasses must send every byte of VCF output through
 * {@link #getOutputStream()}</b> rather than through the raw stream handed to the
 * constructor, and must call {@code super.add(vc)} before writing the bytes of a
 * record. Otherwise the positional stream never sees the data, so the positions
 * recorded in the index cannot reflect what was written (presumably
 * {@code PositionalStream} only counts bytes that pass through it -- confirm
 * against the Tribble sources).
 */
public abstract class IndexingVCFWriter implements VCFWriter {
    // where the index is written; null when on-the-fly indexing was not requested
    final private File indexFile;
    // human readable name of the destination, used in error messages
    final private String name;

    // the stream subclasses must write to: the positional wrapper when indexing, else the raw stream
    private OutputStream outputStream;

    // the three fields below are either all non-null (indexing active) or all null (indexing off)
    private PositionalStream positionalStream = null;
    private DynamicIndexCreator indexer = null;
    private LittleEndianOutputStream idxStream = null;

    /**
     * Create the writer and, if requested, set up on-the-fly indexing.
     *
     * Index creation is best effort: if setting up the index fails with an
     * IOException the writer silently continues without indexing.
     *
     * @param name                   name reported by {@link #getStreamName()}
     * @param location               the VCF file being written; used to derive the index file
     *                               and passed to the index creator. Only read when indexing is enabled
     * @param output                 the stream the VCF will be written to
     * @param enableOnTheFlyIndexing if true, try to build an index while writing
     */
    protected IndexingVCFWriter(String name, File location, OutputStream output, boolean enableOnTheFlyIndexing) {
        this.name = name;
        this.outputStream = output;
        this.indexFile = enableOnTheFlyIndexing ? Tribble.indexFile(location) : null;

        if ( enableOnTheFlyIndexing ) {
            try {
                idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
                //System.out.println("Creating index on the fly for " + location);
                indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
                indexer.initialize(location, indexer.defaultBinSize());
                positionalStream = new PositionalStream(output);
                // from now on all VCF bytes have to go through the positional stream
                outputStream = positionalStream;
            } catch ( IOException ex ) {
                // No matter what we keep going, since we don't care if we can't create the index file.
                // But make sure we don't leave a half-initialized indexer behind: add() and close()
                // assume that a non-null indexer implies a usable positional stream and index stream.
                disableIndexing();
                outputStream = output;
            }
        }
    }

    /**
     * Turn indexing off, releasing the index stream if it was already opened.
     */
    private void disableIndexing() {
        if ( idxStream != null ) {
            try {
                idxStream.close();
            } catch ( IOException ignored ) {
                // nothing useful was written to the index yet, so there is nothing to report
            }
        }
        idxStream = null;
        indexer = null;
        positionalStream = null;
    }

    /**
     * The stream that subclasses must write VCF bytes to.
     *
     * @return the position-tracking wrapper around the constructor's output stream when
     *         on-the-fly indexing is active, otherwise the constructor's output stream itself
     */
    protected OutputStream getOutputStream() {
        return outputStream;
    }

    /**
     * @return the name given to this writer at construction time
     */
    public String getStreamName() {
        return name;
    }

    public abstract void writeHeader(VCFHeader header);

    /**
     * Finalize and write the index, if one is being built, and close the index stream.
     *
     * This does not close the VCF output itself; subclasses are expected to flush and
     * close their own writer before calling this method so that the final position
     * covers everything that was written.
     *
     * @throws ReviewedStingException if the index cannot be written or closed
     */
    public void close() {
        // try to close the index stream (keep it separate to help debugging efforts)
        if ( indexer != null ) {
            try {
                try {
                    Index index = indexer.finalizeIndex(positionalStream.getPosition());
                    index.write(idxStream);
                } finally {
                    // release the file handle even if finalizing or writing the index failed
                    idxStream.close();
                }
            } catch (IOException e) {
                throw new ReviewedStingException("Unable to close index for " + getStreamName(), e);
            }
        }
    }

    /**
     * Register a record with the index, if one is being built.
     *
     * Subclasses must call this ***before*** writing any bytes of the record, because
     * the record is indexed at the current position of the output stream.
     *
     * @param vc the Variant Context object
     */
    public void add(VariantContext vc) {
        // if we are doing on the fly indexing, add the record ***before*** we write any bytes
        if ( indexer != null )
            indexer.addFeature(vc, positionalStream.getPosition());
    }

    /**
     * Build a human readable name for the destination of a writer.
     *
     * @param location the file being written, or null when writing to a bare stream
     * @param stream   the stream being written; only used when location is null
     * @return the absolute path of location, or the string form of stream when location is null
     */
    protected static final String writerName(File location, OutputStream stream) {
        // String.valueOf avoids a NullPointerException when neither a file nor a stream is given
        return location == null ? String.valueOf(stream) : location.getAbsolutePath();
    }

    /**
     * Open a file for writing, converting the checked exception into a ReviewedStingException.
     *
     * @param location the file to create
     * @return a new FileOutputStream on location
     * @throws ReviewedStingException if the file cannot be opened for writing
     */
    protected static OutputStream openOutputStream(File location) {
        try {
            return new FileOutputStream(location);
        } catch (FileNotFoundException e) {
            throw new ReviewedStingException("Unable to create VCF file at location: " + location, e);
        }
    }
}

View File

@ -44,26 +44,19 @@ import java.util.*;
/**
* this class writes VCF files
*/
public class StandardVCFWriter implements VCFWriter {
public class StandardVCFWriter extends IndexingVCFWriter {
// the print stream we're writing to
final protected BufferedWriter mWriter;
// should we write genotypes or just sites?
final protected boolean doNotWriteGenotypes;
// the VCF header we're storing
protected VCFHeader mHeader = null;
// the print stream we're writing to
protected BufferedWriter mWriter;
protected PositionalStream positionalStream = null;
// were filters applied?
protected boolean filtersWereAppliedToContext = false;
// should we write genotypes or just sites?
protected boolean doNotWriteGenotypes = false;
protected DynamicIndexCreator indexer = null;
protected File indexFile = null;
LittleEndianOutputStream idxStream = null;
File location = null;
/**
* create a VCF writer, given a file to write to
*
@ -93,32 +86,22 @@ public class StandardVCFWriter implements VCFWriter {
* @param doNotWriteGenotypes do not write genotypes
*/
public StandardVCFWriter(OutputStream output, boolean doNotWriteGenotypes) {
mWriter = new BufferedWriter(new OutputStreamWriter(output));
this.doNotWriteGenotypes = doNotWriteGenotypes;
this(null, output, false, doNotWriteGenotypes);
}
public StandardVCFWriter(File location, OutputStream output, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
this.location = location;
if ( enableOnTheFlyIndexing ) {
indexFile = Tribble.indexFile(location);
try {
idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
//System.out.println("Creating index on the fly for " + location);
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
indexer.initialize(location, indexer.defaultBinSize());
positionalStream = new PositionalStream(output);
output = positionalStream;
} catch ( IOException ex ) {
// No matter what we keep going, since we don't care if we can't create the index file
}
}
//mWriter = new BufferedWriter(new OutputStreamWriter(new PositionalStream(output)));
mWriter = new BufferedWriter(new OutputStreamWriter(output));
super(writerName(location, output), location, output, enableOnTheFlyIndexing);
mWriter = new BufferedWriter(new OutputStreamWriter(output)); // todo -- fix buffer size
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
// --------------------------------------------------------------------------------
//
// VCFWriter interface functions
//
// --------------------------------------------------------------------------------
@Override
public void writeHeader(VCFHeader header) {
mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
@ -158,44 +141,24 @@ public class StandardVCFWriter implements VCFWriter {
mWriter.flush(); // necessary so that writing to an output stream will work
}
catch (IOException e) {
throw new TribbleException("IOException writing the VCF header to " + locationString(), e);
throw new ReviewedStingException("IOException writing the VCF header to " + getStreamName(), e);
}
}
private String locationString() {
return location == null ? mWriter.toString() : location.getAbsolutePath();
}
/**
* attempt to close the VCF file
*/
@Override
public void close() {
// try to close the vcf stream
try {
mWriter.flush();
mWriter.close();
} catch (IOException e) {
throw new TribbleException("Unable to close " + locationString() + " because of " + e.getMessage());
throw new ReviewedStingException("Unable to close " + getStreamName(), e);
}
// try to close the index stream (keep it separate to help debugging efforts)
if ( indexer != null ) {
try {
Index index = indexer.finalizeIndex(positionalStream.getPosition());
index.write(idxStream);
idxStream.close();
} catch (IOException e) {
throw new TribbleException("Unable to close index for " + locationString() + " because of " + e.getMessage());
}
}
}
protected static OutputStream openOutputStream(File location) {
try {
return new FileOutputStream(location);
} catch (FileNotFoundException e) {
throw new TribbleException("Unable to create VCF file at location: " + location);
}
super.close();
}
/**
@ -203,28 +166,17 @@ public class StandardVCFWriter implements VCFWriter {
*
* @param vc the Variant Context object
*/
@Override
public void add(VariantContext vc) {
add(vc, false);
}
/**
* add a record to the file
*
* @param vc the Variant Context object
* @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD)
*/
public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString());
throw new IllegalStateException("The VCF Header must be written before records can be added: " + getStreamName());
if ( doNotWriteGenotypes )
vc = VariantContext.modifyGenotypes(vc, null);
try {
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles);
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition());
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, false);
super.add(vc);
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
@ -317,10 +269,16 @@ public class StandardVCFWriter implements VCFWriter {
mWriter.write("\n");
mWriter.flush(); // necessary so that writing to an output stream will work
} catch (IOException e) {
throw new RuntimeException("Unable to write the VCF object to " + locationString());
throw new RuntimeException("Unable to write the VCF object to " + getStreamName());
}
}
// --------------------------------------------------------------------------------
//
// implementation functions
//
// --------------------------------------------------------------------------------
public static final String getFilterString(final VariantContext vc) {
return getFilterString(vc, false);
}
@ -531,12 +489,11 @@ public class StandardVCFWriter implements VCFWriter {
}
public static int countOccurrences(char c, String s) {
private static int countOccurrences(char c, String s) {
int count = 0;
for (int i = 0; i < s.length(); i++) {
count += s.charAt(i) == c ? 1 : 0;
}
return count;
}
}