Refactored indexing part of StandardVCFWriter into superclass
-- Now other implementations of the VCFWriter can easily share common functions, such as writing an index on the fly
This commit is contained in:
parent
01b6177ce1
commit
fe5724b6ea
|
|
@ -0,0 +1,116 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.index.DynamicIndexCreator;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||
import org.broad.tribble.util.PositionalStream;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/**
|
||||
* this class writes VCF files
|
||||
*/
|
||||
public abstract class IndexingVCFWriter implements VCFWriter {
|
||||
final private File indexFile;
|
||||
final private String name;
|
||||
|
||||
private PositionalStream positionalStream;
|
||||
private DynamicIndexCreator indexer;
|
||||
private LittleEndianOutputStream idxStream;
|
||||
|
||||
protected IndexingVCFWriter(String name, File location, OutputStream output, boolean enableOnTheFlyIndexing) {
|
||||
this.name = name;
|
||||
|
||||
if ( enableOnTheFlyIndexing ) {
|
||||
indexFile = Tribble.indexFile(location);
|
||||
try {
|
||||
idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
||||
//System.out.println("Creating index on the fly for " + location);
|
||||
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||
indexer.initialize(location, indexer.defaultBinSize());
|
||||
positionalStream = new PositionalStream(output);
|
||||
} catch ( IOException ex ) {
|
||||
// No matter what we keep going, since we don't care if we can't create the index file
|
||||
}
|
||||
} else {
|
||||
idxStream = null;
|
||||
indexer = null;
|
||||
positionalStream = null;
|
||||
indexFile = null;
|
||||
}
|
||||
}
|
||||
|
||||
public String getStreamName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public abstract void writeHeader(VCFHeader header);
|
||||
|
||||
/**
|
||||
* attempt to close the VCF file
|
||||
*/
|
||||
public void close() {
|
||||
// try to close the index stream (keep it separate to help debugging efforts)
|
||||
if ( indexer != null ) {
|
||||
try {
|
||||
Index index = indexer.finalizeIndex(positionalStream.getPosition());
|
||||
index.write(idxStream);
|
||||
idxStream.close();
|
||||
} catch (IOException e) {
|
||||
throw new ReviewedStingException("Unable to close index for " + getStreamName(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* add a record to the file
|
||||
*
|
||||
* @param vc the Variant Context object
|
||||
*/
|
||||
public void add(VariantContext vc) {
|
||||
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
|
||||
if ( indexer != null )
|
||||
indexer.addFeature(vc, positionalStream.getPosition());
|
||||
}
|
||||
|
||||
protected static final String writerName(File location, OutputStream stream) {
|
||||
return location == null ? stream.toString() : location.getAbsolutePath();
|
||||
}
|
||||
|
||||
protected static OutputStream openOutputStream(File location) {
|
||||
try {
|
||||
return new FileOutputStream(location);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ReviewedStingException("Unable to create VCF file at location: " + location, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -44,26 +44,19 @@ import java.util.*;
|
|||
/**
|
||||
* this class writes VCF files
|
||||
*/
|
||||
public class StandardVCFWriter implements VCFWriter {
|
||||
public class StandardVCFWriter extends IndexingVCFWriter {
|
||||
// the print stream we're writing to
|
||||
final protected BufferedWriter mWriter;
|
||||
|
||||
// should we write genotypes or just sites?
|
||||
final protected boolean doNotWriteGenotypes;
|
||||
|
||||
// the VCF header we're storing
|
||||
protected VCFHeader mHeader = null;
|
||||
|
||||
// the print stream we're writing to
|
||||
protected BufferedWriter mWriter;
|
||||
protected PositionalStream positionalStream = null;
|
||||
|
||||
// were filters applied?
|
||||
protected boolean filtersWereAppliedToContext = false;
|
||||
|
||||
// should we write genotypes or just sites?
|
||||
protected boolean doNotWriteGenotypes = false;
|
||||
|
||||
protected DynamicIndexCreator indexer = null;
|
||||
protected File indexFile = null;
|
||||
LittleEndianOutputStream idxStream = null;
|
||||
File location = null;
|
||||
|
||||
/**
|
||||
* create a VCF writer, given a file to write to
|
||||
*
|
||||
|
|
@ -93,32 +86,22 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
* @param doNotWriteGenotypes do not write genotypes
|
||||
*/
|
||||
public StandardVCFWriter(OutputStream output, boolean doNotWriteGenotypes) {
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||
this(null, output, false, doNotWriteGenotypes);
|
||||
}
|
||||
|
||||
public StandardVCFWriter(File location, OutputStream output, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
||||
this.location = location;
|
||||
|
||||
if ( enableOnTheFlyIndexing ) {
|
||||
indexFile = Tribble.indexFile(location);
|
||||
try {
|
||||
idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
||||
//System.out.println("Creating index on the fly for " + location);
|
||||
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||
indexer.initialize(location, indexer.defaultBinSize());
|
||||
positionalStream = new PositionalStream(output);
|
||||
output = positionalStream;
|
||||
} catch ( IOException ex ) {
|
||||
// No matter what we keep going, since we don't care if we can't create the index file
|
||||
}
|
||||
}
|
||||
|
||||
//mWriter = new BufferedWriter(new OutputStreamWriter(new PositionalStream(output)));
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
super(writerName(location, output), location, output, enableOnTheFlyIndexing);
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output)); // todo -- fix buffer size
|
||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// VCFWriter interface functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void writeHeader(VCFHeader header) {
|
||||
mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
|
||||
|
||||
|
|
@ -158,44 +141,24 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new TribbleException("IOException writing the VCF header to " + locationString(), e);
|
||||
throw new ReviewedStingException("IOException writing the VCF header to " + getStreamName(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private String locationString() {
|
||||
return location == null ? mWriter.toString() : location.getAbsolutePath();
|
||||
}
|
||||
|
||||
/**
|
||||
* attempt to close the VCF file
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
// try to close the vcf stream
|
||||
try {
|
||||
mWriter.flush();
|
||||
mWriter.close();
|
||||
} catch (IOException e) {
|
||||
throw new TribbleException("Unable to close " + locationString() + " because of " + e.getMessage());
|
||||
throw new ReviewedStingException("Unable to close " + getStreamName(), e);
|
||||
}
|
||||
|
||||
// try to close the index stream (keep it separate to help debugging efforts)
|
||||
if ( indexer != null ) {
|
||||
try {
|
||||
Index index = indexer.finalizeIndex(positionalStream.getPosition());
|
||||
index.write(idxStream);
|
||||
idxStream.close();
|
||||
} catch (IOException e) {
|
||||
throw new TribbleException("Unable to close index for " + locationString() + " because of " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected static OutputStream openOutputStream(File location) {
|
||||
try {
|
||||
return new FileOutputStream(location);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new TribbleException("Unable to create VCF file at location: " + location);
|
||||
}
|
||||
super.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -203,28 +166,17 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
*
|
||||
* @param vc the Variant Context object
|
||||
*/
|
||||
@Override
|
||||
public void add(VariantContext vc) {
|
||||
add(vc, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* add a record to the file
|
||||
*
|
||||
* @param vc the Variant Context object
|
||||
* @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD)
|
||||
*/
|
||||
public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString());
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added: " + getStreamName());
|
||||
|
||||
if ( doNotWriteGenotypes )
|
||||
vc = VariantContext.modifyGenotypes(vc, null);
|
||||
|
||||
try {
|
||||
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles);
|
||||
|
||||
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
|
||||
if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition());
|
||||
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, false);
|
||||
super.add(vc);
|
||||
|
||||
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
|
||||
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
|
||||
|
|
@ -317,10 +269,16 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
mWriter.write("\n");
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to write the VCF object to " + locationString());
|
||||
throw new RuntimeException("Unable to write the VCF object to " + getStreamName());
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// implementation functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public static final String getFilterString(final VariantContext vc) {
|
||||
return getFilterString(vc, false);
|
||||
}
|
||||
|
|
@ -531,12 +489,11 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
}
|
||||
|
||||
|
||||
public static int countOccurrences(char c, String s) {
|
||||
private static int countOccurrences(char c, String s) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
count += s.charAt(i) == c ? 1 : 0;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue