Speed-up for VCF: add code to the VCF reader to automatically create an index if one doesn't already exist, and change the VCF writer unit test

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3305 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-05-05 20:19:42 +00:00
parent 6bbcc47b5d
commit a0d71540df
4 changed files with 19 additions and 5 deletions

View File

@ -61,7 +61,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
// the linear index extension
private static final String linearIndexExtension = ".idx";
public static final String linearIndexExtension = ".idx";
/** Create a new plugin manager. */
public TribbleRMDTrackBuilder() {
@ -118,7 +118,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @return a linear index for the specified type
* @throws IOException if we cannot write the index file
*/
private LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException {
public static LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException {
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
return create.createIndex();
}
@ -131,7 +131,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @param inputFile the target file to make an index for
* @return true if we need to create an index, false otherwise
*/
private boolean requireIndex(File inputFile) {
public static boolean requireIndex(File inputFile) {
// can we read the index? if not, create an index
File indexFile = new File(inputFile.getAbsolutePath() + linearIndexExtension);
if (!(indexFile.canRead())) return true;

View File

@ -63,13 +63,15 @@ public abstract class GATKFeature implements Feature {
*/
public static class TribbleGATKFeature extends GATKFeature {
private final Feature feature;
private GenomeLoc position = null;
public TribbleGATKFeature(Feature f, String name) {
super(name);
feature = f;
}
public GenomeLoc getLocation() {
return GenomeLocParser.createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd());
if (position == null) position = GenomeLocParser.createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd());
return position;
}
/** Return the feature's reference sequence name, e.g. chromosome or contig */

View File

@ -21,7 +21,9 @@ import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
@ -57,6 +59,14 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
private void initialize(File vcfFile, VCFCodec.LineTransform transform) {
VCFCodec codec = new VCFCodec();
LinearIndex index = null;
if (TribbleRMDTrackBuilder.requireIndex(vcfFile)) {
try {
index = TribbleRMDTrackBuilder.createIndex(vcfFile, new VCFCodec());
} catch (IOException e) {
throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?");
}
}
if (transform != null) codec.setTransformer(transform);
try {
vcfReader = new FeatureReader(vcfFile,codec);

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -54,6 +55,7 @@ public class VCFWriterUnitTest extends BaseTest {
}
Assert.assertEquals(2,counter);
reader.close();
new File(fakeVCFFile + TribbleRMDTrackBuilder.linearIndexExtension).delete();
fakeVCFFile.delete();
}