Speed-up for VCF; added code to the VCF reader to automatically create an index if one doesn't already exist; and changed the VCF writer unit test

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3305 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-05-05 20:19:42 +00:00
parent 6bbcc47b5d
commit a0d71540df
4 changed files with 19 additions and 5 deletions

View File

@ -61,7 +61,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
// the linear index extension // the linear index extension
private static final String linearIndexExtension = ".idx"; public static final String linearIndexExtension = ".idx";
/** Create a new plugin manager. */ /** Create a new plugin manager. */
public TribbleRMDTrackBuilder() { public TribbleRMDTrackBuilder() {
@ -118,7 +118,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @return a linear index for the specified type * @return a linear index for the specified type
* @throws IOException if we cannot write the index file * @throws IOException if we cannot write the index file
*/ */
private LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException { public static LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException {
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec); LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
return create.createIndex(); return create.createIndex();
} }
@ -131,7 +131,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @param inputFile the target file to make an index for * @param inputFile the target file to make an index for
* @return true if we need to create an index, false otherwise * @return true if we need to create an index, false otherwise
*/ */
private boolean requireIndex(File inputFile) { public static boolean requireIndex(File inputFile) {
// can we read the index? if not, create an index // can we read the index? if not, create an index
File indexFile = new File(inputFile.getAbsolutePath() + linearIndexExtension); File indexFile = new File(inputFile.getAbsolutePath() + linearIndexExtension);
if (!(indexFile.canRead())) return true; if (!(indexFile.canRead())) return true;

View File

@ -63,13 +63,15 @@ public abstract class GATKFeature implements Feature {
*/ */
public static class TribbleGATKFeature extends GATKFeature { public static class TribbleGATKFeature extends GATKFeature {
private final Feature feature; private final Feature feature;
private GenomeLoc position = null;
public TribbleGATKFeature(Feature f, String name) { public TribbleGATKFeature(Feature f, String name) {
super(name); super(name);
feature = f; feature = f;
} }
public GenomeLoc getLocation() { public GenomeLoc getLocation() {
return GenomeLocParser.createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd()); if (position == null) position = GenomeLocParser.createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd());
return position;
} }
/** Return the features reference sequence name, e.g chromosome or contig */ /** Return the features reference sequence name, e.g chromosome or contig */

View File

@ -21,7 +21,9 @@ import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import org.broad.tribble.FeatureReader; import org.broad.tribble.FeatureReader;
import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.vcf.*; import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
@ -57,6 +59,14 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
private void initialize(File vcfFile, VCFCodec.LineTransform transform) { private void initialize(File vcfFile, VCFCodec.LineTransform transform) {
VCFCodec codec = new VCFCodec(); VCFCodec codec = new VCFCodec();
LinearIndex index = null;
if (TribbleRMDTrackBuilder.requireIndex(vcfFile)) {
try {
index = TribbleRMDTrackBuilder.createIndex(vcfFile, new VCFCodec());
} catch (IOException e) {
throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?");
}
}
if (transform != null) codec.setTransformer(transform); if (transform != null) codec.setTransformer(transform);
try { try {
vcfReader = new FeatureReader(vcfFile,codec); vcfReader = new FeatureReader(vcfFile,codec);

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.*; import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -54,6 +55,7 @@ public class VCFWriterUnitTest extends BaseTest {
} }
Assert.assertEquals(2,counter); Assert.assertEquals(2,counter);
reader.close(); reader.close();
new File(fakeVCFFile + TribbleRMDTrackBuilder.linearIndexExtension).delete();
fakeVCFFile.delete(); fakeVCFFile.delete();
} }