Updates for Tribble 82, fixes for Ryan's case where multiple processes would attempt to read/write to the same index, and a couple of other Tribble-centric bug fixes.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3382 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-05-18 19:34:45 +00:00
parent 635f61c22d
commit 7cfb9ff3dc
9 changed files with 155 additions and 78 deletions

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broad.tribble.FeatureReader;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureReaderTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
@ -51,7 +52,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) {
this.rod = rod;
if (rod.supportsQuery())
iteratorPool = new ReferenceOrderedQueryDataPool(new TribbleRMDTrackBuilder(), rod);
iteratorPool = new ReferenceOrderedQueryDataPool(new TribbleRMDTrackBuilder(), (FeatureReaderTrack)rod);
else
iteratorPool = new ReferenceOrderedDataPool( walker, rod );
}
@ -186,9 +187,11 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, Location
// our tribble track builder
private final TribbleRMDTrackBuilder builder;
public ReferenceOrderedQueryDataPool( TribbleRMDTrackBuilder builder, RMDTrack rod ) {
public ReferenceOrderedQueryDataPool( TribbleRMDTrackBuilder builder, FeatureReaderTrack rod ) {
this.rod = rod;
this.builder = builder;
// a little bit of a hack, but it saves us from re-reading the index from the file
this.addNewResource(rod.getReader());
}
@Override
@ -208,8 +211,7 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, Location
try {
if (position instanceof MappedStreamSegment) {
GenomeLoc pos = ((MappedStreamSegment) position).locus;
//System.err.println("Querying position1 " + pos.getContig() + " start " + pos.getStart() + " stop " + pos.getStop());
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.query(pos.getContig(), (int) pos.getStart(), (int) pos.getStop()),rod.getName()));
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.query(pos.getContig(),(int) pos.getStart(), (int) pos.getStop()),rod.getName()));
} else {
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.iterator(),rod.getName()));
}

View File

@ -112,4 +112,8 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
}
reader = null;
}
/**
 * Exposes the feature reader held by this track.
 *
 * @return the value of this track's {@code reader} field; this is {@code null}
 *         once the field has been cleared
 */
public FeatureReader getReader() {
    return this.reader;
}
}

View File

@ -27,12 +27,15 @@ package org.broadinstitute.sting.gatk.refdata.tracks.builders;
import org.apache.log4j.Logger;
import org.broad.tribble.*;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.index.linear.LinearIndexCreator;
import org.broad.tribble.readers.BasicFeatureReader;
import org.broad.tribble.util.LineReader;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureReaderTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
import org.broadinstitute.sting.utils.file.FSLock;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.StingException;
@ -90,11 +93,8 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
*/
@Override
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
// make a feature reader
FeatureReader reader;
reader = createFeatureReader(targetClass, inputFile);
// return a feature reader track
return new FeatureReaderTrack(targetClass, name, inputFile, reader);
return new FeatureReaderTrack(targetClass, name, inputFile, createFeatureReader(targetClass, inputFile));
}
/**
@ -106,15 +106,8 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
public FeatureReader createFeatureReader(Class targetClass, File inputFile) {
FeatureReader reader = null;
try {
// check to see if the input file has an index
if (requireIndex(inputFile)) {
logger.warn("Creating Tribble Index for file " + inputFile);
LinearIndex index = createIndex(inputFile, this.createByType(targetClass), true);
reader = new BasicFeatureReader(inputFile,index, this.createByType(targetClass));
}
else {
reader = new BasicFeatureReader(inputFile,this.createByType(targetClass));
}
Index index = loadIndex(inputFile, this.createByType(targetClass), true);
reader = new BasicFeatureReader(inputFile.getAbsolutePath(), index, this.createByType(targetClass));
} catch (FileNotFoundException e) {
throw new StingException("Unable to create reader with file " + inputFile, e);
} catch (IOException e) {
@ -131,38 +124,62 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @return a linear index for the specified type
* @throws IOException if we cannot write the index file
*/
public static LinearIndex createIndex(File inputFile, FeatureCodec codec, boolean onDisk) throws IOException {
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
// if we can write the index, we should, but if not just create it in memory
public static Index loadIndex(File inputFile, FeatureCodec codec, boolean onDisk) throws IOException {
// our return index
LinearIndex returnIndex = null;
// create the index file name, locking on the index file name
File indexFile = new File(inputFile.getAbsoluteFile() + linearIndexExtension);
if (indexFile.getParentFile().canWrite() && (!indexFile.exists() || indexFile.canWrite()) && onDisk)
return create.createIndex();
else {
if (onDisk) logger.info("Unable to write to location " + indexFile + " for index file, creating index in memory only");
return create.createIndex(null);
FSLock lock = new FSLock(indexFile);
// acquire a lock on the file
boolean obtainedLock = lock.lock();
try {
// if the file exists, and we can read it, load the index from disk
if (indexFile.exists() && indexFile.canRead() && obtainedLock) {
logger.info("Loading Tribble index from disk for file " + inputFile);
return LinearIndex.createIndex(indexFile);
}
// else we need to create the index, and write it to disk if we can
else
return writeIndexToDisk(inputFile, codec, onDisk, indexFile, obtainedLock);
}
finally {
lock.unlock();
}
}
/**
* this function checks if we need to make an index file. There are three cases:
* 1. The index file doesn't exist; return true
* 2. The index does exist, but is older than the file. We delete the index and return true
* 3. else return false;
* @param inputFile the target file to make an index for
* @return true if we need to create an index, false otherwise
* attempt to create the index, and write it to disk if we can
* @param inputFile the input file
* @param codec the codec to use
* @param onDisk if they asked for disk storage or not
* @param indexFile the index file location
* @param obtainedLock did we obtain the lock on the file?
* @return the index object
* @throws IOException
*/
public static boolean requireIndex(File inputFile) {
// can we read the index? if not, create an index
File indexFile = new File(inputFile.getAbsolutePath() + linearIndexExtension);
if (!(indexFile.canRead())) return true;
if (inputFile.lastModified() > indexFile.lastModified()) {
logger.warn("Removing out of date (index file date older than target file ) index file " + indexFile);
indexFile.delete();
return true;
private static LinearIndex writeIndexToDisk(File inputFile, FeatureCodec codec, boolean onDisk, File indexFile, boolean obtainedLock) throws IOException {
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
LinearIndex index = create.createIndex();
// if the index doesn't exist, and we can write to the directory, and we got a lock, write to the disk
if (indexFile.getParentFile().canWrite() &&
(!indexFile.exists() || indexFile.canWrite()) &&
onDisk &&
obtainedLock) {
logger.info("Creating Tribble Index on disk for file " + inputFile);
index.write(indexFile);
return index;
}
// we can't write it to disk, just store it in memory
else {
// if they wanted to write, let them know we couldn't
if (onDisk) logger.warn("Unable to write to " + indexFile + " for the index file, creating index in memory only");
return index;
}
return false;
}
}
@ -177,7 +194,7 @@ class FakeTribbleTrack implements FeatureCodec {
}
@Override
public int headerLineCount(File file) {
return 0;
public int readHeader(LineReader reader) {
return 0; // the basics
}
}

View File

@ -0,0 +1,69 @@
package org.broadinstitute.sting.utils.file;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
/**
* a quick implementation of a file based lock, using the Java NIO classes
*/
public class FSLock {
    // suffix appended to the base file name to form the lock file name
    private static final String lockString = ".lock";

    // the lock file guarded by this object
    private final File lockFile;

    // the NIO lock held on the lock file; null whenever this instance does not hold the lock
    private FileLock lock = null;

    // the channel opened on the lock file; non-null only between a successful lock() and unlock()
    FileChannel fc = null;

    /**
     * create a file system lock, given a base file to which a lock string gets appended.
     *
     * Nothing is created on disk here; the lock file only comes into existence in {@link #lock()}.
     *
     * @param baseLocation the base file location
     */
    public FSLock(File baseLocation) {
        lockFile = new File(baseLocation.getAbsoluteFile() + lockString);
    }

    /**
     * lock the file
     *
     * If the lock file already exists it is treated as held by someone else: this method returns
     * false and leaves that file untouched.  Note that the existence check and the creation of
     * the lock file are two separate steps, so this is a best-effort check rather than an atomic one.
     *
     * @return boolean true if we obtained a lock
     * @throws IllegalStateException if this instance already holds a lock
     * @throws StingException if the lock file cannot be created or locked
     */
    public boolean lock() {
        if (lock != null) throw new IllegalStateException("Unable to lock on file " + lockFile + " there is already a lock active");
        if (lockFile.exists()) {
            return false;
        }
        boolean acquired = false;
        try {
            // opening in "rw" mode creates the lock file
            fc = new RandomAccessFile(lockFile, "rw").getChannel();
            // only register for cleanup at exit once this instance has created the file itself
            lockFile.deleteOnExit();
            lock = fc.lock();
            acquired = lock != null && lock.isValid();
            return acquired;
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so creation failures end up here too
            throw new StingException("Unable to create lock file named " + lockFile, e);
        } finally {
            // on any failure (exception or invalid lock) do not leave an open channel or a stale lock file behind
            if (!acquired) discardFailedAttempt();
        }
    }

    /**
     * unlock the file
     *
     * note: this allows unlocking a file that failed to lock (no required user checks on null locks).
     * In that case nothing happens; in particular a lock file belonging to someone else is not deleted.
     * After this call the instance can be locked again.
     *
     * @throws StingException if releasing the lock or closing the channel fails
     */
    public void unlock() {
        // nothing was opened by this instance, so there is nothing of ours to release or delete
        if (fc == null) return;
        try {
            try {
                if (lock != null)
                    lock.release();
            } finally {
                // always close the channel, even when the release itself failed
                fc.close();
            }
        } catch (IOException e) {
            throw new StingException("Unable to release lock file named " + lockFile, e);
        } finally {
            lock = null;
            fc = null;
            if (lockFile.exists())
                lockFile.delete();
        }
    }

    /**
     * clean up after a lock attempt that did not succeed: close the channel (which also gives up
     * any lock held through it), remove the lock file this attempt created, and reset our state.
     */
    private void discardFailedAttempt() {
        if (fc != null) {
            try {
                fc.close();
            } catch (IOException ignored) {
                // best effort only: the original failure is the one worth reporting
            }
            lockFile.delete();
        }
        lock = null;
        fc = null;
    }
}

View File

@ -1,32 +1,16 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.index.Index;
import org.broad.tribble.readers.BasicFeatureReader;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
/** The VCFReader class, which given a valid vcf file, parses out the header and VCF records */
public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
@ -75,10 +59,10 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
*/
private void initialize(File vcfFile, VCFCodec.LineTransform transform, boolean createIndexOnDisk) {
VCFCodec codec = new VCFCodec();
LinearIndex index = createIndex(vcfFile, createIndexOnDisk);
Index index = createIndex(vcfFile, createIndexOnDisk);
if (transform != null) codec.setTransformer(transform);
try {
vcfReader = new BasicFeatureReader(vcfFile,index,codec);
vcfReader = new BasicFeatureReader(vcfFile.getAbsolutePath(),index,codec);
iterator= vcfReader.iterator();
} catch (FileNotFoundException e) {
throw new StingException("Unable to read VCF File from " + vcfFile, e);
@ -94,15 +78,14 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @param createIndexOnDisk do we create the index on disk (or only in memory?)
* @return an instance of an index
*/
private LinearIndex createIndex(File vcfFile, boolean createIndexOnDisk) {
LinearIndex index = null;
if (TribbleRMDTrackBuilder.requireIndex(vcfFile)) {
try {
index = TribbleRMDTrackBuilder.createIndex(vcfFile, new VCFCodec(), createIndexOnDisk);
} catch (IOException e) {
throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?");
}
// Obtains the index for the VCF file by delegating to TribbleRMDTrackBuilder.loadIndex
// with a fresh VCFCodec; an IOException from that call is rethrown as a StingException.
private Index createIndex(File vcfFile, boolean createIndexOnDisk) {
    try {
        return TribbleRMDTrackBuilder.loadIndex(vcfFile, new VCFCodec(), createIndexOnDisk);
    } catch (IOException e) {
        // pass the IOException along as the cause so the underlying failure is not lost
        throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?", e);
    }
}

View File

@ -30,6 +30,7 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.util.Map;
@ -59,14 +60,15 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
@Test
public void testBuilderIndexUnwriteable() {
File vcfFile = new File(validationDataLocation + "/ROD_validation/mixedup.vcf");
File vcfFile = new File(validationDataLocation + "/ROD_validation/relic.vcf");
try {
builder.createIndex(vcfFile,new VCFCodec(), true);
builder.loadIndex(vcfFile,new VCFCodec(), true);
} catch (IOException e) {
Assert.fail("Unable to make index because of IO exception " + e.getMessage());
e.printStackTrace();
Assert.fail("IO exception unexpected" + e.getMessage());
}
// make sure we didn't write the file (check that it's length is zero)
Assert.assertEquals(0,new File(vcfFile + TribbleRMDTrackBuilder.linearIndexExtension).length());
// make sure we didn't write the file (check that its timestamp is within bounds)
Assert.assertTrue(Math.abs(1274210993000l - new File(vcfFile + TribbleRMDTrackBuilder.linearIndexExtension).lastModified()) < 100);
}
}

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="80" status="integration" publication="20100512124200" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="82" status="integration" publication="20100517124200" />
</ivy-module>