Slightly more thread-safe CachingIndexedFastaSequenceFile.java. Likely passes parallel testing.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4869 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-12-17 21:05:17 +00:00
parent 6f29a9dbb4
commit 3bbc6a0540
2 changed files with 38 additions and 25 deletions

View File

@ -39,32 +39,37 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
* Thread-safe! Uses a lock object to protect write and access to the cache.
*/
public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
/** global enable flag */
private static final boolean USE_CACHE = true;
/** do we want to print debugging information about cache efficiency? */
private static final boolean PRINT_EFFICIENCY = false;
/** If we are printing efficiency info, what frequency should we do it at? */
private static final int PRINT_FREQUENCY = 10000;
/** The default cache size in bp */
private static final long DEFAULT_CACHE_SIZE = 1000000;
private static long ourStaticCacheSize = DEFAULT_CACHE_SIZE;
// information about checking efficiency
long cacheHits = 0;
long cacheMisses = 0;
private static class Cache {
long cacheStart = -1;
long cacheStop = -1;
long cacheSize = DEFAULT_CACHE_SIZE;
long cacheMissBackup = 100;
ReferenceSequence cachedSequence = null;
/** The cache size of this CachingIndexedFastaSequenceFile */
long cacheSize = DEFAULT_CACHE_SIZE;
public Cache(long cacheSize) {
this.cacheSize = cacheSize;
this.cacheMissBackup = Math.max(cacheSize / 100, 1);
}
/** When we have a cache miss at position X, we load sequence from X - cacheMissBackup */
long cacheMissBackup = 100;
/** Represents a specific cached sequence, with a specific start and stop, as well as the bases */
private static class Cache {
// Bounds of the currently cached window; both stay at -1 until a load assigns them.
long start = -1, stop = -1;
// Sequence backing the cached window; null until a load assigns it, and a null value is treated as a cache miss by the lookup code.
ReferenceSequence seq = null;
}
private static ThreadLocal<Cache> cache = new ThreadLocal<Cache> () {
@Override protected Cache initialValue() {
return new Cache(ourStaticCacheSize);
return new Cache();
}
};
@ -76,7 +81,12 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
*/
public CachingIndexedFastaSequenceFile(final File file, final FastaSequenceIndex index, long cacheSize) {
super(file, index);
ourStaticCacheSize = cacheSize; // BUG: class only supports one universal cache size
setCacheSize(cacheSize);
}
/**
 * Stores the cache size for this instance and derives the cache-miss backup distance from it.
 *
 * @param cacheSize the cache size to record, in bp
 */
private void setCacheSize(long cacheSize) {
this.cacheSize = cacheSize;
// Back up by one thousandth of the cache size on a miss, but never by less than 1.
this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
}
/**
@ -99,7 +109,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
public CachingIndexedFastaSequenceFile(final File file, long cacheSize ) {
super(file);
ourStaticCacheSize = cacheSize; // BUG: class only supports one universal cache size
setCacheSize(cacheSize);
}
public void printEfficiency() {
@ -134,7 +144,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
Cache myCache = cache.get();
//System.out.printf("getSubsequentAt cache=%s%n", myCache);
if ( ! USE_CACHE || (stop - start) >= myCache.cacheSize ) {
if ( ! USE_CACHE || (stop - start) >= cacheSize ) {
cacheMisses++;
result = super.getSubsequenceAt(contig, start, stop);
} else {
@ -144,25 +154,25 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
if (stop > contigInfo.getSequenceLength())
throw new PicardException("Query asks for data past end of contig");
if ( start < myCache.cacheStart || stop > myCache.cacheStop || myCache.cachedSequence == null || myCache.cachedSequence.getContigIndex() != contigInfo.getSequenceIndex() ) {
if ( start < myCache.start || stop > myCache.stop || myCache.seq == null || myCache.seq.getContigIndex() != contigInfo.getSequenceIndex() ) {
cacheMisses++;
myCache.cacheStart = Math.max(start - myCache.cacheMissBackup, 0);
myCache.cacheStop = Math.min(myCache.cacheStart + myCache.cacheSize, contigInfo.getSequenceLength());
myCache.cachedSequence = super.getSubsequenceAt(contig, myCache.cacheStart, myCache.cacheStop);
myCache.start = Math.max(start - cacheMissBackup, 0);
myCache.stop = Math.min(myCache.start + cacheSize, contigInfo.getSequenceLength());
myCache.seq = super.getSubsequenceAt(contig, myCache.start, myCache.stop);
//System.out.printf("New cache at %s %d-%d%n", contig, cacheStart, cacheStop);
} else {
cacheHits++;
}
// at this point we determine where in the cache we want to extract the requested subsequence
int cacheOffsetStart = (int)(start - myCache.cacheStart);
int cacheOffsetStart = (int)(start - myCache.start);
int cacheOffsetStop = (int)(stop - start + cacheOffsetStart + 1);
try {
result = new ReferenceSequence(myCache.cachedSequence.getName(), myCache.cachedSequence.getContigIndex(), Arrays.copyOfRange(myCache.cachedSequence.getBases(), cacheOffsetStart, cacheOffsetStop));
result = new ReferenceSequence(myCache.seq.getName(), myCache.seq.getContigIndex(), Arrays.copyOfRange(myCache.seq.getBases(), cacheOffsetStart, cacheOffsetStop));
} catch ( ArrayIndexOutOfBoundsException e ) {
throw new ReviewedStingException(String.format("BUG: bad array indexing. Cache start %d and end %d, request start %d end %d, offset start %d and end %d, base size %d",
myCache.cacheStart, myCache.cacheStop, start, stop, cacheOffsetStart, cacheOffsetStop, myCache.cachedSequence.getBases().length), e);
myCache.start, myCache.stop, start, stop, cacheOffsetStart, cacheOffsetStop, myCache.seq.getBases().length), e);
}
}

View File

@ -50,8 +50,8 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
//logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d",
// contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d",
contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
for ( int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE ) {
int start = i;
int stop = start + querySize;
@ -77,6 +77,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
int middleStart = (contig.getSequenceLength() - querySize) / 2;
int middleStop = middleStart + querySize;
logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d with intermediate query",
contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
for ( int i = 0; i < contig.getSequenceLength(); i += 10 ) {
int start = i;
int stop = start + querySize;