Make CachingIndexedFastaSequenceFile.java slightly more thread-safe. It likely passes parallel testing now.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4869 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6f29a9dbb4
commit
3bbc6a0540
|
|
@ -39,32 +39,37 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
* Thread-safe! Uses a lock object to protect write and access to the cache.
|
* Thread-safe! Uses a lock object to protect write and access to the cache.
|
||||||
*/
|
*/
|
||||||
public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
||||||
|
/** global enable flag */
|
||||||
private static final boolean USE_CACHE = true;
|
private static final boolean USE_CACHE = true;
|
||||||
|
|
||||||
|
/** do we want to print debugging information about cache efficiency? */
|
||||||
private static final boolean PRINT_EFFICIENCY = false;
|
private static final boolean PRINT_EFFICIENCY = false;
|
||||||
|
|
||||||
|
/** If we are printing efficiency info, what frequency should we do it at? */
|
||||||
private static final int PRINT_FREQUENCY = 10000;
|
private static final int PRINT_FREQUENCY = 10000;
|
||||||
|
|
||||||
|
/** The default cache size in bp */
|
||||||
private static final long DEFAULT_CACHE_SIZE = 1000000;
|
private static final long DEFAULT_CACHE_SIZE = 1000000;
|
||||||
|
|
||||||
private static long ourStaticCacheSize = DEFAULT_CACHE_SIZE;
|
// information about checking efficiency
|
||||||
|
|
||||||
long cacheHits = 0;
|
long cacheHits = 0;
|
||||||
long cacheMisses = 0;
|
long cacheMisses = 0;
|
||||||
|
|
||||||
private static class Cache {
|
/** The cache size of this CachingIndexedFastaSequenceFile */
|
||||||
long cacheStart = -1;
|
|
||||||
long cacheStop = -1;
|
|
||||||
long cacheSize = DEFAULT_CACHE_SIZE;
|
long cacheSize = DEFAULT_CACHE_SIZE;
|
||||||
long cacheMissBackup = 100;
|
|
||||||
ReferenceSequence cachedSequence = null;
|
|
||||||
|
|
||||||
public Cache(long cacheSize) {
|
/** When we have a cache miss at position X, we load sequence from X - cacheMissBackup */
|
||||||
this.cacheSize = cacheSize;
|
long cacheMissBackup = 100;
|
||||||
this.cacheMissBackup = Math.max(cacheSize / 100, 1);
|
|
||||||
}
|
/** Represents a specific cached sequence, with a specific start and stop, as well as the bases */
|
||||||
|
private static class Cache {
|
||||||
|
long start = -1, stop = -1;
|
||||||
|
ReferenceSequence seq = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ThreadLocal<Cache> cache = new ThreadLocal<Cache> () {
|
private static ThreadLocal<Cache> cache = new ThreadLocal<Cache> () {
|
||||||
@Override protected Cache initialValue() {
|
@Override protected Cache initialValue() {
|
||||||
return new Cache(ourStaticCacheSize);
|
return new Cache();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -76,7 +81,12 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
||||||
*/
|
*/
|
||||||
public CachingIndexedFastaSequenceFile(final File file, final FastaSequenceIndex index, long cacheSize) {
|
public CachingIndexedFastaSequenceFile(final File file, final FastaSequenceIndex index, long cacheSize) {
|
||||||
super(file, index);
|
super(file, index);
|
||||||
ourStaticCacheSize = cacheSize; // BUG: class only supports one universal cache size
|
setCacheSize(cacheSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setCacheSize(long cacheSize) {
|
||||||
|
this.cacheSize = cacheSize;
|
||||||
|
this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -99,7 +109,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
||||||
|
|
||||||
public CachingIndexedFastaSequenceFile(final File file, long cacheSize ) {
|
public CachingIndexedFastaSequenceFile(final File file, long cacheSize ) {
|
||||||
super(file);
|
super(file);
|
||||||
ourStaticCacheSize = cacheSize; // BUG: class only supports one universal cache size
|
setCacheSize(cacheSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void printEfficiency() {
|
public void printEfficiency() {
|
||||||
|
|
@ -134,7 +144,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
||||||
Cache myCache = cache.get();
|
Cache myCache = cache.get();
|
||||||
//System.out.printf("getSubsequentAt cache=%s%n", myCache);
|
//System.out.printf("getSubsequentAt cache=%s%n", myCache);
|
||||||
|
|
||||||
if ( ! USE_CACHE || (stop - start) >= myCache.cacheSize ) {
|
if ( ! USE_CACHE || (stop - start) >= cacheSize ) {
|
||||||
cacheMisses++;
|
cacheMisses++;
|
||||||
result = super.getSubsequenceAt(contig, start, stop);
|
result = super.getSubsequenceAt(contig, start, stop);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -144,25 +154,25 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
||||||
if (stop > contigInfo.getSequenceLength())
|
if (stop > contigInfo.getSequenceLength())
|
||||||
throw new PicardException("Query asks for data past end of contig");
|
throw new PicardException("Query asks for data past end of contig");
|
||||||
|
|
||||||
if ( start < myCache.cacheStart || stop > myCache.cacheStop || myCache.cachedSequence == null || myCache.cachedSequence.getContigIndex() != contigInfo.getSequenceIndex() ) {
|
if ( start < myCache.start || stop > myCache.stop || myCache.seq == null || myCache.seq.getContigIndex() != contigInfo.getSequenceIndex() ) {
|
||||||
cacheMisses++;
|
cacheMisses++;
|
||||||
myCache.cacheStart = Math.max(start - myCache.cacheMissBackup, 0);
|
myCache.start = Math.max(start - cacheMissBackup, 0);
|
||||||
myCache.cacheStop = Math.min(myCache.cacheStart + myCache.cacheSize, contigInfo.getSequenceLength());
|
myCache.stop = Math.min(myCache.start + cacheSize, contigInfo.getSequenceLength());
|
||||||
myCache.cachedSequence = super.getSubsequenceAt(contig, myCache.cacheStart, myCache.cacheStop);
|
myCache.seq = super.getSubsequenceAt(contig, myCache.start, myCache.stop);
|
||||||
//System.out.printf("New cache at %s %d-%d%n", contig, cacheStart, cacheStop);
|
//System.out.printf("New cache at %s %d-%d%n", contig, cacheStart, cacheStop);
|
||||||
} else {
|
} else {
|
||||||
cacheHits++;
|
cacheHits++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// at this point we determine where in the cache we want to extract the requested subsequence
|
// at this point we determine where in the cache we want to extract the requested subsequence
|
||||||
int cacheOffsetStart = (int)(start - myCache.cacheStart);
|
int cacheOffsetStart = (int)(start - myCache.start);
|
||||||
int cacheOffsetStop = (int)(stop - start + cacheOffsetStart + 1);
|
int cacheOffsetStop = (int)(stop - start + cacheOffsetStart + 1);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
result = new ReferenceSequence(myCache.cachedSequence.getName(), myCache.cachedSequence.getContigIndex(), Arrays.copyOfRange(myCache.cachedSequence.getBases(), cacheOffsetStart, cacheOffsetStop));
|
result = new ReferenceSequence(myCache.seq.getName(), myCache.seq.getContigIndex(), Arrays.copyOfRange(myCache.seq.getBases(), cacheOffsetStart, cacheOffsetStop));
|
||||||
} catch ( ArrayIndexOutOfBoundsException e ) {
|
} catch ( ArrayIndexOutOfBoundsException e ) {
|
||||||
throw new ReviewedStingException(String.format("BUG: bad array indexing. Cache start %d and end %d, request start %d end %d, offset start %d and end %d, base size %d",
|
throw new ReviewedStingException(String.format("BUG: bad array indexing. Cache start %d and end %d, request start %d end %d, offset start %d and end %d, base size %d",
|
||||||
myCache.cacheStart, myCache.cacheStop, start, stop, cacheOffsetStart, cacheOffsetStop, myCache.cachedSequence.getBases().length), e);
|
myCache.start, myCache.stop, start, stop, cacheOffsetStart, cacheOffsetStop, myCache.seq.getBases().length), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,8 +50,8 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
||||||
IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
||||||
|
|
||||||
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
||||||
//logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d",
|
logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d",
|
||||||
// contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
|
contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
|
||||||
for ( int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE ) {
|
for ( int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE ) {
|
||||||
int start = i;
|
int start = i;
|
||||||
int stop = start + querySize;
|
int stop = start + querySize;
|
||||||
|
|
@ -77,6 +77,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
||||||
int middleStart = (contig.getSequenceLength() - querySize) / 2;
|
int middleStart = (contig.getSequenceLength() - querySize) / 2;
|
||||||
int middleStop = middleStart + querySize;
|
int middleStop = middleStart + querySize;
|
||||||
|
|
||||||
|
logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d with intermediate query",
|
||||||
|
contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
|
||||||
|
|
||||||
for ( int i = 0; i < contig.getSequenceLength(); i += 10 ) {
|
for ( int i = 0; i < contig.getSequenceLength(); i += 10 ) {
|
||||||
int start = i;
|
int start = i;
|
||||||
int stop = start + querySize;
|
int stop = start + querySize;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue