Correctness fix: the index can concatenate chunks around shard edges, and my code didn't account for that.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2861 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-02-19 21:44:33 +00:00
parent 0e05a3acb0
commit cc09f48cd8
3 changed files with 15 additions and 65 deletions

View File

@ -106,7 +106,7 @@ public class BAMFileIndex2 extends BAMFileIndex
final int level = getLevelForBinNumber(bin.binNumber);
final int levelStart = LEVEL_STARTS[level];
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
return (bin.binNumber - levelStart)*(BIN_SPAN/levelSize);
return (bin.binNumber - levelStart)*(BIN_SPAN/levelSize)+1;
}
/**
@ -118,7 +118,7 @@ public class BAMFileIndex2 extends BAMFileIndex
final int level = getLevelForBinNumber(bin.binNumber);
final int levelStart = LEVEL_STARTS[level];
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
return (bin.binNumber - levelStart + 1)*(BIN_SPAN/levelSize) - 1;
return (bin.binNumber-levelStart+1)*(BIN_SPAN/levelSize);
}
/**
@ -213,25 +213,9 @@ public class BAMFileIndex2 extends BAMFileIndex
}
List<Chunk> chunkList = new ArrayList<Chunk>();
for(Bin coveringBin: binTree)
chunkList.addAll(binToChunks.get(coveringBin));
// Find the nearest adjacent bin. This can act as a minimum offset
Bin closestAdjacentBin = null;
for(Bin adjacentBin: allBins) {
if(getLevelForBinNumber(adjacentBin.binNumber) != binLevel)
continue;
if(adjacentBin.binNumber<bin.binNumber && (closestAdjacentBin == null || closestAdjacentBin.binNumber < adjacentBin.binNumber))
closestAdjacentBin = adjacentBin;
}
// Find the offset of the closest bin.
long adjacentBinOffset = 0;
if(closestAdjacentBin != null) {
for(Chunk chunk: binToChunks.get(closestAdjacentBin)) {
if(adjacentBinOffset < chunk.getChunkEnd())
adjacentBinOffset = chunk.getChunkEnd();
}
for(Bin coveringBin: binTree) {
for(Chunk chunk: binToChunks.get(coveringBin))
chunkList.add(chunk.clone());
}
final int start = getFirstLocusInBin(bin)-1;
@ -242,40 +226,7 @@ public class BAMFileIndex2 extends BAMFileIndex
minimumOffset = index.indexEntries[regionLinearBin];
chunkList = optimizeChunkList(chunkList, minimumOffset);
long[] chunkArray = convertToArray(chunkList);
// Trim off anything before the first desired bin.
int location = Arrays.binarySearch(chunkArray,adjacentBinOffset);
// location not found, but insertion point was determined.
long trimmedChunkArray[] = chunkArray;
// If the location of the element is in an even bucket (a start position), trim everything before it.
if(location >= 0) {
if(location%2==0) {
trimmedChunkArray = new long[chunkArray.length-location];
System.arraycopy(chunkArray,location,trimmedChunkArray,0,trimmedChunkArray.length);
}
else {
trimmedChunkArray = new long[chunkArray.length-location-1];
System.arraycopy(chunkArray,location+1,trimmedChunkArray,0,trimmedChunkArray.length);
}
}
else {
location = -(location+1);
if(location < chunkArray.length) {
if(location%2==0) {
trimmedChunkArray = new long[chunkArray.length-location];
System.arraycopy(chunkArray,location,trimmedChunkArray,0,trimmedChunkArray.length);
}
else {
trimmedChunkArray = new long[chunkArray.length-location+1];
trimmedChunkArray[0] = adjacentBinOffset;
System.arraycopy(chunkArray,location,trimmedChunkArray,1,trimmedChunkArray.length-1);
}
}
}
return trimmedChunkArray;
return convertToArray(chunkList);
}
/**
@ -296,8 +247,10 @@ public class BAMFileIndex2 extends BAMFileIndex
}
List<Chunk> chunkList = new ArrayList<Chunk>();
for(Bin bin: bins)
chunkList.addAll(binToChunks.get(bin));
for(Bin bin: bins) {
for(Chunk chunk: binToChunks.get(bin))
chunkList.add(chunk.clone());
}
if (chunkList.isEmpty()) {
return null;

View File

@ -12,7 +12,7 @@ import java.util.ArrayList;
* @author mhanna
* @version 0.1
*/
public class Chunk implements Comparable<Chunk> {
public class Chunk implements Cloneable,Comparable<Chunk> {
private long mChunkStart;
private long mChunkEnd;
@ -22,6 +22,10 @@ public class Chunk implements Comparable<Chunk> {
mChunkEnd = end;
}
/**
 * Creates an independent copy of this chunk.
 * The copy is built directly through the two-argument constructor from this
 * chunk's current start and end values, not through {@code super.clone()}.
 * @return a new Chunk holding the same start and end values as this one.
 */
@Override
protected Chunk clone() {
    return new Chunk(mChunkStart,mChunkEnd);
}
/**
 * Gets the start value of this chunk.
 * @return the value currently stored in mChunkStart.
 */
public long getChunkStart() {
    final long chunkStart = this.mChunkStart;
    return chunkStart;
}

View File

@ -120,13 +120,6 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
*/
public IndexDelimitedLocusShard next() {
    // Pull the next file pointer; its bin and locations are the only inputs the shard needs.
    FilePointer nextFilePointer = filePointerIterator.next();
    // Look up, per reader, the chunks bounding the bin this file pointer refers to.
    Map<SAMFileReader2,List<Chunk>> chunksBounding = blockDrivenDataSource.getFilePointersBounding(nextFilePointer.bin);
    return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL);
}