Correctness fix: the index can concatenate chunks around shard edges, and the previous code did not account for that.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2861 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0e05a3acb0
commit
cc09f48cd8
|
|
@ -106,7 +106,7 @@ public class BAMFileIndex2 extends BAMFileIndex
|
|||
final int level = getLevelForBinNumber(bin.binNumber);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (bin.binNumber - levelStart)*(BIN_SPAN/levelSize);
|
||||
return (bin.binNumber - levelStart)*(BIN_SPAN/levelSize)+1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -118,7 +118,7 @@ public class BAMFileIndex2 extends BAMFileIndex
|
|||
final int level = getLevelForBinNumber(bin.binNumber);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (bin.binNumber - levelStart + 1)*(BIN_SPAN/levelSize) - 1;
|
||||
return (bin.binNumber-levelStart+1)*(BIN_SPAN/levelSize);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -213,25 +213,9 @@ public class BAMFileIndex2 extends BAMFileIndex
|
|||
}
|
||||
|
||||
List<Chunk> chunkList = new ArrayList<Chunk>();
|
||||
for(Bin coveringBin: binTree)
|
||||
chunkList.addAll(binToChunks.get(coveringBin));
|
||||
|
||||
// Find the nearest adjacent bin. This can act as a minimum offset
|
||||
Bin closestAdjacentBin = null;
|
||||
for(Bin adjacentBin: allBins) {
|
||||
if(getLevelForBinNumber(adjacentBin.binNumber) != binLevel)
|
||||
continue;
|
||||
if(adjacentBin.binNumber<bin.binNumber && (closestAdjacentBin == null || closestAdjacentBin.binNumber < adjacentBin.binNumber))
|
||||
closestAdjacentBin = adjacentBin;
|
||||
}
|
||||
|
||||
// Find the offset of the closest bin.
|
||||
long adjacentBinOffset = 0;
|
||||
if(closestAdjacentBin != null) {
|
||||
for(Chunk chunk: binToChunks.get(closestAdjacentBin)) {
|
||||
if(adjacentBinOffset < chunk.getChunkEnd())
|
||||
adjacentBinOffset = chunk.getChunkEnd();
|
||||
}
|
||||
for(Bin coveringBin: binTree) {
|
||||
for(Chunk chunk: binToChunks.get(coveringBin))
|
||||
chunkList.add(chunk.clone());
|
||||
}
|
||||
|
||||
final int start = getFirstLocusInBin(bin)-1;
|
||||
|
|
@ -242,40 +226,7 @@ public class BAMFileIndex2 extends BAMFileIndex
|
|||
minimumOffset = index.indexEntries[regionLinearBin];
|
||||
|
||||
chunkList = optimizeChunkList(chunkList, minimumOffset);
|
||||
long[] chunkArray = convertToArray(chunkList);
|
||||
|
||||
// Trim off anything before the first desired bin.
|
||||
int location = Arrays.binarySearch(chunkArray,adjacentBinOffset);
|
||||
// location not found, but insertion point was determined.
|
||||
long trimmedChunkArray[] = chunkArray;
|
||||
|
||||
// If the location of the element is in an even bucket (a start position), trim everything before it.
|
||||
if(location >= 0) {
|
||||
if(location%2==0) {
|
||||
trimmedChunkArray = new long[chunkArray.length-location];
|
||||
System.arraycopy(chunkArray,location,trimmedChunkArray,0,trimmedChunkArray.length);
|
||||
}
|
||||
else {
|
||||
trimmedChunkArray = new long[chunkArray.length-location-1];
|
||||
System.arraycopy(chunkArray,location+1,trimmedChunkArray,0,trimmedChunkArray.length);
|
||||
}
|
||||
}
|
||||
else {
|
||||
location = -(location+1);
|
||||
if(location < chunkArray.length) {
|
||||
if(location%2==0) {
|
||||
trimmedChunkArray = new long[chunkArray.length-location];
|
||||
System.arraycopy(chunkArray,location,trimmedChunkArray,0,trimmedChunkArray.length);
|
||||
}
|
||||
else {
|
||||
trimmedChunkArray = new long[chunkArray.length-location+1];
|
||||
trimmedChunkArray[0] = adjacentBinOffset;
|
||||
System.arraycopy(chunkArray,location,trimmedChunkArray,1,trimmedChunkArray.length-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return trimmedChunkArray;
|
||||
return convertToArray(chunkList);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -296,8 +247,10 @@ public class BAMFileIndex2 extends BAMFileIndex
|
|||
}
|
||||
|
||||
List<Chunk> chunkList = new ArrayList<Chunk>();
|
||||
for(Bin bin: bins)
|
||||
chunkList.addAll(binToChunks.get(bin));
|
||||
for(Bin bin: bins) {
|
||||
for(Chunk chunk: binToChunks.get(bin))
|
||||
chunkList.add(chunk.clone());
|
||||
}
|
||||
|
||||
if (chunkList.isEmpty()) {
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import java.util.ArrayList;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class Chunk implements Comparable<Chunk> {
|
||||
public class Chunk implements Cloneable,Comparable<Chunk> {
|
||||
|
||||
private long mChunkStart;
|
||||
private long mChunkEnd;
|
||||
|
|
@ -22,6 +22,10 @@ public class Chunk implements Comparable<Chunk> {
|
|||
mChunkEnd = end;
|
||||
}
|
||||
|
||||
protected Chunk clone() {
|
||||
return new Chunk(mChunkStart,mChunkEnd);
|
||||
}
|
||||
|
||||
public long getChunkStart() {
|
||||
return mChunkStart;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -120,13 +120,6 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
|
|||
*/
|
||||
public IndexDelimitedLocusShard next() {
|
||||
FilePointer nextFilePointer = filePointerIterator.next();
|
||||
String contig = null;
|
||||
long start = Long.MAX_VALUE, stop = 0;
|
||||
for(GenomeLoc loc: nextFilePointer.locations) {
|
||||
contig = loc.getContig();
|
||||
start = Math.min(loc.getStart(),start);
|
||||
stop = Math.max(loc.getStop(),stop);
|
||||
}
|
||||
Map<SAMFileReader2,List<Chunk>> chunksBounding = blockDrivenDataSource.getFilePointersBounding(nextFilePointer.bin);
|
||||
return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue