Fix sharding bug -- loci at which more than 100,000 reads (= 1 shard) share the same

alignment start would confuse the sharding system and cause it to return duplicate reads.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1987 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-11-08 14:27:26 +00:00
parent f777c806d6
commit 21c5f543fa
1 changed files with 8 additions and 7 deletions

View File

@ -294,12 +294,16 @@ public class SAMDataSource implements SimpleDataSource {
}
int x = 0;
SAMRecord rec = null;
int lastPos = 0;
// Assuming that lastReadPos should never be null, because this is a mappedReadSeek
// and initial queries are handled by the previous conditional.
int lastContig = lastReadPos.getContigIndex();
int lastPos = (int)lastReadPos.getStart();
while (x < readsTaken) {
if (iter.hasNext()) {
rec = iter.next();
if (lastPos == rec.getAlignmentStart()) ++this.readsSeenAtLastPos;
if (lastContig == rec.getReferenceIndex() && lastPos == rec.getAlignmentStart()) ++this.readsSeenAtLastPos;
else this.readsSeenAtLastPos = 1;
lastPos = rec.getAlignmentStart();
++x;
@ -368,12 +372,9 @@ public class SAMDataSource implements SimpleDataSource {
private void correctForReadPileupSeek( StingSAMIterator iter ) {
// move the number of reads we read from the last pos
boolean atLeastOneReadSeen = false; // we have a problem where some chromosomes don't have a single read (i.e. the chrN_random chrom.)
while (iter.hasNext() && this.readsSeenAtLastPos > 0) {
iter.next();
--readsSeenAtLastPos;
for(int i = 0; i < this.readsSeenAtLastPos && iter.hasNext(); i++,iter.next())
atLeastOneReadSeen = true;
}
if (readsSeenAtLastPos != 0 && atLeastOneReadSeen) {
if (readsSeenAtLastPos > 0 && !atLeastOneReadSeen) {
throw new SimpleDataSourceLoadException("Seek problem: reads at last position count != 0");
}
}