1) now is tolerant to sloppy cigar strings with 0-length elements (at the price of extra recursive call)
2) when reads with deletions are requested, adds to the pile just those: reads with 'D' over the current reference base, but not 'N' 3) next() now implements a loop: recursive forward iteration calls to next() until ref. position with non-zero coverage is encountered were OK for (short) deletions, but with long stretches of N's they end up with stack overflow git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1568 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
542af6402e
commit
dd0085c428
|
|
@ -113,11 +113,14 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// return null; // we are done
|
// return null; // we are done
|
||||||
|
|
||||||
//if (DEBUG2) System.out.printf("stepForwardOnGenome2: curElement=%s, counter=%d, len=%d%n", curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1);
|
//if (DEBUG2) System.out.printf("stepForwardOnGenome2: curElement=%s, counter=%d, len=%d%n", curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1);
|
||||||
if ( curElement == null || ++cigarElementCounter >= curElement.getLength() ) {
|
if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) {
|
||||||
cigarOffset++;
|
cigarOffset++;
|
||||||
if ( cigarOffset < nCigarElements ) {
|
if ( cigarOffset < nCigarElements ) {
|
||||||
curElement = cigar.getCigarElement(cigarOffset);
|
curElement = cigar.getCigarElement(cigarOffset);
|
||||||
cigarElementCounter = 0;
|
cigarElementCounter = 0;
|
||||||
|
// next line: guards against cigar elements of length 0; when new cigar element is retrieved,
|
||||||
|
// we reenter in order to re-check cigarElementCounter against curElement's length
|
||||||
|
return stepForwardOnGenome();
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -215,31 +218,36 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// printState();
|
// printState();
|
||||||
//}
|
//}
|
||||||
|
|
||||||
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
|
||||||
|
|
||||||
// todo -- performance problem -- should be lazy, really
|
|
||||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
|
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
|
||||||
ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
|
ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
|
||||||
for ( SAMRecordState state : readStates ) {
|
|
||||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
|
||||||
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
|
|
||||||
reads.add(state.getRead());
|
|
||||||
offsets.add(state.getReadOffset());
|
|
||||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() ) {
|
|
||||||
reads.add(state.getRead());
|
|
||||||
offsets.add(-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
GenomeLoc loc = getLocation();
|
|
||||||
|
|
||||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
||||||
|
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
|
||||||
|
while(true) {
|
||||||
|
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
||||||
|
|
||||||
|
// todo -- performance problem -- should be lazy, really
|
||||||
|
for ( SAMRecordState state : readStates ) {
|
||||||
|
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
|
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
|
||||||
|
reads.add(state.getRead());
|
||||||
|
offsets.add(state.getReadOffset());
|
||||||
|
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
|
reads.add(state.getRead());
|
||||||
|
offsets.add(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
GenomeLoc loc = getLocation();
|
||||||
|
|
||||||
|
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||||
|
|
||||||
//if (DEBUG) {
|
//if (DEBUG) {
|
||||||
// logger.debug("DONE WITH NEXT, updating read states, current state is:");
|
// logger.debug("DONE WITH NEXT, updating read states, current state is:");
|
||||||
// printState();
|
// printState();
|
||||||
//}
|
//}
|
||||||
|
// if we got reads with non-D/N over the current position, we are done
|
||||||
return reads.size() == 0 ? next() : new AlignmentContext(loc, reads, offsets);
|
if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void collectPendingReads(final int maximumPileupSize) {
|
private void collectPendingReads(final int maximumPileupSize) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue