From dd0085c42878c84b0cd8349f4d0f5485ad61d64c Mon Sep 17 00:00:00 2001 From: asivache Date: Wed, 9 Sep 2009 20:04:04 +0000 Subject: [PATCH] 1) now is tolerant to sloppy cigar strings with 0-length elements (at the price of extra recursive call) 2) when reads with deletions are requested, adds to the pile just those: reads with 'D' over the current reference base, but not 'N' 3) next() now implements a loop: recursive forward iteration calls to next() until ref. position with non-zero coverage is encountered were OK for (short) deletions, but with long stretches of N's they end up with stack overflow git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1568 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/iterators/LocusIteratorByState.java | 44 +++++++++++-------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 67dde76ce..7b468ac56 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -113,11 +113,14 @@ public class LocusIteratorByState extends LocusIterator { // return null; // we are done //if (DEBUG2) System.out.printf("stepForwardOnGenome2: curElement=%s, counter=%d, len=%d%n", curElement, cigarElementCounter, curElement != null ? curElement.getLength() : -1); - if ( curElement == null || ++cigarElementCounter >= curElement.getLength() ) { + if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) { cigarOffset++; if ( cigarOffset < nCigarElements ) { curElement = cigar.getCigarElement(cigarOffset); cigarElementCounter = 0; + // next line: guards against cigar elements of length 0; when new cigar element is retrieved, + // we reenter in order to re-check cigarElementCounter against curElement's length + return stepForwardOnGenome(); } else { return null; } @@ -215,31 +218,36 @@ public class LocusIteratorByState extends LocusIterator { // printState(); //} - collectPendingReads(readInfo.getMaxReadsAtLocus()); - - // todo -- performance problem -- should be lazy, really ArrayList reads = new ArrayList(readStates.size()); ArrayList offsets = new ArrayList(readStates.size()); - for ( SAMRecordState state : readStates ) { - if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) { -// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset()); - reads.add(state.getRead()); - offsets.add(state.getReadOffset()); - } else if ( readInfo.includeReadsWithDeletionAtLoci() ) { - reads.add(state.getRead()); - offsets.add(-1); - } - } - GenomeLoc loc = getLocation(); - updateReadStates(); // critical - must be called after we get the current state offsets and location + // keep iterating forward until we encounter a reference position that has something "real" hanging over it + // (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true) + while(true) { + collectPendingReads(readInfo.getMaxReadsAtLocus()); + + // todo -- performance problem -- should be lazy, really + for ( SAMRecordState state : readStates ) { + if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) { +// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset()); + reads.add(state.getRead()); + offsets.add(state.getReadOffset()); + } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) { + reads.add(state.getRead()); + offsets.add(-1); + } + } + GenomeLoc loc = getLocation(); + + updateReadStates(); // critical - must be called after we get the current state offsets and location //if (DEBUG) { // logger.debug("DONE WITH NEXT, updating read states, current state is:"); // printState(); //} - - return reads.size() == 0 ? next() : new AlignmentContext(loc, reads, offsets); + // if we got reads with non-D/N over the current position, we are done + if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets); + } } private void collectPendingReads(final int maximumPileupSize) {