From 05500d1a8d56b11bb2fdebd86952faf232f5c080 Mon Sep 17 00:00:00 2001 From: asivache Date: Fri, 8 Oct 2010 16:34:00 +0000 Subject: [PATCH] An iterator wrapper/adapter: takes GenomeLoc iterators 1 and 2 and traverses intersections of intervals from 1 with intervals from 2. Both 1 and 2 must be SORTED and NON_OVERLAPPING, but this iterator does NOT perfrom any checks, so if these conditions are not met, the behavior is unspecified git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4468 348d0f76-0448-11de-a6fe-93d51630548a --- .../interval/OverlappingIntervalIterator.java | 194 ++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java diff --git a/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java b/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java new file mode 100755 index 000000000..5202b8518 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.interval; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; + +import java.util.Iterator; + +/** + * Created by IntelliJ IDEA. + * User: asivache + * Date: Oct 7, 2010 + * Time: 2:40:02 PM + * To change this template use File | Settings | File Templates. + */ + +/** This class provides an adapter to Iterator that returns only (parts of) underlying iterator's + * intervals overlapping with specified "master set" of bounding intervals. The underlying iterator must return + * NON-overlapping intervals in coordinate-sorted order, otherwise the behavior is unspecified. If the master set is represented by + * another interval iterator, it should return sorted and NON-overlapping intervals. + * + */ +public class OverlappingIntervalIterator implements Iterator { + PushbackIterator iter = null; + PushbackIterator boundBy = null; + + GenomeLoc prefetchedOverlap = null; + GenomeLoc currentBound = null; + GenomeLoc currentInterval = null; + + + /** Creates new overlapping iterator that will internally traverse intervals and return only + * overlaps of those with set of intervals returned by boundBy. + * @param intervals + * @param boundBy + */ + public OverlappingIntervalIterator(Iterator intervals, Iterator boundBy) { + this.iter = new PushbackIterator(intervals); + this.boundBy = new PushbackIterator(boundBy); + + if ( iter.hasNext() && boundBy.hasNext() ) { + GenomeLoc currentInterval = iter.next(); // load first interval + GenomeLoc currentBound = boundBy.next(); // load first bounding interval + fetchNextOverlap(); + } + } + + /** Traverses both iterators in sync, until the first overlap between the two is reached. If no overlap is found + * until the end of the either of the two streams, leaves prefetchedOverlap set to null + */ + private void fetchNextOverlap() { + + prefetchedOverlap = null; + + while ( currentInterval != null && currentBound != null ) { + + if ( currentInterval.isBefore(currentBound) ) { + if ( ! iter.hasNext() ) currentInterval = null; + else currentInterval = iter.next(); + continue; + } + + if ( currentInterval.isPast(currentBound) ) { + if ( ! boundBy.hasNext() ) currentBound = null; + else currentBound = boundBy.next(); + continue; + } + + // we are at this point only if currentInterval overlaps with currentBound + + prefetchedOverlap = currentInterval.intersect(currentBound); + + // now we need to advance at least one of the iterators, so that we would not + // call the same overlap again + + // however we still do not know if we are done with either current interval or current bound, because + // two special situations are possible: + // + // 1) next interval overlaps with 2) current interval also overlaps with + // the same bounding interval; next bounding interval; note that + // note that in this case next in this case next bound necessarily + // interval necessarily starts before starts before the next interval + // the next bound + // + // curr. int next int. curr. int + // ----- ------ -------------------------- + // ------------------- --------- ------------- + // curr. bound curr. bound next bound + + // To solve this issue we update either only currentInterval or only currentBound to their next value, + // whichever of those next values (intervals) comes first on the reference genome; + // the rest of the traversal to the next overlap will be performed on the next invocation of + // fetchNextOverlap(). + + advanceToNearest(); + + break; // now that we computed the overlap and advanced (at least one of) the intervals/bounds to + // the next location, we are done - bail out from the loop. + } + + } + + private void advanceToNearest() { + if ( ! iter.hasNext() ) { + currentBound = boundBy.hasNext() ? boundBy.next() : null; + } else { + if ( ! boundBy.hasNext() ) currentInterval = iter.hasNext() ? iter.next() : null; + else { + // both intervals and bounds have next value available; let's check which comes first: + GenomeLoc nextInterval = iter.next(); + GenomeLoc nextBound = boundBy.next(); + + if ( nextInterval.compareTo(nextBound) < 0 ) { + currentInterval = nextInterval; + boundBy.pushback(nextBound); + } else { + currentBound = nextBound; + iter.pushback(nextInterval); + } + + } + } + } + + /** + * Returns true if the iteration has more elements. (In other + * words, returns true if next would return an element + * rather than throwing an exception.) + * + * @return true if the iterator has more elements. + */ + public boolean hasNext() { + return prefetchedOverlap != null; + } + + /** + * Returns the next element in the iteration. + * + * @return the next element in the iteration. + * @throws java.util.NoSuchElementException + * iteration has no more elements. + */ + public GenomeLoc next() { + if ( prefetchedOverlap == null ) + throw new java.util.NoSuchElementException("Illegal call to next(): Overlapping iterator has no more overlaps"); + GenomeLoc ret = prefetchedOverlap; // cache current prefetched overlap + fetchNextOverlap(); // prefetch next overlap + return ret ; + } + + /** + * Removes from the underlying collection the last element returned by the + * iterator (optional operation). This method can be called only once per + * call to next. The behavior of an iterator is unspecified if + * the underlying collection is modified while the iteration is in + * progress in any way other than by calling this method. + * + * @throws UnsupportedOperationException if the remove + * operation is not supported by this Iterator. + * @throws IllegalStateException if the next method has not + * yet been called, or the remove method has already + * been called after the last call to the next + * method. + */ + public void remove() { + throw new UnsupportedOperationException("remove() method is not supported by OverlappingIntervalIterator"); + //To change body of implemented methods use File | Settings | File Templates. + } +}