diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java
index ff053385d..29163c3ca 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java
@@ -302,7 +302,7 @@ public class IndelGenotyperV2Walker extends ReadWalker {
             } else {
                 // read in the whole list of intervals for cleaning
                 GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(
-                        IntervalUtils.parseIntervalArguments(Arrays.asList(genotypeIntervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY);
+                        IntervalUtils.parseIntervalArguments(Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
                 genotypeIntervals = locs.iterator();
             }
             currentGenotypeInterval = genotypeIntervals.hasNext() ? genotypeIntervals.next() : null;
diff --git a/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java b/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java
new file mode 100755
index 000000000..a08bdc6da
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.interval;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/*
+ * Created by IntelliJ IDEA.
+ * User: asivache
+ * Date: Oct 7, 2010
+ * Time: 2:40:02 PM
+ */
+
+/**
+ * Adapter over an interval iterator that returns only those (parts of the) underlying iterator's
+ * intervals that overlap with a specified "master set" of bounding intervals. The underlying
+ * iterator must return NON-overlapping intervals in coordinate-sorted order, otherwise the behavior
+ * is unspecified. The iterator representing the master set should likewise return sorted and
+ * NON-overlapping intervals.
+ */
+public class OverlappingIntervalIterator implements Iterator<GenomeLoc> {
+    PushbackIterator<GenomeLoc> iter = null;    // intervals being restricted
+    PushbackIterator<GenomeLoc> boundBy = null; // bounding ("master") intervals
+
+    GenomeLoc prefetchedOverlap = null; // overlap that next() will return; null when there is none
+    GenomeLoc currentBound = null;      // bounding interval under examination; null once bounds ran out
+    GenomeLoc currentInterval = null;   // interval under examination; null once intervals ran out
+
+    /**
+     * Creates a new overlapping iterator that internally traverses {@code intervals} and
+     * returns only the overlaps of those with the set of intervals returned by {@code boundBy}.
+     *
+     * @param intervals sorted, non-overlapping intervals to be restricted
+     * @param boundBy sorted, non-overlapping bounding intervals
+     */
+    public OverlappingIntervalIterator(Iterator<GenomeLoc> intervals, Iterator<GenomeLoc> boundBy) {
+        this.iter = new PushbackIterator<GenomeLoc>(intervals);
+        this.boundBy = new PushbackIterator<GenomeLoc>(boundBy);
+
+        // Assign the FIELDS (not same-named locals) and read through the pushback wrappers,
+        // so that fetchNextOverlap() actually sees the values loaded here.
+        if ( this.iter.hasNext() && this.boundBy.hasNext() ) {
+            currentInterval = this.iter.next();   // load first interval
+            currentBound = this.boundBy.next();   // load first bounding interval
+            fetchNextOverlap();
+        }
+    }
+
+    /**
+     * Traverses both iterators in sync until the first overlap between the two is reached and
+     * stores it in prefetchedOverlap. If no overlap is found before the end of either of the
+     * two streams, leaves prefetchedOverlap set to null.
+     */
+    private void fetchNextOverlap() {
+        prefetchedOverlap = null;
+
+        // a null current interval/bound means that the corresponding stream is exhausted
+        while ( currentInterval != null && currentBound != null ) {
+
+            if ( currentInterval.isBefore(currentBound) ) {
+                currentInterval = iter.hasNext() ? iter.next() : null; // null: no more intervals left, we are done
+                continue;
+            }
+
+            if ( currentInterval.isPast(currentBound) ) {
+                currentBound = boundBy.hasNext() ? boundBy.next() : null; // null: past the last bound, we are done
+                continue;
+            }
+
+            // we are at this point only if currentInterval overlaps with currentBound
+            prefetchedOverlap = currentInterval.intersect(currentBound);
+
+            // move on, so that the next invocation does not report the same overlap again
+            advancePastOverlap();
+            return;
+        }
+    }
+
+    /**
+     * Advances exactly one of currentInterval / currentBound after an overlap between them has
+     * been recorded, so that the next invocation of fetchNextOverlap() does not report the same
+     * overlap again. When both streams are exhausted, both current values are set to null.
+     */
+    private void advancePastOverlap() {
+        // We still do not know if we are done with either current interval or current bound,
+        // because two special situations are possible:
+        //
+        //  1) next interval overlaps with          2) current interval also overlaps with
+        //     the same bounding interval;             next bounding interval; note that
+        //     note that in this case next             in this case next bound necessarily
+        //     interval necessarily starts before      starts before the next interval
+        //     the next bound
+        //
+        //      curr. int   next int.                           curr. int
+        //        -----      ------                     --------------------------
+        //      -------------------                 ---------         -------------
+        //          curr. bound                    curr. bound         next bound
+        //
+        // To solve this issue we update either only currentInterval or only currentBound to their
+        // next value, whichever comes first; the rest of the traversal to the next overlap will be
+        // performed on the next invocation of fetchNextOverlap().
+        final boolean moreIntervals = iter.hasNext();
+        final boolean moreBounds = boundBy.hasNext();
+
+        if ( moreIntervals && moreBounds ) {
+            GenomeLoc nextInterval = iter.next();
+            GenomeLoc nextBound = boundBy.next();
+
+            if ( nextInterval.startsBefore(nextBound) ) {
+                currentInterval = nextInterval;
+                boundBy.pushback(nextBound); // in case next interval overlaps with the current bound
+            } else {
+                currentBound = nextBound;
+                iter.pushback(nextInterval); // in case current interval also overlaps with the next bound
+            }
+        } else if ( moreIntervals ) {
+            currentInterval = iter.next();   // on the last bound: later intervals may still overlap it
+        } else if ( moreBounds ) {
+            currentBound = boundBy.next();   // on the last interval: it may still overlap later bounds
+        } else {
+            currentInterval = null;          // both streams are exhausted, nothing is left to report
+            currentBound = null;
+        }
+    }
+
+    /**
+     * Returns true if the iteration has more elements, i.e. if another overlap has been
+     * prefetched, so that next() would return an element rather than throw an exception.
+     *
+     * @return true if the iterator has more elements.
+     */
+    public boolean hasNext() {
+        return prefetchedOverlap != null;
+    }
+
+    /**
+     * Returns the next overlap in the iteration and prefetches the one after it.
+     *
+     * @return the next element in the iteration.
+     * @throws java.util.NoSuchElementException if the iteration has no more elements.
+     */
+    public GenomeLoc next() {
+        if ( prefetchedOverlap == null ) {
+            throw new NoSuchElementException("Illegal call to next(): OverlappingIntervalIterator has no more overlaps");
+        }
+        GenomeLoc overlap = prefetchedOverlap; // hold on to the value: prefetching overwrites the field
+        fetchNextOverlap();
+        return overlap;
+    }
+
+    /**
+     * Removal is not supported by this iterator.
+     *
+     * @throws UnsupportedOperationException always.
+     */
+    public void remove() {
+        throw new UnsupportedOperationException("remove() method is not supported by OverlappingIntervalIterator");
+    }
+}