Added a big speed-up for interval list input processing. With large interval sets this was taking way too long.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1227 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-13 22:00:00 +00:00
parent 9f0fb9f3aa
commit e4152af387
2 changed files with 17 additions and 18 deletions

View File

@ -3,10 +3,7 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import java.util.AbstractSet; import java.util.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/** /**
* *
@ -47,7 +44,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
/** /**
* get an iterator over this collection * get an iterator over this collection
* *
* @return * @return an iterator<GenomeLoc>
*/ */
public Iterator<GenomeLoc> iterator() { public Iterator<GenomeLoc> iterator() {
return mArray.iterator(); return mArray.iterator();
@ -56,7 +53,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
/** /**
* return the size of the collection * return the size of the collection
* *
* @return * @return the size of the collection
*/ */
public int size() { public int size() {
return mArray.size(); return mArray.size();
@ -79,19 +76,19 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
* @return true * @return true
*/ */
public boolean add(GenomeLoc e) { public boolean add(GenomeLoc e) {
if (mArray.contains(e)) { // assuming that the incoming intervals arrive in order saves us a fair amount of time (and it's most likely true)
throw new IllegalArgumentException("attempting to add a duplicate object to the set"); if (mArray.size() > 0 && e.isPast(mArray.get(mArray.size() - 1))) {
} mArray.add(e);
int index = 0; return true;
while (index < mArray.size()) { } else {
if (!e.isPast(mArray.get(index))) { int loc = Collections.binarySearch(mArray,e);
mArray.add(index, e); if (loc >= 0) {
throw new StingException("Genome Loc Sorted Set already contains the GenomicLoc " + e.toString());
} else {
mArray.add((loc+1) * -1,e);
return true; return true;
} }
++index;
} }
this.mArray.add(e);
return true;
} }
/** /**
@ -237,12 +234,14 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
/** /**
* Create a sorted genome location set from a list of GenomeLocs. * Create a sorted genome location set from a list of GenomeLocs.
*
* @param locs the list<GenomeLoc> * @param locs the list<GenomeLoc>
*
* @return the sorted genome loc list * @return the sorted genome loc list
*/ */
public static GenomeLocSortedSet createSetFromList(List<GenomeLoc> locs) { public static GenomeLocSortedSet createSetFromList(List<GenomeLoc> locs) {
GenomeLocSortedSet set = new GenomeLocSortedSet(); GenomeLocSortedSet set = new GenomeLocSortedSet();
for (GenomeLoc l: locs) { for (GenomeLoc l : locs) {
set.add(l); set.add(l);
} }
return set; return set;

View File

@ -78,7 +78,7 @@ public class GenomeLocSortedSetTest extends BaseTest {
} }
@Test(expected = IllegalArgumentException.class) @Test(expected = StingException.class)
public void testAddDupplicate() { public void testAddDupplicate() {
assertTrue(mSortedSet.size() == 0); assertTrue(mSortedSet.size() == 0);
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0);