From eeb14ec717af0bc4ab6befab4bcf2d5daa2f9e5d Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 23 Sep 2009 20:38:53 +0000 Subject: [PATCH] a couple of light changes to GenomeLocSortedSet. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1708 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/GenomeLocSortedSet.java | 53 ++++++++++++++++++- .../sting/utils/GenomeLocSortedSetTest.java | 28 ++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java index 81ca85957..d80a53ac5 100755 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; +import org.apache.log4j.Logger; import java.util.*; @@ -34,8 +35,10 @@ import java.util.*; * that element. */ public class GenomeLocSortedSet extends AbstractSet { + private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class); + // our private storage for the GenomeLoc's - private final ArrayList mArray = new ArrayList(); + private List mArray = new ArrayList(); /** default constructor */ public GenomeLocSortedSet() { @@ -119,6 +122,9 @@ public class GenomeLocSortedSet extends AbstractSet { (e.getStart() < g.getStart()) && !haveAdded) { mArray.add(mArray.indexOf(g), e); return true; + } else if (haveAdded && ((e.getContigIndex() > e.getContigIndex()) || + (g.getContigIndex() == e.getContigIndex() && e.getStart() > g.getStart()))) { + return true; } } /** we're at the end and we haven't found locations that should fall after it, @@ -262,4 +268,49 @@ public class GenomeLocSortedSet extends AbstractSet { return ret; } + + public boolean addAllRegions(List locations) { + this.mArray.addAll(locations); + Collections.sort(this.mArray); + this.mArray = GenomeLocSortedSet.mergeOverlappingLocations(this.mArray); + return true; + } + +/** + * merge a list of genome locs that may be overlapping, returning the list of unique genomic locations + * + * @param raw the unchecked genome loc list + * + * @return the list of merged locations + */ + public static List mergeOverlappingLocations(final List raw) { + logger.debug(" Raw locations are: " + Utils.join(", ", raw)); + if (raw.size() <= 1) + return raw; + else { + ArrayList merged = new ArrayList(); + Iterator it = raw.iterator(); + GenomeLoc prev = it.next(); + while (it.hasNext()) { + GenomeLoc curr = it.next(); + if (prev.contiguousP(curr)) { + prev = prev.merge(curr); + } else { + merged.add(prev); + prev = curr; + } + } + merged.add(prev); + return merged; + } + } + + /** + * convert this object to a list + * @return the lists + */ + public List toList() { + return this.mArray; + } + } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java index a665ebb4f..111265045 100755 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java @@ -4,6 +4,7 @@ import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; import org.junit.Before; import org.junit.Test; @@ -203,4 +204,31 @@ public class GenomeLocSortedSetTest extends BaseTest { } assertTrue(seqNumber == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES); } + + @Test + public void testAddAll() { + mSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary()); + GenomeLocSortedSet set = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary()); + // we should have sequence + assertTrue(mSortedSet.size() == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES); + mSortedSet.addAllRegions(set.toList()); + assertTrue(mSortedSet.size() == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES); + } + + @Test + public void testAddAll2() { + mSortedSet = new GenomeLocSortedSet(); + GenomeLocSortedSet mSortedSet2 = new GenomeLocSortedSet(); + for (int x=0; x < 200; x = x + 2) { + mSortedSet.add(GenomeLocParser.createGenomeLoc(1,x)); + } + assertEquals(100, mSortedSet.size()); + for (int x=1; x < 201; x = x + 2) { + mSortedSet2.add(GenomeLocParser.createGenomeLoc(1,x)); + } + assertEquals(100, mSortedSet2.size()); + mSortedSet.addAllRegions(mSortedSet2.toList()); + assertEquals(1, mSortedSet.size()); + } + }