From d056f9f3e8b712d7e3988fd97f4d3b50128e17a4 Mon Sep 17 00:00:00 2001 From: aaron Date: Fri, 22 May 2009 22:34:24 +0000 Subject: [PATCH] Changed the name to reflect the sorted nature of the set, added some fixes git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@810 348d0f76-0448-11de-a6fe-93d51630548a --- ...omeLocSet.java => GenomeLocSortedSet.java} | 23 ++- .../sting/utils/GenomeLocSetTest.java | 168 ---------------- .../sting/utils/GenomeLocSortedSetTest.java | 181 ++++++++++++++++++ 3 files changed, 195 insertions(+), 177 deletions(-) rename java/src/org/broadinstitute/sting/utils/{GenomeLocSet.java => GenomeLocSortedSet.java} (91%) delete mode 100755 java/test/org/broadinstitute/sting/utils/GenomeLocSetTest.java create mode 100755 java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocSet.java b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java similarity index 91% rename from java/src/org/broadinstitute/sting/utils/GenomeLocSet.java rename to java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java index a5706ac30..850278f91 100755 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocSet.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java @@ -37,11 +37,11 @@ import java.util.Iterator; * partial interval of a region in the collection it will remove the region from * that element. */ -public class GenomeLocSet extends AbstractSet { +public class GenomeLocSortedSet extends AbstractSet { // our private storage for the GenomeLoc's private final ArrayList mArray = new ArrayList(); - public GenomeLocSet() {} + public GenomeLocSortedSet() {} /** * get an iterator over this collection @@ -102,6 +102,9 @@ public class GenomeLocSet extends AbstractSet { if (e == null) { return false; } + // have we added it to the collection? 
+ boolean haveAdded = false; + /** * check if the specified element overlaps any current locations, if so * we should merge the two. @@ -110,9 +113,9 @@ public class GenomeLocSet extends AbstractSet { if (g.contiguousP(e)) { GenomeLoc c = g.merge(e); mArray.set(mArray.indexOf(g),c); - return true; + haveAdded = true; } else if ((g.getContigIndex() == e.getContigIndex()) && - (g.getStart() > e.getStart())) { + (e.getStart() < g.getStart()) && !haveAdded) { mArray.add(mArray.indexOf(g), e); return true; } @@ -120,7 +123,9 @@ public class GenomeLocSet extends AbstractSet { /** we're at the end and we haven't found locations that should fall after it, * so we'll put it at the end */ - mArray.add(e); + if (!haveAdded) { + mArray.add(e); + } return true; } @@ -210,11 +215,11 @@ public class GenomeLocSet extends AbstractSet { * @param dict the sequence dictionary to create a collection from * @return the GenomeLocSet of all references sequences as GenomeLoc's */ - public static GenomeLocSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) { - GenomeLocSet returnSet = new GenomeLocSet(); + public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) { + GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(); for (SAMSequenceRecord record : dict.getSequences()) { - returnSet.add(new GenomeLoc(record.getSequenceIndex(),1,record.getSequenceLength())); + returnSortedSet.add(new GenomeLoc(record.getSequenceIndex(),1,record.getSequenceLength())); } - return returnSet; + return returnSortedSet; } } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocSetTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocSetTest.java deleted file mode 100755 index 44c86b5db..000000000 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocSetTest.java +++ /dev/null @@ -1,168 +0,0 @@ -package org.broadinstitute.sting.utils; - -import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.BaseTest; -import 
org.broadinstitute.sting.utils.sam.ArtificialSamUtils; -import static org.junit.Assert.assertTrue; -import org.junit.Before; -import org.junit.Test; - -import java.util.Iterator; - -/** - * - * User: aaron - * Date: May 22, 2009 - * Time: 2:14:07 PM - * - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ - - -/** - * @author aaron - * @version 1.0 - * @date May 22, 2009 - *

- * Class GenomeLocSetTest - *

- * This tests the functions of the GenomeLocSet - */ -public class GenomeLocSetTest extends BaseTest { - - private GenomeLocSet mSet = null; - private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); - private static final int NUMBER_OF_CHROMOSOMES = 5; - private static final int STARTING_CHROMOSOME = 1; - private static final int CHROMOSOME_SIZE = 1000; - - @Before - public void setup() { - GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary()); - mSet = new GenomeLocSet(); - } - - @Test - public void testAdd() { - GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0); - assertTrue(mSet.size() == 0); - mSet.add(g); - assertTrue(mSet.size() == STARTING_CHROMOSOME); - } - - @Test - public void testRemove() { - assertTrue(mSet.size() == 0); - GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0); - mSet.add(g); - assertTrue(mSet.size() == STARTING_CHROMOSOME); - mSet.remove(g); - assertTrue(mSet.size() == 0); - } - - @Test(expected = IllegalArgumentException.class) - public void testAddDupplicate() { - assertTrue(mSet.size() == 0); - GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0); - mSet.add(g); - assertTrue(mSet.size() == STARTING_CHROMOSOME); - mSet.add(g); - } - - @Test - public void mergingOverlappingBelow() { - GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 50); - GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 49, 100); - assertTrue(mSet.size() == 0); - mSet.add(g); - assertTrue(mSet.size() == STARTING_CHROMOSOME); - mSet.addRegion(e); - assertTrue(mSet.size() == STARTING_CHROMOSOME); - Iterator iter = mSet.iterator(); - GenomeLoc loc = iter.next(); - assertTrue(loc.getStart() == 0); - assertTrue(loc.getStop() == 100); - assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME); - } - - @Test - public void mergingOverlappingAbove() { - GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 0, 50); - GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 49, 100); - 
assertTrue(mSet.size() == 0); - mSet.add(g); - assertTrue(mSet.size() == STARTING_CHROMOSOME); - mSet.addRegion(e); - assertTrue(mSet.size() == STARTING_CHROMOSOME); - Iterator iter = mSet.iterator(); - GenomeLoc loc = iter.next(); - assertTrue(loc.getStart() == 0); - assertTrue(loc.getStop() == 100); - assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME); - } - - @Test - public void deleteSubRegion() { - GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 0, 50); - GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 49, 100); - mSet.add(g); - mSet.addRegion(e); - - // now delete a region - GenomeLoc d = new GenomeLoc(STARTING_CHROMOSOME, 25, 75); - mSet.removeRegion(d); - Iterator iter = mSet.iterator(); - GenomeLoc loc = iter.next(); - assertTrue(loc.getStart() == 0); - assertTrue(loc.getStop() == 24); - assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME); - - loc = iter.next(); - assertTrue(loc.getStart() == 76); - assertTrue(loc.getStop() == 100); - assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME); - } - @Test - public void deleteSuperRegion() { - GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 10, 20); - GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 70, 100); - mSet.add(g); - mSet.addRegion(e); - assertTrue(mSet.size() == 2); - // now delete a region - GenomeLoc d = new GenomeLoc(STARTING_CHROMOSOME, 15, 75); - mSet.removeRegion(d); - Iterator iter = mSet.iterator(); - GenomeLoc loc = iter.next(); - assertTrue(loc.getStart() == 10); - assertTrue(loc.getStop() == 14); - assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME); - - loc = iter.next(); - assertTrue(loc.getStart() == 76); - assertTrue(loc.getStop() == 100); - assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME); - } - - @Test - public void fromSequenceDictionary() { - mSet = GenomeLocSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary()); - // we should have sequence - assertTrue(mSet.size() == GenomeLocSetTest.NUMBER_OF_CHROMOSOMES); - int seqNumber = 0; - for 
(GenomeLoc loc : mSet) { - assertTrue(loc.getStart() == 1); - assertTrue(loc.getStop() == GenomeLocSetTest.CHROMOSOME_SIZE); - assertTrue(loc.getContigIndex() == seqNumber); - ++seqNumber; - } - assertTrue(seqNumber == GenomeLocSetTest.NUMBER_OF_CHROMOSOMES); - } -} diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java new file mode 100755 index 000000000..46aaf22de --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java @@ -0,0 +1,181 @@ +package org.broadinstitute.sting.utils; + +import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.sam.ArtificialSamUtils; +import static org.junit.Assert.assertTrue; +import org.junit.Before; +import org.junit.Test; + +import java.util.Iterator; + +/** + * + * User: aaron + * Date: May 22, 2009 + * Time: 2:14:07 PM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + + +/** + * @author aaron + * @version 1.0 + * @date May 22, 2009 + *

+ * Class GenomeLocSortedSetTest + *

+ * This tests the functions of the GenomeLocSet + */ +public class GenomeLocSortedSetTest extends BaseTest { + + private GenomeLocSortedSet mSortedSet = null; + private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); + private static final int NUMBER_OF_CHROMOSOMES = 5; + private static final int STARTING_CHROMOSOME = 1; + private static final int CHROMOSOME_SIZE = 1000; + + @Before + public void setup() { + GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary()); + mSortedSet = new GenomeLocSortedSet(); + } + + @Test + public void testAdd() { + GenomeLoc g = new GenomeLoc(1, 0, 0); + assertTrue(mSortedSet.size() == 0); + mSortedSet.add(g); + assertTrue(mSortedSet.size() == 1); + } + + @Test + public void testRemove() { + assertTrue(mSortedSet.size() == 0); + GenomeLoc g = new GenomeLoc(1, 0, 0); + mSortedSet.add(g); + assertTrue(mSortedSet.size() == 1); + mSortedSet.remove(g); + assertTrue(mSortedSet.size() == 0); + } + + @Test + public void addRegion() { + assertTrue(mSortedSet.size() == 0); + GenomeLoc g = new GenomeLoc(1, 1, 50); + mSortedSet.add(g); + GenomeLoc f = new GenomeLoc(1, 30, 80); + mSortedSet.addRegion(f); + assertTrue(mSortedSet.size() == 1); + + } + + + @Test(expected = IllegalArgumentException.class) + public void testAddDupplicate() { + assertTrue(mSortedSet.size() == 0); + GenomeLoc g = new GenomeLoc(1, 0, 0); + mSortedSet.add(g); + assertTrue(mSortedSet.size() == 1); + mSortedSet.add(g); + } + + @Test + public void mergingOverlappingBelow() { + GenomeLoc g = new GenomeLoc(1, 0, 50); + GenomeLoc e = new GenomeLoc(1, 49, 100); + assertTrue(mSortedSet.size() == 0); + mSortedSet.add(g); + assertTrue(mSortedSet.size() == 1); + mSortedSet.addRegion(e); + assertTrue(mSortedSet.size() == 1); + Iterator iter = mSortedSet.iterator(); + GenomeLoc loc = iter.next(); + assertTrue(loc.getStart() == 0); + assertTrue(loc.getStop() == 100); + 
assertTrue(loc.getContigIndex() == 1); + } + + @Test + public void mergingOverlappingAbove() { + GenomeLoc e = new GenomeLoc(1, 0, 50); + GenomeLoc g = new GenomeLoc(1, 49, 100); + assertTrue(mSortedSet.size() == 0); + mSortedSet.add(g); + assertTrue(mSortedSet.size() == 1); + mSortedSet.addRegion(e); + assertTrue(mSortedSet.size() == 1); + Iterator iter = mSortedSet.iterator(); + GenomeLoc loc = iter.next(); + assertTrue(loc.getStart() == 0); + assertTrue(loc.getStop() == 100); + assertTrue(loc.getContigIndex() == 1); + } + + @Test + public void deleteSubRegion() { + GenomeLoc e = new GenomeLoc(1, 0, 50); + GenomeLoc g = new GenomeLoc(1, 49, 100); + mSortedSet.add(g); + mSortedSet.addRegion(e); + + // now delete a region + GenomeLoc d = new GenomeLoc(1, 25, 75); + mSortedSet.removeRegion(d); + Iterator iter = mSortedSet.iterator(); + GenomeLoc loc = iter.next(); + assertTrue(loc.getStart() == 0); + assertTrue(loc.getStop() == 24); + assertTrue(loc.getContigIndex() == 1); + + loc = iter.next(); + assertTrue(loc.getStart() == 76); + assertTrue(loc.getStop() == 100); + assertTrue(loc.getContigIndex() == 1); + } + + @Test + public void deleteSuperRegion() { + GenomeLoc e = new GenomeLoc(1, 10, 20); + GenomeLoc g = new GenomeLoc(1, 70, 100); + mSortedSet.add(g); + mSortedSet.addRegion(e); + assertTrue(mSortedSet.size() == 2); + // now delete a region + GenomeLoc d = new GenomeLoc(1, 15, 75); + mSortedSet.removeRegion(d); + Iterator iter = mSortedSet.iterator(); + GenomeLoc loc = iter.next(); + assertTrue(loc.getStart() == 10); + assertTrue(loc.getStop() == 14); + assertTrue(loc.getContigIndex() == 1); + + loc = iter.next(); + assertTrue(loc.getStart() == 76); + assertTrue(loc.getStop() == 100); + assertTrue(loc.getContigIndex() == 1); + } + + @Test + public void fromSequenceDictionary() { + mSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary()); + // we should have sequence + assertTrue(mSortedSet.size() == 
GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES); + int seqNumber = 0; + for (GenomeLoc loc : mSortedSet) { + assertTrue(loc.getStart() == 1); + assertTrue(loc.getStop() == GenomeLocSortedSetTest.CHROMOSOME_SIZE); + assertTrue(loc.getContigIndex() == seqNumber); + ++seqNumber; + } + assertTrue(seqNumber == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES); + } +}