Changed the name to reflect the sorted nature of the set, added some fixes

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@810 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-05-22 22:34:24 +00:00
parent 831d430025
commit d056f9f3e8
3 changed files with 195 additions and 177 deletions

View File

@ -37,11 +37,11 @@ import java.util.Iterator;
* partial interval of a region in the collection it will remove the region from
* that element.
*/
public class GenomeLocSet extends AbstractSet<GenomeLoc> {
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
// our private storage for the GenomeLoc's
private final ArrayList<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
public GenomeLocSet() {}
public GenomeLocSortedSet() {}
/**
* get an iterator over this collection
@ -102,6 +102,9 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
if (e == null) {
return false;
}
// have we added it to the collection?
boolean haveAdded = false;
/**
* check if the specified element overlaps any current locations, if so
* we should merge the two.
@ -110,9 +113,9 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
if (g.contiguousP(e)) {
GenomeLoc c = g.merge(e);
mArray.set(mArray.indexOf(g),c);
return true;
haveAdded = true;
} else if ((g.getContigIndex() == e.getContigIndex()) &&
(g.getStart() > e.getStart())) {
(e.getStart() < g.getStart()) && !haveAdded) {
mArray.add(mArray.indexOf(g), e);
return true;
}
@ -120,7 +123,9 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
/** we're at the end and we haven't found locations that should fall after it,
* so we'll put it at the end
*/
mArray.add(e);
if (!haveAdded) {
mArray.add(e);
}
return true;
}
@ -210,11 +215,11 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
* @param dict the sequence dictionary to create a collection from
* @return the GenomeLocSortedSet of all reference sequences as GenomeLocs
*/
public static GenomeLocSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
GenomeLocSet returnSet = new GenomeLocSet();
public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet();
for (SAMSequenceRecord record : dict.getSequences()) {
returnSet.add(new GenomeLoc(record.getSequenceIndex(),1,record.getSequenceLength()));
returnSortedSet.add(new GenomeLoc(record.getSequenceIndex(),1,record.getSequenceLength()));
}
return returnSet;
return returnSortedSet;
}
}

View File

@ -1,168 +0,0 @@
package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
import static org.junit.Assert.assertTrue;
import org.junit.Before;
import org.junit.Test;
import java.util.Iterator;
/**
*
* User: aaron
* Date: May 22, 2009
* Time: 2:14:07 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date May 22, 2009
* <p/>
* Class GenomeLocSetTest
* <p/>
* This tests the functions of the GenomeLocSet
*/
public class GenomeLocSetTest extends BaseTest {
private GenomeLocSet mSet = null;
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
private static final int NUMBER_OF_CHROMOSOMES = 5;
private static final int STARTING_CHROMOSOME = 1;
private static final int CHROMOSOME_SIZE = 1000;
@Before
public void setup() {
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
mSet = new GenomeLocSet();
}
@Test
public void testAdd() {
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0);
assertTrue(mSet.size() == 0);
mSet.add(g);
assertTrue(mSet.size() == STARTING_CHROMOSOME);
}
@Test
public void testRemove() {
assertTrue(mSet.size() == 0);
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0);
mSet.add(g);
assertTrue(mSet.size() == STARTING_CHROMOSOME);
mSet.remove(g);
assertTrue(mSet.size() == 0);
}
@Test(expected = IllegalArgumentException.class)
public void testAddDupplicate() {
assertTrue(mSet.size() == 0);
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0);
mSet.add(g);
assertTrue(mSet.size() == STARTING_CHROMOSOME);
mSet.add(g);
}
@Test
public void mergingOverlappingBelow() {
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 50);
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 49, 100);
assertTrue(mSet.size() == 0);
mSet.add(g);
assertTrue(mSet.size() == STARTING_CHROMOSOME);
mSet.addRegion(e);
assertTrue(mSet.size() == STARTING_CHROMOSOME);
Iterator<GenomeLoc> iter = mSet.iterator();
GenomeLoc loc = iter.next();
assertTrue(loc.getStart() == 0);
assertTrue(loc.getStop() == 100);
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
}
@Test
public void mergingOverlappingAbove() {
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 0, 50);
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 49, 100);
assertTrue(mSet.size() == 0);
mSet.add(g);
assertTrue(mSet.size() == STARTING_CHROMOSOME);
mSet.addRegion(e);
assertTrue(mSet.size() == STARTING_CHROMOSOME);
Iterator<GenomeLoc> iter = mSet.iterator();
GenomeLoc loc = iter.next();
assertTrue(loc.getStart() == 0);
assertTrue(loc.getStop() == 100);
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
}
@Test
public void deleteSubRegion() {
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 0, 50);
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 49, 100);
mSet.add(g);
mSet.addRegion(e);
// now delete a region
GenomeLoc d = new GenomeLoc(STARTING_CHROMOSOME, 25, 75);
mSet.removeRegion(d);
Iterator<GenomeLoc> iter = mSet.iterator();
GenomeLoc loc = iter.next();
assertTrue(loc.getStart() == 0);
assertTrue(loc.getStop() == 24);
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
loc = iter.next();
assertTrue(loc.getStart() == 76);
assertTrue(loc.getStop() == 100);
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
}
@Test
public void deleteSuperRegion() {
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 10, 20);
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 70, 100);
mSet.add(g);
mSet.addRegion(e);
assertTrue(mSet.size() == 2);
// now delete a region
GenomeLoc d = new GenomeLoc(STARTING_CHROMOSOME, 15, 75);
mSet.removeRegion(d);
Iterator<GenomeLoc> iter = mSet.iterator();
GenomeLoc loc = iter.next();
assertTrue(loc.getStart() == 10);
assertTrue(loc.getStop() == 14);
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
loc = iter.next();
assertTrue(loc.getStart() == 76);
assertTrue(loc.getStop() == 100);
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
}
@Test
public void fromSequenceDictionary() {
mSet = GenomeLocSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
// we should have sequence
assertTrue(mSet.size() == GenomeLocSetTest.NUMBER_OF_CHROMOSOMES);
int seqNumber = 0;
for (GenomeLoc loc : mSet) {
assertTrue(loc.getStart() == 1);
assertTrue(loc.getStop() == GenomeLocSetTest.CHROMOSOME_SIZE);
assertTrue(loc.getContigIndex() == seqNumber);
++seqNumber;
}
assertTrue(seqNumber == GenomeLocSetTest.NUMBER_OF_CHROMOSOMES);
}
}

View File

@ -0,0 +1,181 @@
package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
import static org.junit.Assert.assertTrue;
import org.junit.Before;
import org.junit.Test;
import java.util.Iterator;
/**
*
* User: aaron
* Date: May 22, 2009
* Time: 2:14:07 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date May 22, 2009
* <p/>
* Class GenomeLocSortedSetTest
* <p/>
* This tests the functions of the GenomeLocSortedSet
*/
public class GenomeLocSortedSetTest extends BaseTest {
    private static final int NUMBER_OF_CHROMOSOMES = 5;
    private static final int STARTING_CHROMOSOME = 1;
    private static final int CHROMOSOME_SIZE = 1000;

    /** Set under test; a fresh instance is created before each test. */
    private GenomeLocSortedSet mSortedSet = null;
    /** Artificial SAM header whose sequence dictionary is used to set up contig ordering. */
    private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);

    /** Asserts that the set under test currently holds exactly {@code expected} elements. */
    private void expectSize(int expected) {
        assertTrue(mSortedSet.size() == expected);
    }

    /** Asserts start, stop and contig index of {@code loc}, in that order. */
    private static void expectLoc(GenomeLoc loc, int contig, int start, int stop) {
        assertTrue(loc.getStart() == start);
        assertTrue(loc.getStop() == stop);
        assertTrue(loc.getContigIndex() == contig);
    }

    /** Registers contig ordering from the artificial header and starts from an empty set. */
    @Before
    public void setup() {
        GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
        mSortedSet = new GenomeLocSortedSet();
    }

    /** One add() on an empty set is expected to produce a set of size one. */
    @Test
    public void testAdd() {
        final GenomeLoc single = new GenomeLoc(1, 0, 0);
        expectSize(0);
        mSortedSet.add(single);
        expectSize(1);
    }

    /** Adding and then removing the same location is expected to leave the set empty. */
    @Test
    public void testRemove() {
        expectSize(0);
        final GenomeLoc single = new GenomeLoc(1, 0, 0);
        mSortedSet.add(single);
        expectSize(1);
        mSortedSet.remove(single);
        expectSize(0);
    }

    /** addRegion() of 30-80 onto an existing 1-50 is expected to keep a single element. */
    @Test
    public void addRegion() {
        expectSize(0);
        final GenomeLoc first = new GenomeLoc(1, 1, 50);
        mSortedSet.add(first);
        final GenomeLoc overlapping = new GenomeLoc(1, 30, 80);
        mSortedSet.addRegion(overlapping);
        expectSize(1);
    }

    /** A second add() of the same location is expected to throw IllegalArgumentException. */
    @Test(expected = IllegalArgumentException.class)
    public void testAddDupplicate() {
        expectSize(0);
        final GenomeLoc single = new GenomeLoc(1, 0, 0);
        mSortedSet.add(single);
        expectSize(1);
        mSortedSet.add(single);
    }

    /** Lower interval added first, upper overlapping interval merged in: expects one element 0-100. */
    @Test
    public void mergingOverlappingBelow() {
        final GenomeLoc lower = new GenomeLoc(1, 0, 50);
        final GenomeLoc upper = new GenomeLoc(1, 49, 100);
        expectSize(0);
        mSortedSet.add(lower);
        expectSize(1);
        mSortedSet.addRegion(upper);
        expectSize(1);
        final Iterator<GenomeLoc> contents = mSortedSet.iterator();
        expectLoc(contents.next(), 1, 0, 100);
    }

    /** Upper interval added first, lower overlapping interval merged in: expects one element 0-100. */
    @Test
    public void mergingOverlappingAbove() {
        final GenomeLoc lower = new GenomeLoc(1, 0, 50);
        final GenomeLoc upper = new GenomeLoc(1, 49, 100);
        expectSize(0);
        mSortedSet.add(upper);
        expectSize(1);
        mSortedSet.addRegion(lower);
        expectSize(1);
        final Iterator<GenomeLoc> contents = mSortedSet.iterator();
        expectLoc(contents.next(), 1, 0, 100);
    }

    /** Removing 25-75 after adding 49-100 and 0-50: expects 0-24 followed by 76-100. */
    @Test
    public void deleteSubRegion() {
        final GenomeLoc lower = new GenomeLoc(1, 0, 50);
        final GenomeLoc upper = new GenomeLoc(1, 49, 100);
        mSortedSet.add(upper);
        mSortedSet.addRegion(lower);
        // carve the middle out of the stored region
        final GenomeLoc hole = new GenomeLoc(1, 25, 75);
        mSortedSet.removeRegion(hole);
        final Iterator<GenomeLoc> contents = mSortedSet.iterator();
        expectLoc(contents.next(), 1, 0, 24);
        expectLoc(contents.next(), 1, 76, 100);
    }

    /** Removing 15-75 across the two elements 10-20 and 70-100: expects 10-14 followed by 76-100. */
    @Test
    public void deleteSuperRegion() {
        final GenomeLoc lower = new GenomeLoc(1, 10, 20);
        final GenomeLoc upper = new GenomeLoc(1, 70, 100);
        mSortedSet.add(upper);
        mSortedSet.addRegion(lower);
        expectSize(2);
        // remove a span that clips both stored elements
        final GenomeLoc hole = new GenomeLoc(1, 15, 75);
        mSortedSet.removeRegion(hole);
        final Iterator<GenomeLoc> contents = mSortedSet.iterator();
        expectLoc(contents.next(), 1, 10, 14);
        expectLoc(contents.next(), 1, 76, 100);
    }

    /**
     * A set built from the header's sequence dictionary is expected to hold one
     * element per contig, each spanning 1..CHROMOSOME_SIZE, iterated in
     * ascending contig-index order starting at 0.
     */
    @Test
    public void fromSequenceDictionary() {
        mSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
        // one element per sequence in the dictionary
        expectSize(GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
        int visited = 0;
        final Iterator<GenomeLoc> contents = mSortedSet.iterator();
        while (contents.hasNext()) {
            expectLoc(contents.next(), visited, 1, GenomeLocSortedSetTest.CHROMOSOME_SIZE);
            ++visited;
        }
        assertTrue(visited == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
    }
}