Renamed GenomeLocSet to GenomeLocSortedSet to reflect the sorted nature of the set, and added some fixes
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@810 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
831d430025
commit
d056f9f3e8
|
|
@ -37,11 +37,11 @@ import java.util.Iterator;
|
|||
* partial interval of a region in the collection it will remove the region from
|
||||
* that element.
|
||||
*/
|
||||
public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
||||
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||
// our private storage for the GenomeLoc's
|
||||
private final ArrayList<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
||||
|
||||
public GenomeLocSet() {}
|
||||
public GenomeLocSortedSet() {}
|
||||
|
||||
/**
|
||||
* get an iterator over this collection
|
||||
|
|
@ -102,6 +102,9 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
|||
if (e == null) {
|
||||
return false;
|
||||
}
|
||||
// have we added it to the collection?
|
||||
boolean haveAdded = false;
|
||||
|
||||
/**
|
||||
* check if the specified element overlaps any current locations, if so
|
||||
* we should merge the two.
|
||||
|
|
@ -110,9 +113,9 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
|||
if (g.contiguousP(e)) {
|
||||
GenomeLoc c = g.merge(e);
|
||||
mArray.set(mArray.indexOf(g),c);
|
||||
return true;
|
||||
haveAdded = true;
|
||||
} else if ((g.getContigIndex() == e.getContigIndex()) &&
|
||||
(g.getStart() > e.getStart())) {
|
||||
(e.getStart() < g.getStart()) && !haveAdded) {
|
||||
mArray.add(mArray.indexOf(g), e);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -120,7 +123,9 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
|||
/** we're at the end and we haven't found locations that should fall after it,
|
||||
* so we'll put it at the end
|
||||
*/
|
||||
mArray.add(e);
|
||||
if (!haveAdded) {
|
||||
mArray.add(e);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -210,11 +215,11 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
|||
* @param dict the sequence dictionary to create a collection from
|
||||
* @return the GenomeLocSortedSet of all reference sequences as GenomeLocs
|
||||
*/
|
||||
public static GenomeLocSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
|
||||
GenomeLocSet returnSet = new GenomeLocSet();
|
||||
public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
|
||||
GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet();
|
||||
for (SAMSequenceRecord record : dict.getSequences()) {
|
||||
returnSet.add(new GenomeLoc(record.getSequenceIndex(),1,record.getSequenceLength()));
|
||||
returnSortedSet.add(new GenomeLoc(record.getSequenceIndex(),1,record.getSequenceLength()));
|
||||
}
|
||||
return returnSet;
|
||||
return returnSortedSet;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,168 +0,0 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: May 22, 2009
|
||||
* Time: 2:14:07 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date May 22, 2009
|
||||
* <p/>
|
||||
* Class GenomeLocSetTest
|
||||
* <p/>
|
||||
* This tests the functions of the GenomeLocSet
|
||||
*/
|
||||
public class GenomeLocSetTest extends BaseTest {
|
||||
|
||||
private GenomeLocSet mSet = null;
|
||||
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
||||
private static final int STARTING_CHROMOSOME = 1;
|
||||
private static final int CHROMOSOME_SIZE = 1000;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
mSet = new GenomeLocSet();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdd() {
|
||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0);
|
||||
assertTrue(mSet.size() == 0);
|
||||
mSet.add(g);
|
||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRemove() {
|
||||
assertTrue(mSet.size() == 0);
|
||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0);
|
||||
mSet.add(g);
|
||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
||||
mSet.remove(g);
|
||||
assertTrue(mSet.size() == 0);
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testAddDupplicate() {
|
||||
assertTrue(mSet.size() == 0);
|
||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0);
|
||||
mSet.add(g);
|
||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
||||
mSet.add(g);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void mergingOverlappingBelow() {
|
||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 50);
|
||||
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 49, 100);
|
||||
assertTrue(mSet.size() == 0);
|
||||
mSet.add(g);
|
||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
||||
mSet.addRegion(e);
|
||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
||||
Iterator<GenomeLoc> iter = mSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
assertTrue(loc.getStart() == 0);
|
||||
assertTrue(loc.getStop() == 100);
|
||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void mergingOverlappingAbove() {
|
||||
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 0, 50);
|
||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 49, 100);
|
||||
assertTrue(mSet.size() == 0);
|
||||
mSet.add(g);
|
||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
||||
mSet.addRegion(e);
|
||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
||||
Iterator<GenomeLoc> iter = mSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
assertTrue(loc.getStart() == 0);
|
||||
assertTrue(loc.getStop() == 100);
|
||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void deleteSubRegion() {
|
||||
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 0, 50);
|
||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 49, 100);
|
||||
mSet.add(g);
|
||||
mSet.addRegion(e);
|
||||
|
||||
// now delete a region
|
||||
GenomeLoc d = new GenomeLoc(STARTING_CHROMOSOME, 25, 75);
|
||||
mSet.removeRegion(d);
|
||||
Iterator<GenomeLoc> iter = mSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
assertTrue(loc.getStart() == 0);
|
||||
assertTrue(loc.getStop() == 24);
|
||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
||||
|
||||
loc = iter.next();
|
||||
assertTrue(loc.getStart() == 76);
|
||||
assertTrue(loc.getStop() == 100);
|
||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
||||
}
|
||||
@Test
|
||||
public void deleteSuperRegion() {
|
||||
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 10, 20);
|
||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 70, 100);
|
||||
mSet.add(g);
|
||||
mSet.addRegion(e);
|
||||
assertTrue(mSet.size() == 2);
|
||||
// now delete a region
|
||||
GenomeLoc d = new GenomeLoc(STARTING_CHROMOSOME, 15, 75);
|
||||
mSet.removeRegion(d);
|
||||
Iterator<GenomeLoc> iter = mSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
assertTrue(loc.getStart() == 10);
|
||||
assertTrue(loc.getStop() == 14);
|
||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
||||
|
||||
loc = iter.next();
|
||||
assertTrue(loc.getStart() == 76);
|
||||
assertTrue(loc.getStop() == 100);
|
||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void fromSequenceDictionary() {
|
||||
mSet = GenomeLocSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
|
||||
// we should have sequence
|
||||
assertTrue(mSet.size() == GenomeLocSetTest.NUMBER_OF_CHROMOSOMES);
|
||||
int seqNumber = 0;
|
||||
for (GenomeLoc loc : mSet) {
|
||||
assertTrue(loc.getStart() == 1);
|
||||
assertTrue(loc.getStop() == GenomeLocSetTest.CHROMOSOME_SIZE);
|
||||
assertTrue(loc.getContigIndex() == seqNumber);
|
||||
++seqNumber;
|
||||
}
|
||||
assertTrue(seqNumber == GenomeLocSetTest.NUMBER_OF_CHROMOSOMES);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: May 22, 2009
|
||||
* Time: 2:14:07 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date May 22, 2009
|
||||
* <p/>
|
||||
* Class GenomeLocSortedSetTest
|
||||
* <p/>
|
||||
* This tests the functions of the GenomeLocSortedSet
|
||||
*/
|
||||
public class GenomeLocSortedSetTest extends BaseTest {
|
||||
|
||||
private GenomeLocSortedSet mSortedSet = null;
|
||||
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
||||
private static final int STARTING_CHROMOSOME = 1;
|
||||
private static final int CHROMOSOME_SIZE = 1000;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
mSortedSet = new GenomeLocSortedSet();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdd() {
|
||||
GenomeLoc g = new GenomeLoc(1, 0, 0);
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRemove() {
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
GenomeLoc g = new GenomeLoc(1, 0, 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
mSortedSet.remove(g);
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void addRegion() {
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
GenomeLoc g = new GenomeLoc(1, 1, 50);
|
||||
mSortedSet.add(g);
|
||||
GenomeLoc f = new GenomeLoc(1, 30, 80);
|
||||
mSortedSet.addRegion(f);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testAddDupplicate() {
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
GenomeLoc g = new GenomeLoc(1, 0, 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
mSortedSet.add(g);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void mergingOverlappingBelow() {
|
||||
GenomeLoc g = new GenomeLoc(1, 0, 50);
|
||||
GenomeLoc e = new GenomeLoc(1, 49, 100);
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
mSortedSet.addRegion(e);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
assertTrue(loc.getStart() == 0);
|
||||
assertTrue(loc.getStop() == 100);
|
||||
assertTrue(loc.getContigIndex() == 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void mergingOverlappingAbove() {
|
||||
GenomeLoc e = new GenomeLoc(1, 0, 50);
|
||||
GenomeLoc g = new GenomeLoc(1, 49, 100);
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
mSortedSet.addRegion(e);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
assertTrue(loc.getStart() == 0);
|
||||
assertTrue(loc.getStop() == 100);
|
||||
assertTrue(loc.getContigIndex() == 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void deleteSubRegion() {
|
||||
GenomeLoc e = new GenomeLoc(1, 0, 50);
|
||||
GenomeLoc g = new GenomeLoc(1, 49, 100);
|
||||
mSortedSet.add(g);
|
||||
mSortedSet.addRegion(e);
|
||||
|
||||
// now delete a region
|
||||
GenomeLoc d = new GenomeLoc(1, 25, 75);
|
||||
mSortedSet.removeRegion(d);
|
||||
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
assertTrue(loc.getStart() == 0);
|
||||
assertTrue(loc.getStop() == 24);
|
||||
assertTrue(loc.getContigIndex() == 1);
|
||||
|
||||
loc = iter.next();
|
||||
assertTrue(loc.getStart() == 76);
|
||||
assertTrue(loc.getStop() == 100);
|
||||
assertTrue(loc.getContigIndex() == 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void deleteSuperRegion() {
|
||||
GenomeLoc e = new GenomeLoc(1, 10, 20);
|
||||
GenomeLoc g = new GenomeLoc(1, 70, 100);
|
||||
mSortedSet.add(g);
|
||||
mSortedSet.addRegion(e);
|
||||
assertTrue(mSortedSet.size() == 2);
|
||||
// now delete a region
|
||||
GenomeLoc d = new GenomeLoc(1, 15, 75);
|
||||
mSortedSet.removeRegion(d);
|
||||
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
assertTrue(loc.getStart() == 10);
|
||||
assertTrue(loc.getStop() == 14);
|
||||
assertTrue(loc.getContigIndex() == 1);
|
||||
|
||||
loc = iter.next();
|
||||
assertTrue(loc.getStart() == 76);
|
||||
assertTrue(loc.getStop() == 100);
|
||||
assertTrue(loc.getContigIndex() == 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void fromSequenceDictionary() {
|
||||
mSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
|
||||
// we should have one GenomeLoc per sequence in the dictionary
|
||||
assertTrue(mSortedSet.size() == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
|
||||
int seqNumber = 0;
|
||||
for (GenomeLoc loc : mSortedSet) {
|
||||
assertTrue(loc.getStart() == 1);
|
||||
assertTrue(loc.getStop() == GenomeLocSortedSetTest.CHROMOSOME_SIZE);
|
||||
assertTrue(loc.getContigIndex() == seqNumber);
|
||||
++seqNumber;
|
||||
}
|
||||
assertTrue(seqNumber == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue