Changed the name to reflect the sorted nature of the set, added some fixes
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@810 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
831d430025
commit
d056f9f3e8
|
|
@ -37,11 +37,11 @@ import java.util.Iterator;
|
||||||
* partial interval of a region in the collection it will remove the region from
|
* partial interval of a region in the collection it will remove the region from
|
||||||
* that element.
|
* that element.
|
||||||
*/
|
*/
|
||||||
public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
// our private storage for the GenomeLoc's
|
// our private storage for the GenomeLoc's
|
||||||
private final ArrayList<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
private final ArrayList<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
||||||
|
|
||||||
public GenomeLocSet() {}
|
public GenomeLocSortedSet() {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get an iterator over this collection
|
* get an iterator over this collection
|
||||||
|
|
@ -102,6 +102,9 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
||||||
if (e == null) {
|
if (e == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// have we added it to the collection?
|
||||||
|
boolean haveAdded = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* check if the specified element overlaps any current locations, if so
|
* check if the specified element overlaps any current locations, if so
|
||||||
* we should merge the two.
|
* we should merge the two.
|
||||||
|
|
@ -110,9 +113,9 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
||||||
if (g.contiguousP(e)) {
|
if (g.contiguousP(e)) {
|
||||||
GenomeLoc c = g.merge(e);
|
GenomeLoc c = g.merge(e);
|
||||||
mArray.set(mArray.indexOf(g),c);
|
mArray.set(mArray.indexOf(g),c);
|
||||||
return true;
|
haveAdded = true;
|
||||||
} else if ((g.getContigIndex() == e.getContigIndex()) &&
|
} else if ((g.getContigIndex() == e.getContigIndex()) &&
|
||||||
(g.getStart() > e.getStart())) {
|
(e.getStart() < g.getStart()) && !haveAdded) {
|
||||||
mArray.add(mArray.indexOf(g), e);
|
mArray.add(mArray.indexOf(g), e);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -120,7 +123,9 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
||||||
/** we're at the end and we haven't found locations that should fall after it,
|
/** we're at the end and we haven't found locations that should fall after it,
|
||||||
* so we'll put it at the end
|
* so we'll put it at the end
|
||||||
*/
|
*/
|
||||||
mArray.add(e);
|
if (!haveAdded) {
|
||||||
|
mArray.add(e);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -210,11 +215,11 @@ public class GenomeLocSet extends AbstractSet<GenomeLoc> {
|
||||||
* @param dict the sequence dictionary to create a collection from
|
* @param dict the sequence dictionary to create a collection from
|
||||||
* @return the GenomeLocSet of all references sequences as GenomeLoc's
|
* @return the GenomeLocSortedSet of all reference sequences as GenomeLocs
|
||||||
*/
|
*/
|
||||||
public static GenomeLocSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
|
public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
|
||||||
GenomeLocSet returnSet = new GenomeLocSet();
|
GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet();
|
||||||
for (SAMSequenceRecord record : dict.getSequences()) {
|
for (SAMSequenceRecord record : dict.getSequences()) {
|
||||||
returnSet.add(new GenomeLoc(record.getSequenceIndex(),1,record.getSequenceLength()));
|
returnSortedSet.add(new GenomeLoc(record.getSequenceIndex(),1,record.getSequenceLength()));
|
||||||
}
|
}
|
||||||
return returnSet;
|
return returnSortedSet;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,168 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* User: aaron
|
|
||||||
* Date: May 22, 2009
|
|
||||||
* Time: 2:14:07 PM
|
|
||||||
*
|
|
||||||
* The Broad Institute
|
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
|
||||||
* This software and its documentation are copyright 2009 by the
|
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
|
||||||
*
|
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author aaron
|
|
||||||
* @version 1.0
|
|
||||||
* @date May 22, 2009
|
|
||||||
* <p/>
|
|
||||||
* Class GenomeLocSetTest
|
|
||||||
* <p/>
|
|
||||||
* This tests the functions of the GenomeLocSet
|
|
||||||
*/
|
|
||||||
public class GenomeLocSetTest extends BaseTest {
|
|
||||||
|
|
||||||
private GenomeLocSet mSet = null;
|
|
||||||
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
|
||||||
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
|
||||||
private static final int STARTING_CHROMOSOME = 1;
|
|
||||||
private static final int CHROMOSOME_SIZE = 1000;
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void setup() {
|
|
||||||
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
|
||||||
mSet = new GenomeLocSet();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testAdd() {
|
|
||||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0);
|
|
||||||
assertTrue(mSet.size() == 0);
|
|
||||||
mSet.add(g);
|
|
||||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testRemove() {
|
|
||||||
assertTrue(mSet.size() == 0);
|
|
||||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0);
|
|
||||||
mSet.add(g);
|
|
||||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
|
||||||
mSet.remove(g);
|
|
||||||
assertTrue(mSet.size() == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(expected = IllegalArgumentException.class)
|
|
||||||
public void testAddDupplicate() {
|
|
||||||
assertTrue(mSet.size() == 0);
|
|
||||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 0);
|
|
||||||
mSet.add(g);
|
|
||||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
|
||||||
mSet.add(g);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void mergingOverlappingBelow() {
|
|
||||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 0, 50);
|
|
||||||
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 49, 100);
|
|
||||||
assertTrue(mSet.size() == 0);
|
|
||||||
mSet.add(g);
|
|
||||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
|
||||||
mSet.addRegion(e);
|
|
||||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
|
||||||
Iterator<GenomeLoc> iter = mSet.iterator();
|
|
||||||
GenomeLoc loc = iter.next();
|
|
||||||
assertTrue(loc.getStart() == 0);
|
|
||||||
assertTrue(loc.getStop() == 100);
|
|
||||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void mergingOverlappingAbove() {
|
|
||||||
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 0, 50);
|
|
||||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 49, 100);
|
|
||||||
assertTrue(mSet.size() == 0);
|
|
||||||
mSet.add(g);
|
|
||||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
|
||||||
mSet.addRegion(e);
|
|
||||||
assertTrue(mSet.size() == STARTING_CHROMOSOME);
|
|
||||||
Iterator<GenomeLoc> iter = mSet.iterator();
|
|
||||||
GenomeLoc loc = iter.next();
|
|
||||||
assertTrue(loc.getStart() == 0);
|
|
||||||
assertTrue(loc.getStop() == 100);
|
|
||||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void deleteSubRegion() {
|
|
||||||
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 0, 50);
|
|
||||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 49, 100);
|
|
||||||
mSet.add(g);
|
|
||||||
mSet.addRegion(e);
|
|
||||||
|
|
||||||
// now delete a region
|
|
||||||
GenomeLoc d = new GenomeLoc(STARTING_CHROMOSOME, 25, 75);
|
|
||||||
mSet.removeRegion(d);
|
|
||||||
Iterator<GenomeLoc> iter = mSet.iterator();
|
|
||||||
GenomeLoc loc = iter.next();
|
|
||||||
assertTrue(loc.getStart() == 0);
|
|
||||||
assertTrue(loc.getStop() == 24);
|
|
||||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
|
||||||
|
|
||||||
loc = iter.next();
|
|
||||||
assertTrue(loc.getStart() == 76);
|
|
||||||
assertTrue(loc.getStop() == 100);
|
|
||||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
|
||||||
}
|
|
||||||
@Test
|
|
||||||
public void deleteSuperRegion() {
|
|
||||||
GenomeLoc e = new GenomeLoc(STARTING_CHROMOSOME, 10, 20);
|
|
||||||
GenomeLoc g = new GenomeLoc(STARTING_CHROMOSOME, 70, 100);
|
|
||||||
mSet.add(g);
|
|
||||||
mSet.addRegion(e);
|
|
||||||
assertTrue(mSet.size() == 2);
|
|
||||||
// now delete a region
|
|
||||||
GenomeLoc d = new GenomeLoc(STARTING_CHROMOSOME, 15, 75);
|
|
||||||
mSet.removeRegion(d);
|
|
||||||
Iterator<GenomeLoc> iter = mSet.iterator();
|
|
||||||
GenomeLoc loc = iter.next();
|
|
||||||
assertTrue(loc.getStart() == 10);
|
|
||||||
assertTrue(loc.getStop() == 14);
|
|
||||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
|
||||||
|
|
||||||
loc = iter.next();
|
|
||||||
assertTrue(loc.getStart() == 76);
|
|
||||||
assertTrue(loc.getStop() == 100);
|
|
||||||
assertTrue(loc.getContigIndex() == STARTING_CHROMOSOME);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void fromSequenceDictionary() {
|
|
||||||
mSet = GenomeLocSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
|
|
||||||
// we should have sequence
|
|
||||||
assertTrue(mSet.size() == GenomeLocSetTest.NUMBER_OF_CHROMOSOMES);
|
|
||||||
int seqNumber = 0;
|
|
||||||
for (GenomeLoc loc : mSet) {
|
|
||||||
assertTrue(loc.getStart() == 1);
|
|
||||||
assertTrue(loc.getStop() == GenomeLocSetTest.CHROMOSOME_SIZE);
|
|
||||||
assertTrue(loc.getContigIndex() == seqNumber);
|
|
||||||
++seqNumber;
|
|
||||||
}
|
|
||||||
assertTrue(seqNumber == GenomeLocSetTest.NUMBER_OF_CHROMOSOMES);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,181 @@
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* User: aaron
|
||||||
|
* Date: May 22, 2009
|
||||||
|
* Time: 2:14:07 PM
|
||||||
|
*
|
||||||
|
* The Broad Institute
|
||||||
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
|
* This software and its documentation are copyright 2009 by the
|
||||||
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
|
*
|
||||||
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* @version 1.0
|
||||||
|
* @date May 22, 2009
|
||||||
|
* <p/>
|
||||||
|
* Class GenomeLocSortedSetTest
|
||||||
|
* <p/>
|
||||||
|
* This tests the functions of the GenomeLocSortedSet
|
||||||
|
*/
|
||||||
|
public class GenomeLocSortedSetTest extends BaseTest {
|
||||||
|
|
||||||
|
private GenomeLocSortedSet mSortedSet = null;
|
||||||
|
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||||
|
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
||||||
|
private static final int STARTING_CHROMOSOME = 1;
|
||||||
|
private static final int CHROMOSOME_SIZE = 1000;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() {
|
||||||
|
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
||||||
|
mSortedSet = new GenomeLocSortedSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAdd() {
|
||||||
|
GenomeLoc g = new GenomeLoc(1, 0, 0);
|
||||||
|
assertTrue(mSortedSet.size() == 0);
|
||||||
|
mSortedSet.add(g);
|
||||||
|
assertTrue(mSortedSet.size() == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemove() {
|
||||||
|
assertTrue(mSortedSet.size() == 0);
|
||||||
|
GenomeLoc g = new GenomeLoc(1, 0, 0);
|
||||||
|
mSortedSet.add(g);
|
||||||
|
assertTrue(mSortedSet.size() == 1);
|
||||||
|
mSortedSet.remove(g);
|
||||||
|
assertTrue(mSortedSet.size() == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void addRegion() {
|
||||||
|
assertTrue(mSortedSet.size() == 0);
|
||||||
|
GenomeLoc g = new GenomeLoc(1, 1, 50);
|
||||||
|
mSortedSet.add(g);
|
||||||
|
GenomeLoc f = new GenomeLoc(1, 30, 80);
|
||||||
|
mSortedSet.addRegion(f);
|
||||||
|
assertTrue(mSortedSet.size() == 1);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(expected = IllegalArgumentException.class)
|
||||||
|
public void testAddDupplicate() {
|
||||||
|
assertTrue(mSortedSet.size() == 0);
|
||||||
|
GenomeLoc g = new GenomeLoc(1, 0, 0);
|
||||||
|
mSortedSet.add(g);
|
||||||
|
assertTrue(mSortedSet.size() == 1);
|
||||||
|
mSortedSet.add(g);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void mergingOverlappingBelow() {
|
||||||
|
GenomeLoc g = new GenomeLoc(1, 0, 50);
|
||||||
|
GenomeLoc e = new GenomeLoc(1, 49, 100);
|
||||||
|
assertTrue(mSortedSet.size() == 0);
|
||||||
|
mSortedSet.add(g);
|
||||||
|
assertTrue(mSortedSet.size() == 1);
|
||||||
|
mSortedSet.addRegion(e);
|
||||||
|
assertTrue(mSortedSet.size() == 1);
|
||||||
|
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||||
|
GenomeLoc loc = iter.next();
|
||||||
|
assertTrue(loc.getStart() == 0);
|
||||||
|
assertTrue(loc.getStop() == 100);
|
||||||
|
assertTrue(loc.getContigIndex() == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void mergingOverlappingAbove() {
|
||||||
|
GenomeLoc e = new GenomeLoc(1, 0, 50);
|
||||||
|
GenomeLoc g = new GenomeLoc(1, 49, 100);
|
||||||
|
assertTrue(mSortedSet.size() == 0);
|
||||||
|
mSortedSet.add(g);
|
||||||
|
assertTrue(mSortedSet.size() == 1);
|
||||||
|
mSortedSet.addRegion(e);
|
||||||
|
assertTrue(mSortedSet.size() == 1);
|
||||||
|
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||||
|
GenomeLoc loc = iter.next();
|
||||||
|
assertTrue(loc.getStart() == 0);
|
||||||
|
assertTrue(loc.getStop() == 100);
|
||||||
|
assertTrue(loc.getContigIndex() == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void deleteSubRegion() {
|
||||||
|
GenomeLoc e = new GenomeLoc(1, 0, 50);
|
||||||
|
GenomeLoc g = new GenomeLoc(1, 49, 100);
|
||||||
|
mSortedSet.add(g);
|
||||||
|
mSortedSet.addRegion(e);
|
||||||
|
|
||||||
|
// now delete a region
|
||||||
|
GenomeLoc d = new GenomeLoc(1, 25, 75);
|
||||||
|
mSortedSet.removeRegion(d);
|
||||||
|
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||||
|
GenomeLoc loc = iter.next();
|
||||||
|
assertTrue(loc.getStart() == 0);
|
||||||
|
assertTrue(loc.getStop() == 24);
|
||||||
|
assertTrue(loc.getContigIndex() == 1);
|
||||||
|
|
||||||
|
loc = iter.next();
|
||||||
|
assertTrue(loc.getStart() == 76);
|
||||||
|
assertTrue(loc.getStop() == 100);
|
||||||
|
assertTrue(loc.getContigIndex() == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void deleteSuperRegion() {
|
||||||
|
GenomeLoc e = new GenomeLoc(1, 10, 20);
|
||||||
|
GenomeLoc g = new GenomeLoc(1, 70, 100);
|
||||||
|
mSortedSet.add(g);
|
||||||
|
mSortedSet.addRegion(e);
|
||||||
|
assertTrue(mSortedSet.size() == 2);
|
||||||
|
// now delete a region
|
||||||
|
GenomeLoc d = new GenomeLoc(1, 15, 75);
|
||||||
|
mSortedSet.removeRegion(d);
|
||||||
|
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||||
|
GenomeLoc loc = iter.next();
|
||||||
|
assertTrue(loc.getStart() == 10);
|
||||||
|
assertTrue(loc.getStop() == 14);
|
||||||
|
assertTrue(loc.getContigIndex() == 1);
|
||||||
|
|
||||||
|
loc = iter.next();
|
||||||
|
assertTrue(loc.getStart() == 76);
|
||||||
|
assertTrue(loc.getStop() == 100);
|
||||||
|
assertTrue(loc.getContigIndex() == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void fromSequenceDictionary() {
|
||||||
|
mSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
|
||||||
|
// we should have sequence
|
||||||
|
assertTrue(mSortedSet.size() == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
|
||||||
|
int seqNumber = 0;
|
||||||
|
for (GenomeLoc loc : mSortedSet) {
|
||||||
|
assertTrue(loc.getStart() == 1);
|
||||||
|
assertTrue(loc.getStop() == GenomeLocSortedSetTest.CHROMOSOME_SIZE);
|
||||||
|
assertTrue(loc.getContigIndex() == seqNumber);
|
||||||
|
++seqNumber;
|
||||||
|
}
|
||||||
|
assertTrue(seqNumber == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue