a couple of light changes to GenomeLocSortedSet.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1708 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-23 20:38:53 +00:00
parent 3916e165fb
commit eeb14ec717
2 changed files with 80 additions and 1 deletions

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import java.util.*;
@ -34,8 +35,10 @@ import java.util.*;
* that element.
*/
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class);
// our private storage for the GenomeLoc's
private final ArrayList<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
/** default constructor */
public GenomeLocSortedSet() {
@ -119,6 +122,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
(e.getStart() < g.getStart()) && !haveAdded) {
mArray.add(mArray.indexOf(g), e);
return true;
} else if (haveAdded && ((e.getContigIndex() > e.getContigIndex()) ||
(g.getContigIndex() == e.getContigIndex() && e.getStart() > g.getStart()))) {
return true;
}
}
/** we're at the end and we haven't found locations that should fall after it,
@ -262,4 +268,49 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
return ret;
}
/**
 * Appends every supplied location to the backing list, re-sorts that list, and then
 * replaces it with the result of {@link #mergeOverlappingLocations(List)} so that
 * contiguous entries are collapsed.
 *
 * @param locations the locations to fold into this set
 *
 * @return always true
 */
public boolean addAllRegions(List<GenomeLoc> locations) {
    mArray.addAll(locations);
    Collections.sort(mArray);
    // the merge step may hand back a different list instance, so the field is reassigned
    final List<GenomeLoc> collapsed = GenomeLocSortedSet.mergeOverlappingLocations(mArray);
    mArray = collapsed;
    return true;
}
/**
 * merge a list of genome locs that may be overlapping, returning the list of unique genomic locations.
 * <p>
 * Only neighbouring elements are compared with each other, so the list should already be
 * sorted when it is passed in (addAllRegions sorts before calling this method).
 *
 * @param raw the unchecked genome loc list
 *
 * @return the list of merged locations; when raw holds zero or one element, raw itself is returned
 */
public static List<GenomeLoc> mergeOverlappingLocations(final List<GenomeLoc> raw) {
    // joining every location into one string is wasted work unless debug output is actually enabled
    if (logger.isDebugEnabled()) {
        logger.debug(" Raw locations are: " + Utils.join(", ", raw));
    }
    // nothing to merge with fewer than two elements
    if (raw.size() <= 1) {
        return raw;
    }

    final List<GenomeLoc> merged = new ArrayList<GenomeLoc>();
    final Iterator<GenomeLoc> it = raw.iterator();
    // prev accumulates the run of contiguous locations seen so far
    GenomeLoc prev = it.next();
    while (it.hasNext()) {
        final GenomeLoc curr = it.next();
        if (prev.contiguousP(curr)) {
            // extend the current run
            prev = prev.merge(curr);
        } else {
            // the run ended: emit it and start a new one at curr
            merged.add(prev);
            prev = curr;
        }
    }
    // emit the final run, which the loop never flushes
    merged.add(prev);
    return merged;
}
/**
 * Exposes the contents of this set as a list.
 *
 * @return the backing list held by this set (the same instance, not a copy)
 */
public List<GenomeLoc> toList() {
    return mArray;
}
}

View File

@ -4,6 +4,7 @@ import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertEquals;
import org.junit.Before;
import org.junit.Test;
@ -203,4 +204,31 @@ public class GenomeLocSortedSetTest extends BaseTest {
}
assertTrue(seqNumber == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
}
/**
 * Builds two sets from the same sequence dictionary, adds the contents of one to the
 * other through addAllRegions, and checks that the element count stays at
 * NUMBER_OF_CHROMOSOMES.
 */
@Test
public void testAddAll() {
    mSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
    GenomeLocSortedSet duplicate = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());

    // before the add: the set built from the dictionary has NUMBER_OF_CHROMOSOMES entries
    assertTrue(GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES == mSortedSet.size());

    final List<GenomeLoc> sameRegions = duplicate.toList();
    mSortedSet.addAllRegions(sameRegions);

    // after the add: the count must not have grown
    assertTrue(GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES == mSortedSet.size());
}
/**
 * Fills one set with locations created at the even positions 0..198 and a second set with
 * locations created at the odd positions 1..199 (both with first argument 1), then adds the
 * second set to the first through addAllRegions and expects a single element to remain.
 */
@Test
public void testAddAll2() {
    mSortedSet = new GenomeLocSortedSet();
    GenomeLocSortedSet oddSet = new GenomeLocSortedSet();

    // even positions: 0, 2, ..., 198
    for (int i = 0; i < 100; i++) {
        mSortedSet.add(GenomeLocParser.createGenomeLoc(1, 2 * i));
    }
    assertEquals(100, mSortedSet.size());

    // odd positions: 1, 3, ..., 199
    for (int i = 0; i < 100; i++) {
        oddSet.add(GenomeLocParser.createGenomeLoc(1, 2 * i + 1));
    }
    assertEquals(100, oddSet.size());

    // interleaving the two sets is expected to collapse everything into one location
    mSortedSet.addAllRegions(oddSet.toList());
    assertEquals(1, mSortedSet.size());
}
}