a couple of light changes to GenomeLocSortedSet.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1708 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3916e165fb
commit
eeb14ec717
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -34,8 +35,10 @@ import java.util.*;
|
||||||
* that element.
|
* that element.
|
||||||
*/
|
*/
|
||||||
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
|
private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class);
|
||||||
|
|
||||||
// our private storage for the GenomeLoc's
|
// our private storage for the GenomeLoc's
|
||||||
private final ArrayList<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
||||||
|
|
||||||
/** default constructor */
|
/** default constructor */
|
||||||
public GenomeLocSortedSet() {
|
public GenomeLocSortedSet() {
|
||||||
|
|
@ -119,6 +122,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
(e.getStart() < g.getStart()) && !haveAdded) {
|
(e.getStart() < g.getStart()) && !haveAdded) {
|
||||||
mArray.add(mArray.indexOf(g), e);
|
mArray.add(mArray.indexOf(g), e);
|
||||||
return true;
|
return true;
|
||||||
|
} else if (haveAdded && ((e.getContigIndex() > e.getContigIndex()) ||
|
||||||
|
(g.getContigIndex() == e.getContigIndex() && e.getStart() > g.getStart()))) {
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/** we're at the end and we haven't found locations that should fall after it,
|
/** we're at the end and we haven't found locations that should fall after it,
|
||||||
|
|
@ -262,4 +268,49 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean addAllRegions(List<GenomeLoc> locations) {
|
||||||
|
this.mArray.addAll(locations);
|
||||||
|
Collections.sort(this.mArray);
|
||||||
|
this.mArray = GenomeLocSortedSet.mergeOverlappingLocations(this.mArray);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* merge a list of genome locs that may be overlapping, returning the list of unique genomic locations
|
||||||
|
*
|
||||||
|
* @param raw the unchecked genome loc list
|
||||||
|
*
|
||||||
|
* @return the list of merged locations
|
||||||
|
*/
|
||||||
|
public static List<GenomeLoc> mergeOverlappingLocations(final List<GenomeLoc> raw) {
|
||||||
|
logger.debug(" Raw locations are: " + Utils.join(", ", raw));
|
||||||
|
if (raw.size() <= 1)
|
||||||
|
return raw;
|
||||||
|
else {
|
||||||
|
ArrayList<GenomeLoc> merged = new ArrayList<GenomeLoc>();
|
||||||
|
Iterator<GenomeLoc> it = raw.iterator();
|
||||||
|
GenomeLoc prev = it.next();
|
||||||
|
while (it.hasNext()) {
|
||||||
|
GenomeLoc curr = it.next();
|
||||||
|
if (prev.contiguousP(curr)) {
|
||||||
|
prev = prev.merge(curr);
|
||||||
|
} else {
|
||||||
|
merged.add(prev);
|
||||||
|
prev = curr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
merged.add(prev);
|
||||||
|
return merged;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* convert this object to a list
|
||||||
|
* @return the lists
|
||||||
|
*/
|
||||||
|
public List<GenomeLoc> toList() {
|
||||||
|
return this.mArray;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
|
@ -203,4 +204,31 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
||||||
}
|
}
|
||||||
assertTrue(seqNumber == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
|
assertTrue(seqNumber == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddAll() {
|
||||||
|
mSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
|
||||||
|
GenomeLocSortedSet set = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
|
||||||
|
// we should have sequence
|
||||||
|
assertTrue(mSortedSet.size() == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
|
||||||
|
mSortedSet.addAllRegions(set.toList());
|
||||||
|
assertTrue(mSortedSet.size() == GenomeLocSortedSetTest.NUMBER_OF_CHROMOSOMES);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddAll2() {
|
||||||
|
mSortedSet = new GenomeLocSortedSet();
|
||||||
|
GenomeLocSortedSet mSortedSet2 = new GenomeLocSortedSet();
|
||||||
|
for (int x=0; x < 200; x = x + 2) {
|
||||||
|
mSortedSet.add(GenomeLocParser.createGenomeLoc(1,x));
|
||||||
|
}
|
||||||
|
assertEquals(100, mSortedSet.size());
|
||||||
|
for (int x=1; x < 201; x = x + 2) {
|
||||||
|
mSortedSet2.add(GenomeLocParser.createGenomeLoc(1,x));
|
||||||
|
}
|
||||||
|
assertEquals(100, mSortedSet2.size());
|
||||||
|
mSortedSet.addAllRegions(mSortedSet2.toList());
|
||||||
|
assertEquals(1, mSortedSet.size());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue