Fixing parsing of genomelocs that contain colons in the contig names (which is allowed by the spec) as reported on the forum. Added unit test for this case.

This commit is contained in:
Eric Banks 2012-11-27 11:00:33 -05:00
parent e199562c25
commit 4543ece088
2 changed files with 21 additions and 2 deletions

View File

@ -374,7 +374,7 @@ public final class GenomeLocParser {
int start = 1;
int stop = -1;
final int colonIndex = str.indexOf(":");
final int colonIndex = str.lastIndexOf(":");
if(colonIndex == -1) {
contig = str.substring(0, str.length()); // chr1
stop = Integer.MAX_VALUE;

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -74,6 +76,23 @@ public class GenomeLocParserUnitTest extends BaseTest {
genomeLocParser.parseGenomeLoc("Bad:0-1");
}
@Test
public void testContigHasColon() {
SAMFileHeader header = new SAMFileHeader();
header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
SAMSequenceDictionary dict = new SAMSequenceDictionary();
SAMSequenceRecord rec = new SAMSequenceRecord("c:h:r1", 10);
rec.setSequenceLength(10);
dict.addSequence(rec);
header.setSequenceDictionary(dict);
final GenomeLocParser myGenomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
GenomeLoc loc = myGenomeLocParser.parseGenomeLoc("c:h:r1:4-5");
assertEquals(0, loc.getContigIndex());
assertEquals(loc.getStart(), 4);
assertEquals(loc.getStop(), 5);
}
@Test
public void testParseGoodString() {
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
@ -81,7 +100,7 @@ public class GenomeLocParserUnitTest extends BaseTest {
assertEquals(loc.getStop(), 10);
assertEquals(loc.getStart(), 1);
}
@Test
public void testCreateGenomeLoc1() {
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);