Fixed a bug that I checked in and that Eric found, affecting intervals with no start or stop coordinate. Now I owe Eric a cookie, and Milk Street is so far away. Damn.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1679 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-22 04:34:18 +00:00
parent e9eeed8c6f
commit 83a9eebcc4
2 changed files with 60 additions and 34 deletions

View File

@ -52,7 +52,7 @@ import java.util.regex.Pattern;
public class GenomeLocParser { public class GenomeLocParser {
private static Logger logger = Logger.getLogger(GenomeLocParser.class); private static Logger logger = Logger.getLogger(GenomeLocParser.class);
private static final Pattern mPattern = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)?(\\+)?(-)?([\\d,]+)?$"); // matches case 3 private static final Pattern mPattern = Pattern.compile("([\\w&&[^:]]+):*([\\d,]+)?([\\+-])?([\\d,]+)?$"); // matches case 3
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@ -154,45 +154,39 @@ public class GenomeLocParser {
String contig = null; String contig = null;
long start = 1; long start = 1;
long stop = Integer.MAX_VALUE; long stop = -1;
boolean bad = false; boolean bad = false;
Matcher match = mPattern.matcher(str); Matcher match = mPattern.matcher(str);
try {
try { if (match.matches() && match.groupCount() == 4) {
if (match.matches()) { if (match.group(1) != null) contig = match.group(1);
contig = match.group(1); if (match.group(2) != null) start = parsePosition(match.group(2));
if (match.groupCount() == 5) { if ((match.group(3) != null && match.group(3).equals("+")) || // chr:1+
start = parsePosition(match.group(2)); (match.group(3) == null && match.group(4) == null && match.group(2) == null)) // chr1
if (match.group(3) != null && match.group(3).equals("+") && match.group(5) == null) { stop = Integer.MAX_VALUE;
// do nothing else if (match.group(3) != null && match.group(3).equals("-")) // chr1:1-1
} else if (match.group(5) != null) stop = parsePosition(match.group(4));
stop = parsePosition(match.group(5)); else if (match.group(3) == null && match.group(4) == null) // chr1:1
else if (match.group(5) == null && match.group(4) == null && match.group(3) == null) stop = start;
stop = start; else {
else bad = true;
bad = true; }
} else { }
bad = true; } catch (Exception e) {
} bad = true;
}
} }
catch (Exception e) { if (bad || start < 0 || stop < 0 || contig == null)
bad = true; throw new StingException("Invalid Genome Location string: " + str);
}
if (bad) if (!isContigValid(contig))
throw new StingException("Invalid Genome Location string: "); // + str); throw new StingException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
if (stop == Integer.MAX_VALUE && hasKnownContigOrdering())
if (stop == Integer.MAX_VALUE && hasKnownContigOrdering())
// look up the actual stop position! // look up the actual stop position!
stop = getContigInfo(contig).getSequenceLength(); stop = getContigInfo(contig).getSequenceLength();
if (!isContigValid(contig))
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
GenomeLoc loc = parseGenomeLoc(contig, start, stop); GenomeLoc loc = parseGenomeLoc(contig, start, stop);
return loc; return loc;
} }

View File

@ -7,7 +7,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import java.util.Arrays;
/** /**
* @author aaron * @author aaron
@ -158,11 +158,43 @@ public class GenomeLocParserTest extends BaseTest {
assertEquals(1, loc.getStart()); assertEquals(1, loc.getStart());
} }
@Test(expected = RuntimeException.class) @Test
public void testGenomeLocParseOnlyChrome() {
GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1");
assertEquals(0, loc.getContigIndex());
assertEquals(10, loc.getStop()); // the size
assertEquals(1, loc.getStart());
}
@Test(expected = StingException.class)
public void testGenomeLocParseOnlyBadChrome() {
GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr12");
assertEquals(0, loc.getContigIndex());
assertEquals(10, loc.getStop()); // the size
assertEquals(1, loc.getStart());
}
@Test(expected = StingException.class)
public void testGenomeLocBad() { public void testGenomeLocBad() {
GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-"); GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-");
assertEquals(0, loc.getContigIndex()); assertEquals(0, loc.getContigIndex());
assertEquals(10, loc.getStop()); // the size assertEquals(10, loc.getStop()); // the size
assertEquals(1, loc.getStart()); assertEquals(1, loc.getStart());
} }
@Test(expected = StingException.class)
public void testGenomeLocBad2() {
GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-500-0");
assertEquals(0, loc.getContigIndex());
assertEquals(10, loc.getStop()); // the size
assertEquals(1, loc.getStart());
}
@Test(expected = StingException.class)
public void testGenomeLocBad3() {
GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1--0");
assertEquals(0, loc.getContigIndex());
assertEquals(10, loc.getStop()); // the size
assertEquals(1, loc.getStart());
}
} }