From 7bfb5fad273ba7e3ac624c8f5f218775c05495d8 Mon Sep 17 00:00:00 2001 From: aaron Date: Mon, 21 Sep 2009 23:32:24 +0000 Subject: [PATCH] fixing the dbSNP test. Also removing unnecessary comments from the GenomeLocParser, added some tests, and commented out the performance test git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1676 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/GenomeLocParser.java | 44 +++++++------------ .../sting/gatk/refdata/rodDbSNPTest.java | 2 +- .../sting/utils/GenomeLocParserTest.java | 42 ++++++++++++++---- 3 files changed, 51 insertions(+), 37 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 2dc2e40db..084f8c997 100644 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -52,7 +52,7 @@ import java.util.regex.Pattern; public class GenomeLocParser { private static Logger logger = Logger.getLogger(GenomeLocParser.class); - private static final Pattern mPattern = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)?\\+?(-)?([\\d,]+)?$"); // matches case 3 + private static final Pattern mPattern = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)?(\\+)?(-)?([\\d,]+)?$"); // matches case 3 // -------------------------------------------------------------------------------------------------------------- @@ -61,7 +61,7 @@ public class GenomeLocParser { // // -------------------------------------------------------------------------------------------------------------- //public static Map refContigOrdering = null; - private static SAMSequenceDictionary contigInfo = null; + protected static SAMSequenceDictionary contigInfo = null; /** * do we have a contig ordering setup? 
@@ -140,12 +140,7 @@ public class GenomeLocParser { public static GenomeLoc parseGenomeLoc(final String str) { // 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' //System.out.printf("Parsing location '%s'%n", str); - /*try { - Thread.sleep(2000); - } catch (InterruptedException e) { - e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. - } */ - + String contig = null; long start = 1; long stop = Integer.MAX_VALUE; @@ -156,32 +151,24 @@ public class GenomeLocParser { try { if (match.matches()) { contig = match.group(1); - if (match.groupCount() > 1) { - switch (match.groupCount()) { - case 2: - start = stop = parsePosition(match.group(2)); - break; - case 3: - start = parsePosition(match.group(2)); - if (!match.group(3).equals("+")) bad = true; - break; - case 4: - start = parsePosition(match.group(2)); - stop = parsePosition(match.group(4)); - break; - default: + if (match.groupCount() == 5) { + start = parsePosition(match.group(2)); + if (match.group(3) != null && match.group(3).equals("+") && match.group(5) == null) { + // do nothing + } else if (match.group(5) != null) + stop = parsePosition(match.group(5)); + else if (match.group(5) == null && match.group(4) == null && match.group(3) == null) + stop = start; + else bad = true; - break; - + } else { + bad = true; } - } - } else { - bad = true; } } catch (Exception e) { - bad = true; + bad = true; } if (bad) @@ -194,6 +181,7 @@ public class GenomeLocParser { if (!isContigValid(contig)) throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference."); + GenomeLoc loc = parseGenomeLoc(contig, start, stop); return loc; } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java index 8e8f21e61..9f2bc09ff 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java +++ 
b/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java @@ -75,7 +75,7 @@ public class rodDbSNPTest extends BaseTest { stop = (var.getLocation().getContig().equals("1") && var.getLocation().getStart() > 11000000); line = stream.readLine(); } - Assert.assertEquals(3717,snpCount); + Assert.assertEquals(3615,snpCount); Assert.assertEquals(9902,indelCount); } catch (IOException e) { diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java index 2ee094ee3..afe1427dc 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocParserTest.java @@ -3,16 +3,11 @@ package org.broadinstitute.sting.utils; import static junit.framework.Assert.assertTrue; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import static org.junit.Assert.assertEquals; +import org.junit.BeforeClass; import org.junit.Test; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; - /** * @author aaron @@ -22,15 +17,22 @@ import java.util.List; * Test out the functionality of the new genome loc parser */ public class GenomeLocParserTest extends BaseTest { - @Test(expected = StingException.class) public void testUnsetupException() { + GenomeLocParser.contigInfo = null; GenomeLocParser.createGenomeLoc(0, 0, 0); } + @BeforeClass + public static void init() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + } + @Test public void testKnownContigOrder() { SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); + GenomeLocParser.contigInfo = null; // assert that it's false when the contig ordering is not 
setup assertTrue(!GenomeLocParser.hasKnownContigOrdering()); GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); @@ -97,6 +99,14 @@ public class GenomeLocParserTest extends BaseTest { assertEquals(1, loc.getStart()); } + @Test + public void testCreateGenomeLoc1point5() { // in honor of VAAL! + GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1"); + assertEquals(loc.getContigIndex(), 0); + assertEquals(1, loc.getStop()); + assertEquals(1, loc.getStart()); + } + @Test public void testCreateGenomeLoc2() { GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100); @@ -130,7 +140,7 @@ public class GenomeLocParserTest extends BaseTest { assertEquals(1, copy.getStart()); } - @Test + /*@Test // - uncomment if you want to test speed public void testGenomeLocParserList() { long start = System.currentTimeMillis(); List parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(Arrays.asList(new String[]{"/humgen/gsa-scr1/GATK_Data/Validation_Data/bigChr1IntervalList.list"})); @@ -138,5 +148,21 @@ public class GenomeLocParserTest extends BaseTest { LinkedList loc = new LinkedList(GenomeLocParser.mergeOverlappingLocations(parsedIntervals)); long stop = System.currentTimeMillis(); logger.warn("Elapsed time = " + (stop - start)); + }*/ + + @Test + public void testGenomeLocPlusSign() { + GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1+"); + assertEquals(0, loc.getContigIndex()); + assertEquals(10, loc.getStop()); // the size + assertEquals(1, loc.getStart()); + } + + @Test(expected = RuntimeException.class) + public void testGenomeLocBad() { + GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-"); + assertEquals(0, loc.getContigIndex()); + assertEquals(10, loc.getStop()); // the size + assertEquals(1, loc.getStart()); } }