fixing the dbSNP test. Also removing unnecessary comments from the GenomeLocParser, added some tests, and commented out the performance test

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1676 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-21 23:32:24 +00:00
parent 39a47491a9
commit 7bfb5fad27
3 changed files with 51 additions and 37 deletions

View File

@ -52,7 +52,7 @@ import java.util.regex.Pattern;
public class GenomeLocParser { public class GenomeLocParser {
private static Logger logger = Logger.getLogger(GenomeLocParser.class); private static Logger logger = Logger.getLogger(GenomeLocParser.class);
private static final Pattern mPattern = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)?\\+?(-)?([\\d,]+)?$"); // matches case 3 private static final Pattern mPattern = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)?(\\+)?(-)?([\\d,]+)?$"); // matches case 3
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@ -61,7 +61,7 @@ public class GenomeLocParser {
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
//public static Map<String, Integer> refContigOrdering = null; //public static Map<String, Integer> refContigOrdering = null;
private static SAMSequenceDictionary contigInfo = null; protected static SAMSequenceDictionary contigInfo = null;
/** /**
* do we have a contig ordering setup? * do we have a contig ordering setup?
@ -140,11 +140,6 @@ public class GenomeLocParser {
public static GenomeLoc parseGenomeLoc(final String str) { public static GenomeLoc parseGenomeLoc(final String str) {
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' // 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
//System.out.printf("Parsing location '%s'%n", str); //System.out.printf("Parsing location '%s'%n", str);
/*try {
Thread.sleep(2000);
} catch (InterruptedException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
} */
String contig = null; String contig = null;
long start = 1; long start = 1;
@ -156,29 +151,21 @@ public class GenomeLocParser {
try { try {
if (match.matches()) { if (match.matches()) {
contig = match.group(1); contig = match.group(1);
if (match.groupCount() > 1) { if (match.groupCount() == 5) {
switch (match.groupCount()) {
case 2:
start = stop = parsePosition(match.group(2));
break;
case 3:
start = parsePosition(match.group(2)); start = parsePosition(match.group(2));
if (!match.group(3).equals("+")) bad = true; if (match.group(3) != null && match.group(3).equals("+") && match.group(5) == null) {
break; // do nothing
case 4: } else if (match.group(5) != null)
start = parsePosition(match.group(2)); stop = parsePosition(match.group(5));
stop = parsePosition(match.group(4)); else if (match.group(5) == null && match.group(4) == null && match.group(3) == null)
break; stop = start;
default: else
bad = true; bad = true;
break;
}
}
} else { } else {
bad = true; bad = true;
} }
} }
}
catch (Exception e) { catch (Exception e) {
bad = true; bad = true;
@ -194,6 +181,7 @@ public class GenomeLocParser {
if (!isContigValid(contig)) if (!isContigValid(contig))
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference."); throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
GenomeLoc loc = parseGenomeLoc(contig, start, stop); GenomeLoc loc = parseGenomeLoc(contig, start, stop);
return loc; return loc;
} }

View File

@ -75,7 +75,7 @@ public class rodDbSNPTest extends BaseTest {
stop = (var.getLocation().getContig().equals("1") && var.getLocation().getStart() > 11000000); stop = (var.getLocation().getContig().equals("1") && var.getLocation().getStart() > 11000000);
line = stream.readLine(); line = stream.readLine();
} }
Assert.assertEquals(3717,snpCount); Assert.assertEquals(3615,snpCount);
Assert.assertEquals(9902,indelCount); Assert.assertEquals(9902,indelCount);
} catch (IOException e) { } catch (IOException e) {

View File

@ -3,16 +3,11 @@ package org.broadinstitute.sting.utils;
import static junit.framework.Assert.assertTrue; import static junit.framework.Assert.assertTrue;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
/** /**
* @author aaron * @author aaron
@ -22,15 +17,22 @@ import java.util.List;
* Test out the functionality of the new genome loc parser * Test out the functionality of the new genome loc parser
*/ */
public class GenomeLocParserTest extends BaseTest { public class GenomeLocParserTest extends BaseTest {
@Test(expected = StingException.class) @Test(expected = StingException.class)
public void testUnsetupException() { public void testUnsetupException() {
GenomeLocParser.contigInfo = null;
GenomeLocParser.createGenomeLoc(0, 0, 0); GenomeLocParser.createGenomeLoc(0, 0, 0);
} }
@BeforeClass
public static void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
}
@Test @Test
public void testKnownContigOrder() { public void testKnownContigOrder() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
GenomeLocParser.contigInfo = null;
// assert that it's false when the contig ordering is not setup // assert that it's false when the contig ordering is not setup
assertTrue(!GenomeLocParser.hasKnownContigOrdering()); assertTrue(!GenomeLocParser.hasKnownContigOrdering());
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
@ -97,6 +99,14 @@ public class GenomeLocParserTest extends BaseTest {
assertEquals(1, loc.getStart()); assertEquals(1, loc.getStart());
} }
/**
 * Parses a location string made of a contig and a single position ("chr1:1")
 * and checks the contig index, start and stop of the resulting GenomeLoc.
 * Both start and stop are expected to equal the one position given.
 */
@Test
public void testCreateGenomeLoc1point5() { // in honor of VAAL!
    GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1");
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure message report the right value as "expected".
    assertEquals(0, loc.getContigIndex());
    assertEquals(1, loc.getStop());
    assertEquals(1, loc.getStart());
}
@Test @Test
public void testCreateGenomeLoc2() { public void testCreateGenomeLoc2() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100); GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100);
@ -130,7 +140,7 @@ public class GenomeLocParserTest extends BaseTest {
assertEquals(1, copy.getStart()); assertEquals(1, copy.getStart());
} }
@Test /*@Test // - uncomment if you want to test speed
public void testGenomeLocParserList() { public void testGenomeLocParserList() {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
List<GenomeLoc> parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(Arrays.asList(new String[]{"/humgen/gsa-scr1/GATK_Data/Validation_Data/bigChr1IntervalList.list"})); List<GenomeLoc> parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(Arrays.asList(new String[]{"/humgen/gsa-scr1/GATK_Data/Validation_Data/bigChr1IntervalList.list"}));
@ -138,5 +148,21 @@ public class GenomeLocParserTest extends BaseTest {
LinkedList<GenomeLoc> loc = new LinkedList<GenomeLoc>(GenomeLocParser.mergeOverlappingLocations(parsedIntervals)); LinkedList<GenomeLoc> loc = new LinkedList<GenomeLoc>(GenomeLocParser.mergeOverlappingLocations(parsedIntervals));
long stop = System.currentTimeMillis(); long stop = System.currentTimeMillis();
logger.warn("Elapsed time = " + (stop - start)); logger.warn("Elapsed time = " + (stop - start));
}*/
/**
 * Checks that a location string ending in a plus sign ("chr1:1+") is accepted
 * by the parser and yields the expected contig index, start and stop.
 */
@Test
public void testGenomeLocPlusSign() {
    final GenomeLoc parsed = GenomeLocParser.parseGenomeLoc("chr1:1+");
    assertEquals(0, parsed.getContigIndex());
    // the size -- NOTE(review): presumably the contig length set up by the test fixture; confirm
    assertEquals(10, parsed.getStop());
    assertEquals(1, parsed.getStart());
}
@Test(expected = RuntimeException.class)
public void testGenomeLocBad() {
GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-");
assertEquals(0, loc.getContigIndex());
assertEquals(10, loc.getStop()); // the size
assertEquals(1, loc.getStart());
} }
} }