Merge pull request #1432 from broadinstitute/rhl_invalid_intervals

Throw an exception for invalid Picard intervals
This commit is contained in:
Ron Levine 2016-07-15 13:02:39 -04:00 committed by GitHub
commit b6908f52f0
3 changed files with 18 additions and 27 deletions

View File

@ -144,16 +144,17 @@ public class IntervalUtils {
IntervalList il = IntervalList.fromFile(inputFile); IntervalList il = IntervalList.fromFile(inputFile);
isPicardInterval = true; isPicardInterval = true;
int nInvalidIntervals = 0;
for (Interval interval : il.getIntervals()) { for (Interval interval : il.getIntervals()) {
if ( glParser.isValidGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true)) if (interval.getStart() - interval.getEnd() == 1 ) { // remove once a corrected version of the exome interval list is released.
ret.add(glParser.createGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true)); logger.warn("Possible incorrectly converted length 1 interval : " + interval);
}
else if ( glParser.isValidGenomeLoc(interval.getContig(), interval.getStart(), interval.getEnd(), true)) {
ret.add(glParser.createGenomeLoc(interval.getContig(), interval.getStart(), interval.getEnd(), true));
}
else { else {
nInvalidIntervals++; throw new UserException(inputFile.toString() + " has an invalid genome location : " + interval) ;
} }
} }
if ( nInvalidIntervals > 0 )
logger.warn("Ignoring " + nInvalidIntervals + " invalid intervals from " + inputFile);
} }
// if that didn't work, try parsing file as a GATK interval file // if that didn't work, try parsing file as a GATK interval file

View File

@ -51,6 +51,7 @@ import java.util.*;
import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue; import static org.testng.Assert.assertTrue;
import static org.testng.Assert.assertFalse;
/** /**
* @author aaron * @author aaron
@ -316,14 +317,14 @@ public class GenomeLocParserUnitTest extends BaseTest {
@Test @Test
public void testValidationOfGenomeLocs() { public void testValidationOfGenomeLocs() {
assertTrue(genomeLocParser.isValidGenomeLoc("chr1",1,1)); assertTrue(genomeLocParser.isValidGenomeLoc("chr1",1,1));
assertTrue(!genomeLocParser.isValidGenomeLoc("chr2",1,1)); // shouldn't have an entry assertFalse(genomeLocParser.isValidGenomeLoc("chr2",1,1)); // shouldn't have an entry
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig assertFalse(genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start assertFalse(genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop assertFalse(genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
assertTrue( genomeLocParser.isValidGenomeLoc("chr1",-1,2, false)); // bad stop assertTrue(genomeLocParser.isValidGenomeLoc("chr1",-1,2, false)); // bad stop
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end assertFalse(genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
assertTrue( genomeLocParser.isValidGenomeLoc("chr1",10,11, false)); // bad start, past end assertTrue(genomeLocParser.isValidGenomeLoc("chr1",10,11, false)); // bad start, past end
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",2,1)); // stop < start assertFalse(genomeLocParser.isValidGenomeLoc("chr1",2,1)); // stop < start
} }
@Test(expectedExceptions = ReviewedGATKException.class) @Test(expectedExceptions = ReviewedGATKException.class)

View File

@ -1046,23 +1046,12 @@ public class IntervalUtilsUnitTest extends BaseTest {
}; };
} }
/* @Test(dataProvider="invalidIntervalTestData", expectedExceptions=UserException.class, enabled = true)
* This test is disabled because its assumption that we will not throw an error
* upon parsing invalid Picard intervals is no longer true, as htsjdk has added
* extra protection against invalid intervals to IntervalList.add().
*
* We should reconsider our decision in IntervalUtils.intervalFileToList() to
* silently ignore invalid intervals when parsing Picard interval files, as it's
* inconsistent with the way we handle invalid intervals for GATK interval files
* (throw a UserException, covered by testInvalidGATKFileIntervalHandling() below),
* and update this test accordingly.
*/
@Test(dataProvider="invalidIntervalTestData", enabled = false)
public void testInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser, public void testInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
String contig, int intervalStart, int intervalEnd ) throws Exception { String contig, int intervalStart, int intervalEnd ) throws Exception {
SAMFileHeader picardFileHeader = new SAMFileHeader(); SAMFileHeader picardFileHeader = new SAMFileHeader();
picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1")); picardFileHeader.addSequence(genomeLocParser.getContigInfo(contig));
IntervalList picardIntervals = new IntervalList(picardFileHeader); IntervalList picardIntervals = new IntervalList(picardFileHeader);
picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname")); picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));