From 4f2e3128050db69150290e5982c78dbaa27aff18 Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Thu, 7 Jul 2016 10:24:20 -0400 Subject: [PATCH] Throw an exception for invalid Picard intervals --- .../gatk/utils/interval/IntervalUtils.java | 13 +++++++------ .../gatk/utils/GenomeLocParserUnitTest.java | 17 +++++++++-------- .../utils/interval/IntervalUtilsUnitTest.java | 15 ++------------- 3 files changed, 18 insertions(+), 27 deletions(-) diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/interval/IntervalUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/interval/IntervalUtils.java index b2ff1708e..9f6e352bb 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/interval/IntervalUtils.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/interval/IntervalUtils.java @@ -144,16 +144,17 @@ public class IntervalUtils { IntervalList il = IntervalList.fromFile(inputFile); isPicardInterval = true; - int nInvalidIntervals = 0; for (Interval interval : il.getIntervals()) { - if ( glParser.isValidGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true)) - ret.add(glParser.createGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true)); + if (interval.getStart() - interval.getEnd() == 1 ) { // remove once a corrected version of the exome interval list is released. + logger.warn("Possible incorrectly converted length 1 interval : " + interval); + } + else if ( glParser.isValidGenomeLoc(interval.getContig(), interval.getStart(), interval.getEnd(), true)) { + ret.add(glParser.createGenomeLoc(interval.getContig(), interval.getStart(), interval.getEnd(), true)); + } else { - nInvalidIntervals++; + throw new UserException(inputFile.toString() + " has an invalid genome location : " + interval) ; } } - if ( nInvalidIntervals > 0 ) - logger.warn("Ignoring " + nInvalidIntervals + " invalid intervals from " + inputFile); } // if that didn't work, try parsing file as a GATK interval file diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/GenomeLocParserUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/GenomeLocParserUnitTest.java index 5432e236f..3f58e2d77 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/GenomeLocParserUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/GenomeLocParserUnitTest.java @@ -51,6 +51,7 @@ import java.util.*; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import static org.testng.Assert.assertFalse; /** * @author aaron @@ -316,14 +317,14 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testValidationOfGenomeLocs() { assertTrue(genomeLocParser.isValidGenomeLoc("chr1",1,1)); - assertTrue(!genomeLocParser.isValidGenomeLoc("chr2",1,1)); // shouldn't have an entry - assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig - assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start - assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop - assertTrue( genomeLocParser.isValidGenomeLoc("chr1",-1,2, false)); // bad stop - assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end - assertTrue( genomeLocParser.isValidGenomeLoc("chr1",10,11, false)); // bad start, past end - assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",2,1)); // stop < start + assertFalse(genomeLocParser.isValidGenomeLoc("chr2",1,1)); // shouldn't have an entry + assertFalse(genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig + assertFalse(genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start + assertFalse(genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop + assertTrue(genomeLocParser.isValidGenomeLoc("chr1",-1,2, false)); // bad stop + assertFalse(genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end + assertTrue(genomeLocParser.isValidGenomeLoc("chr1",10,11, false)); // bad start, past end + assertFalse(genomeLocParser.isValidGenomeLoc("chr1",2,1)); // stop < start } @Test(expectedExceptions = ReviewedGATKException.class) diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/interval/IntervalUtilsUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/interval/IntervalUtilsUnitTest.java index 07b49d480..b0db3800c 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/interval/IntervalUtilsUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/interval/IntervalUtilsUnitTest.java @@ -1046,23 +1046,12 @@ public class IntervalUtilsUnitTest extends BaseTest { }; } - /* - * This test is disabled because its assumption that we will not throw an error - * upon parsing invalid Picard intervals is no longer true, as htsjdk has added - * extra protection against invalid intervals to IntervalList.add(). - * - * We should reconsider our decision in IntervalUtils.intervalFileToList() to - * silently ignore invalid intervals when parsing Picard interval files, as it's - * inconsistent with the way we handle invalid intervals for GATK interval files - * (throw a UserException, covered by testInvalidGATKFileIntervalHandling() below), - * and update this test accordingly. - */ - @Test(dataProvider="invalidIntervalTestData", enabled = false) + @Test(dataProvider="invalidIntervalTestData", expectedExceptions=UserException.class, enabled = true) public void testInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser, String contig, int intervalStart, int intervalEnd ) throws Exception { SAMFileHeader picardFileHeader = new SAMFileHeader(); - picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1")); + picardFileHeader.addSequence(genomeLocParser.getContigInfo(contig)); IntervalList picardIntervals = new IntervalList(picardFileHeader); picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));