From 1296dd41bea46ddd64b20104ab02cc62eac46255 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 21 Nov 2011 11:52:39 -0500 Subject: [PATCH] Removing the legacy -L "interval1;interval2" syntax This syntax predates the ability to have multiple -L arguments, is inconsistent with the syntax of all other GATK arguments, requires quoting to avoid interpretation by the shell, and was causing problems in Queue. A UserException is now thrown if someone tries to use this syntax. --- .../sting/commandline/IntervalBinding.java | 2 +- .../sting/utils/interval/IntervalUtils.java | 42 ++++++++++--------- .../CallableLociWalkerIntegrationTest.java | 2 +- ...astaAlternateReferenceIntegrationTest.java | 8 ++-- .../utils/interval/IntervalUtilsUnitTest.java | 13 ++++++ 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java index f920d90ef..82c7b20b6 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java @@ -45,7 +45,7 @@ import java.util.*; * * The IntervalBinding is a formal GATK argument that bridges between a walker and * the engine to construct intervals for traversal at runtime. The IntervalBinding can - * either be a RodBinding, a string of one or more intervals, or a file with interval strings. + * either be a RodBinding, a string of one interval, or a file with interval strings. * The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it. * * Note that this class is immutable. diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 159b145a0..f8655f74a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -56,28 +56,30 @@ public class IntervalUtils { public static List parseIntervalArguments(GenomeLocParser parser, String arg) { List rawIntervals = new ArrayList(); // running list of raw GenomeLocs - // separate argument on semicolon first - for (String fileOrInterval : arg.split(";")) { - // if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'. - if (isUnmapped(fileOrInterval)) - rawIntervals.add(GenomeLoc.UNMAPPED); - // if it's a file, add items to raw interval list - else if (isIntervalFile(fileOrInterval)) { - try { - rawIntervals.addAll(intervalFileToList(parser, fileOrInterval)); - } - catch ( UserException.MalformedGenomeLoc e ) { - throw e; - } - catch ( Exception e ) { - throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in any supported format.", e); - } - } + if ( arg.indexOf(';') != -1 ) { + throw new UserException.BadArgumentValue("-L " + arg, "The legacy -L \"interval1;interval2\" syntax " + + "is no longer supported. Please use one -L argument for each " + + "interval or an interval file instead."); + } - // otherwise treat as an interval -> parse and add to raw interval list - else { - rawIntervals.add(parser.parseGenomeLoc(fileOrInterval)); + // if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'. + if (isUnmapped(arg)) + rawIntervals.add(GenomeLoc.UNMAPPED); + // if it's a file, add items to raw interval list + else if (isIntervalFile(arg)) { + try { + rawIntervals.addAll(intervalFileToList(parser, arg)); } + catch ( UserException.MalformedGenomeLoc e ) { + throw e; + } + catch ( Exception e ) { + throw new UserException.MalformedFile(arg, "Interval file could not be parsed in any supported format.", e); + } + } + // otherwise treat as an interval -> parse and add to raw interval list + else { + rawIntervals.add(parser.parseGenomeLoc(arg)); } return rawIntervals; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java index 1f3f8ebe6..3783525d1 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java @@ -52,7 +52,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest { @Test public void testCallableLociWalker2() { - String gatk_args = commonArgs + " -format BED -L 1:10,000,000-10,000,100;1:10,000,110-10,000,120 -summary %s"; + String gatk_args = commonArgs + " -format BED -L 1:10,000,000-10,000,100 -L 1:10,000,110-10,000,120 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, Arrays.asList("c671f65712d9575b8b3e1f1dbedc146e", "d287510eac04acf5a56f5cde2cba0e4a")); executeTest("formatBed by interval", spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 9af39e92c..1c5db4262 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -12,25 +12,25 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { String md5_1 = "328d2d52cedfdc52da7d1abff487633d"; WalkerTestSpec spec1a = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500;1:10,100,000-10,101,000;1:10,900,000-10,900,001 -o %s", + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s", 1, Arrays.asList(md5_1)); executeTest("testFastaReference", spec1a); WalkerTestSpec spec1b = new WalkerTestSpec( - "-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500;1:10,100,000-10,101,000;1:10,900,000-10,900,001 -o %s", + "-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s", 1, Arrays.asList(md5_1)); executeTest("testFastaReference", spec1b); WalkerTestSpec spec2 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s", 1, Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); executeTest("testFastaAlternateReferenceIndels", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s", + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500 -L 1:10,029,200-10,029,500 -o %s", 1, Arrays.asList("8b6cd2e20c381f9819aab2d270f5e641")); executeTest("testFastaAlternateReferenceSnps", spec3); diff --git a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java index 03d33d2c5..a9035ffd9 100644 --- a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java @@ -3,7 +3,10 @@ package org.broadinstitute.sting.utils.interval; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.SAMFileHeader; import org.apache.commons.io.FileUtils; +import org.broad.tribble.Feature; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.commandline.IntervalBinding; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.testng.Assert; @@ -983,4 +986,14 @@ public class IntervalUtilsUnitTest extends BaseTest { data.toString(), data.original, actual, data.expected); Assert.assertEquals(actual, data.expected, description); } + + @Test(expectedExceptions=UserException.BadArgumentValue.class) + public void testExceptionUponLegacyIntervalSyntax() throws Exception { + GenomeAnalysisEngine toolkit = new GenomeAnalysisEngine(); + toolkit.setGenomeLocParser(new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference)))); + + // Attempting to use the legacy -L "interval1;interval2" syntax should produce an exception: + IntervalBinding binding = new IntervalBinding("1;2"); + List intervals = binding.getIntervals(toolkit); + } }