Removing the legacy -L "interval1;interval2" syntax

This syntax predates the ability to have multiple -L arguments, is
inconsistent with the syntax of all other GATK arguments, requires
quoting to avoid interpretation by the shell, and was causing
problems in Queue.

A UserException is now thrown if someone tries to use this syntax.
This commit is contained in:
David Roazen 2011-11-21 11:52:39 -05:00
parent b5de182014
commit 1296dd41be
5 changed files with 41 additions and 26 deletions

View File

@ -45,7 +45,7 @@ import java.util.*;
*
* The IntervalBinding<T> is a formal GATK argument that bridges between a walker and
* the engine to construct intervals for traversal at runtime. The IntervalBinding can
* either be a RodBinding<T>, a string of one or more intervals, or a file with interval strings.
* either be a RodBinding<T>, a string of one interval, or a file with interval strings.
* The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it.
*
* Note that this class is immutable.

View File

@ -56,28 +56,30 @@ public class IntervalUtils {
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, String arg) {
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
// separate argument on semicolon first
for (String fileOrInterval : arg.split(";")) {
// if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'.
if (isUnmapped(fileOrInterval))
rawIntervals.add(GenomeLoc.UNMAPPED);
// if it's a file, add items to raw interval list
else if (isIntervalFile(fileOrInterval)) {
try {
rawIntervals.addAll(intervalFileToList(parser, fileOrInterval));
}
catch ( UserException.MalformedGenomeLoc e ) {
throw e;
}
catch ( Exception e ) {
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in any supported format.", e);
}
}
if ( arg.indexOf(';') != -1 ) {
throw new UserException.BadArgumentValue("-L " + arg, "The legacy -L \"interval1;interval2\" syntax " +
"is no longer supported. Please use one -L argument for each " +
"interval or an interval file instead.");
}
// otherwise treat as an interval -> parse and add to raw interval list
else {
rawIntervals.add(parser.parseGenomeLoc(fileOrInterval));
// if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'.
if (isUnmapped(arg))
rawIntervals.add(GenomeLoc.UNMAPPED);
// if it's a file, add items to raw interval list
else if (isIntervalFile(arg)) {
try {
rawIntervals.addAll(intervalFileToList(parser, arg));
}
catch ( UserException.MalformedGenomeLoc e ) {
throw e;
}
catch ( Exception e ) {
throw new UserException.MalformedFile(arg, "Interval file could not be parsed in any supported format.", e);
}
}
// otherwise treat as an interval -> parse and add to raw interval list
else {
rawIntervals.add(parser.parseGenomeLoc(arg));
}
return rawIntervals;

View File

@ -52,7 +52,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
@Test
public void testCallableLociWalker2() {
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-10,000,100;1:10,000,110-10,000,120 -summary %s";
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-10,000,100 -L 1:10,000,110-10,000,120 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("c671f65712d9575b8b3e1f1dbedc146e", "d287510eac04acf5a56f5cde2cba0e4a"));
executeTest("formatBed by interval", spec);

View File

@ -12,25 +12,25 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
String md5_1 = "328d2d52cedfdc52da7d1abff487633d";
WalkerTestSpec spec1a = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500;1:10,100,000-10,101,000;1:10,900,000-10,900,001 -o %s",
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s",
1,
Arrays.asList(md5_1));
executeTest("testFastaReference", spec1a);
WalkerTestSpec spec1b = new WalkerTestSpec(
"-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500;1:10,100,000-10,101,000;1:10,900,000-10,900,001 -o %s",
"-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s",
1,
Arrays.asList(md5_1));
executeTest("testFastaReference", spec1b);
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s",
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
1,
Arrays.asList("0567b32ebdc26604ddf2a390de4579ac"));
executeTest("testFastaAlternateReferenceIndels", spec2);
WalkerTestSpec spec3 = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s",
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500 -L 1:10,029,200-10,029,500 -o %s",
1,
Arrays.asList("8b6cd2e20c381f9819aab2d270f5e641"));
executeTest("testFastaAlternateReferenceSnps", spec3);

View File

@ -3,7 +3,10 @@ package org.broadinstitute.sting.utils.interval;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.SAMFileHeader;
import org.apache.commons.io.FileUtils;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.testng.Assert;
@ -983,4 +986,14 @@ public class IntervalUtilsUnitTest extends BaseTest {
data.toString(), data.original, actual, data.expected);
Assert.assertEquals(actual, data.expected, description);
}
@Test(expectedExceptions=UserException.BadArgumentValue.class)
public void testExceptionUponLegacyIntervalSyntax() throws Exception {
GenomeAnalysisEngine toolkit = new GenomeAnalysisEngine();
toolkit.setGenomeLocParser(new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference))));
// Attempting to use the legacy -L "interval1;interval2" syntax should produce an exception:
IntervalBinding<Feature> binding = new IntervalBinding<Feature>("1;2");
List<GenomeLoc> intervals = binding.getIntervals(toolkit);
}
}