From 746a5e95f318b0da85f98ce9da1288d7cc1cf796 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Wed, 27 Jun 2012 01:15:22 -0400 Subject: [PATCH] Refactored parsing of Rod/IntervalBinding. Queue S/G now uses all interval arguments passed to CommandLineGATK QFunctions including support for BED/tribble types, XL, ISR, and padding. Updated HSP to use new padding arguments instead of flank intervals file, plus latest QC evals. IntervalUtils return unmodifiable lists so that utilities don't mutate the collections. Added a JavaCommandLineFunction.javaGCThreads option to test reducing java's automatic GC thread allocation based on num cpus. Added comma to list of characters to convert to underscores in GridEngine job names so that GE JSV doesn't choke on the -N values. JobRunInfo handles the null done times when jobs crash with strange errors. --- .../commandline/ArgumentTypeDescriptor.java | 231 +++++++++--------- .../sting/commandline/ParsingMethod.java | 41 ++-- .../sting/gatk/GenomeAnalysisEngine.java | 56 +---- .../sting/utils/interval/IntervalUtils.java | 92 ++++++- .../gatk/GenomeAnalysisEngineUnitTest.java | 78 +----- .../utils/interval/IntervalUtilsUnitTest.java | 99 +++++++- .../sting/queue/engine/JobRunInfo.scala | 22 +- .../gridengine/GridEngineJobRunner.scala | 2 +- .../gatk/ContigScatterFunction.scala | 4 +- .../queue/extensions/gatk/GATKIntervals.scala | 66 ++++- .../extensions/gatk/GATKScatterFunction.scala | 51 ++-- .../gatk/IntervalScatterFunction.scala | 4 +- .../gatk/LocusScatterFunction.scala | 4 +- .../extensions/gatk/VcfGatherFunction.scala | 9 +- .../function/JavaCommandLineFunction.scala | 7 + .../sting/queue/util/QJobReport.scala | 4 +- .../ScalaCompoundArgumentTypeDescriptor.scala | 2 +- .../gatk/GATKIntervalsUnitTest.scala | 91 +++++-- .../ExampleUnifiedGenotyperPipelineTest.scala | 39 +++ 19 files changed, 550 insertions(+), 352 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java 
b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 94ed23caf..d5503b2a9 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -289,7 +289,7 @@ public abstract class ArgumentTypeDescriptor { return field.isAnnotationPresent(Hidden.class); } - public Class makeRawTypeIfNecessary(Type t) { + public static Class makeRawTypeIfNecessary(Type t) { if ( t == null ) return null; else if ( t instanceof ParameterizedType ) @@ -300,6 +300,114 @@ public abstract class ArgumentTypeDescriptor { throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t); } } + + /** + * The actual argument parsing method; extracts the argument value from the matches and delegates to the static parseBinding. + * @param source source + * @param type type to check + * @param matches matches + * @return the newly created binding object (e.g. RodBinding or IntervalBinding), as determined by type. + */ + protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) { + ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); + String value = getArgumentValue(defaultDefinition, matches); + @SuppressWarnings("unchecked") + Class parameterType = JVMUtils.getParameterizedTypeClass(type); + String name = defaultDefinition.fullName; + + return parseBinding(value, parameterType, type, name, tags, source.field.getName()); + } + + /** + * Creates a binding from a raw argument value, resolving the tribble type from the tags or from the file itself. + * @param value The source of the binding + * @param parameterType The Tribble Feature parameter type + * @param bindingClass The class type for the binding (ex: RodBinding, IntervalBinding, etc.) Must have the correct constructor for creating the binding. + * @param bindingName The name of the binding passed to the constructor. + * @param tags Tags for the binding used for parsing and passed to the constructor. + * @param fieldName The name of the field that was parsed. Used for error reporting. 
+ * @return The newly created binding object of type bindingClass. + */ + public static Object parseBinding(String value, Class parameterType, Type bindingClass, + String bindingName, Tags tags, String fieldName) { + try { + String tribbleType = null; + // must have one or two tag values here + if ( tags.getPositionalTags().size() > 2 ) { + throw new UserException.CommandLineException( + String.format("Unexpected number of positional tags for argument %s : %s. " + + "Rod bindings only support -X:type and -X:name,type argument styles", + value, fieldName)); + } else if ( tags.getPositionalTags().size() == 2 ) { + // -X:name,type style + bindingName = tags.getPositionalTags().get(0); + tribbleType = tags.getPositionalTags().get(1); + + FeatureManager manager = new FeatureManager(); + if ( manager.getByName(tribbleType) == null ) + throw new UserException.CommandLineException( + String.format("Unable to find tribble type '%s' provided on the command line. " + + "Please select a correct type from among the supported types:%n%s", + tribbleType, manager.userFriendlyListOfAvailableFeatures(parameterType))); + + } else { + // case with 0 or 1 positional tags + FeatureManager manager = new FeatureManager(); + + // -X:type style is a type when we cannot determine the type dynamically + String tag1 = tags.getPositionalTags().size() == 1 ? 
tags.getPositionalTags().get(0) : null; + if ( tag1 != null ) { + if ( manager.getByName(tag1) != null ) // this is a type + tribbleType = tag1; + else + bindingName = tag1; + } + + if ( tribbleType == null ) { + // try to determine the file type dynamically + File file = new File(value); + if ( file.canRead() && file.isFile() ) { + FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); + if ( featureDescriptor != null ) { + tribbleType = featureDescriptor.getName(); + logger.info("Dynamically determined type of " + file + " to be " + tribbleType); + } + } + + if ( tribbleType == null ) { + // IntervalBinding can be created from a normal String + Class rawType = (makeRawTypeIfNecessary(bindingClass)); + try { + return rawType.getConstructor(String.class).newInstance(value); + } catch (NoSuchMethodException e) { + /* binding class has no String constructor; fall through to the error reporting below */ + } + + if ( ! file.exists() ) { + throw new UserException.CouldNotReadInputFile(file, "file does not exist"); + } else if ( ! file.canRead() || ! file.isFile() ) { + throw new UserException.CouldNotReadInputFile(file, "file could not be read"); + } else { + throw new UserException.CommandLineException( + String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " + + "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s", + manager.userFriendlyListOfAvailableFeatures(parameterType))); + } + } + } + } + + Constructor ctor = (makeRawTypeIfNecessary(bindingClass)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); + return ctor.newInstance(parameterType, bindingName, value, tribbleType, tags); + } catch (Exception e) { + if ( e instanceof UserException ) + throw ((UserException)e); + else + throw new UserException.CommandLineException( + String.format("Failed to parse value %s for argument %s. 
Message: %s", + value, fieldName, e.getMessage())); + } + } } /** @@ -324,6 +432,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); } @Override + @SuppressWarnings("unchecked") public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) { Class parameterType = JVMUtils.getParameterizedTypeClass(type); return RodBinding.makeUnbound((Class)parameterType); @@ -336,118 +445,16 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { - return parse(parsingEngine, source, type, matches, false); - } - - /** - * The actual argument parsing method. - * - * IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided - * that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of - * this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding - * is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the - * usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other - * side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically - * continue to input them out of order (whereas Tribble Features are ordered). - * - * @param parsingEngine parsing engine - * @param source source - * @param type type to check - * @param matches matches - * @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding? - * @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding. 
- */ - public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) { - ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); - String value = getArgumentValue( defaultDefinition, matches ); - Class parameterType = JVMUtils.getParameterizedTypeClass(type); - - try { - String name = defaultDefinition.fullName; - String tribbleType = null; - Tags tags = getArgumentTags(matches); - // must have one or two tag values here - if ( tags.getPositionalTags().size() > 2 ) { - throw new UserException.CommandLineException( - String.format("Unexpected number of positional tags for argument %s : %s. " + - "Rod bindings only support -X:type and -X:name,type argument styles", - value, source.field.getName())); - } if ( tags.getPositionalTags().size() == 2 ) { - // -X:name,type style - name = tags.getPositionalTags().get(0); - tribbleType = tags.getPositionalTags().get(1); - } else { - // case with 0 or 1 positional tags - FeatureManager manager = new FeatureManager(); - - // -X:type style is a type when we cannot determine the type dynamically - String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null; - if ( tag1 != null ) { - if ( manager.getByName(tag1) != null ) // this a type - tribbleType = tag1; - else - name = tag1; - } - - if ( tribbleType == null ) { - // try to determine the file type dynamically - File file = new File(value); - if ( file.canRead() && file.isFile() ) { - FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); - if ( featureDescriptor != null ) { - tribbleType = featureDescriptor.getName(); - logger.info("Dynamically determined type of " + file + " to be " + tribbleType); - } - } - - if ( tribbleType == null ) { - // IntervalBindings allow streaming conversion of Strings - if ( createIntervalBinding ) { - return new IntervalBinding(value); - } - - if ( ! 
file.exists() ) { - throw new UserException.CouldNotReadInputFile(file, "file does not exist"); - } else if ( ! file.canRead() || ! file.isFile() ) { - throw new UserException.CouldNotReadInputFile(file, "file could not be read"); - } else { - throw new UserException.CommandLineException( - String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " + - "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s", - manager.userFriendlyListOfAvailableFeatures(parameterType))); - } - } - } - } - - Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); - Object result; - if ( createIntervalBinding ) { - result = ctor.newInstance(parameterType, name, value, tribbleType, tags); - } else { - RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); - parsingEngine.addTags(rbind, tags); - parsingEngine.addRodBinding(rbind); - result = rbind; - } - return result; - } catch (InvocationTargetException e) { - throw new UserException.CommandLineException( - String.format("Failed to parse value %s for argument %s.", - value, source.field.getName())); - } catch (Exception e) { - if ( e instanceof UserException ) - throw ((UserException)e); - else - throw new UserException.CommandLineException( - String.format("Failed to parse value %s for argument %s. 
Message: %s", - value, source.field.getName(), e.getMessage())); - } + Tags tags = getArgumentTags(matches); + RodBinding rbind = (RodBinding)parseBinding(source, type, matches, tags); + parsingEngine.addTags(rbind, tags); + parsingEngine.addRodBinding(rbind); + return rbind; } } /** - * Parser for RodBinding objects + * Parser for IntervalBinding objects */ class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { /** @@ -475,7 +482,7 @@ class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { */ @Override public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { - return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true); + return parseBinding(source, type, matches, getArgumentTags(matches)); } } @@ -783,7 +790,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { } Class multiplexerType = dependentArgument.field.getAnnotation(Multiplex.class).value(); - Constructor multiplexerConstructor = null; + Constructor multiplexerConstructor; try { multiplexerConstructor = multiplexerType.getConstructor(sourceTypes); multiplexerConstructor.setAccessible(true); @@ -792,7 +799,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { throw new ReviewedStingException(String.format("Unable to find constructor for class %s with parameters %s",multiplexerType.getName(),Arrays.deepToString(sourceFields)),ex); } - Multiplexer multiplexer = null; + Multiplexer multiplexer; try { multiplexer = multiplexerConstructor.newInstance(sourceValues); } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java index 452309e89..26af49e12 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java @@ -78,24 +78,7 @@ public 
abstract class ParsingMethod { String argument = matcher.group(1).trim(); - Tags tags = new Tags(); - if(matcher.group(2) != null) { - for(String tag: Utils.split(matcher.group(2),",")) { - // Check for presence of an '=' sign, indicating a key-value pair in the tag line. - int equalDelimiterPos = tag.indexOf('='); - if(equalDelimiterPos >= 0) { - // Sanity check; ensure that there aren't multiple '=' in this key-value pair. - if(tag.indexOf('=',equalDelimiterPos+1) >= 0) - throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. Please ensure that " + - "key-value tags are of the form =, and neither key " + - "nor value contain the '=' character", tag, argument)); - tags.addKeyValueTag(tag.substring(0,equalDelimiterPos),tag.substring(equalDelimiterPos+1)); - } - else - tags.addPositionalTag(tag); - - } - } + Tags tags = parseTags(argument, matcher.group(2)); // Find the most appropriate argument definition for the given argument. ArgumentDefinition argumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher ); @@ -105,6 +88,28 @@ public abstract class ParsingMethod { return new ArgumentMatch(argument,argumentDefinition,position,tags); } + public static Tags parseTags(String argument, String tagString) { + Tags tags = new Tags(); + if (tagString != null) { + for(String tag: Utils.split(tagString, ",")) { + // Check for presence of an '=' sign, indicating a key-value pair in the tag line. + int equalDelimiterPos = tag.indexOf('='); + if(equalDelimiterPos >= 0) { + // Sanity check; ensure that there aren't multiple '=' in this key-value pair. + if(tag.indexOf('=',equalDelimiterPos+1) >= 0) + throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. 
Please ensure that " + + "key-value tags are of the form =, and neither key " + + "nor value contain the '=' character", tag, argument)); + tags.addKeyValueTag(tag.substring(0,equalDelimiterPos),tag.substring(equalDelimiterPos+1)); + } + else + tags.addPositionalTag(tag); + + } + } + return tags; + } + /** * A command-line argument always starts with an alphabetical character or underscore followed by any word character. */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 80cbd3dad..68680dd10 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -30,7 +30,6 @@ import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import org.apache.log4j.Logger; -import org.broad.tribble.Feature; import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; @@ -54,9 +53,9 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; @@ -582,7 +581,6 @@ public class GenomeAnalysisEngine { * Setup the intervals to be processed */ protected void initializeIntervals() { - // return if no 
interval arguments at all if ( argCollection.intervals == null && argCollection.excludeIntervals == null ) return; @@ -590,17 +588,22 @@ public class GenomeAnalysisEngine { // Note that the use of '-L all' is no longer supported. // if include argument isn't given, create new set of all possible intervals - GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ? - GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) : - loadIntervals(argCollection.intervals, argCollection.intervalSetRule, argCollection.intervalPadding)); + + Pair includeExcludePair = IntervalUtils.parseIntervalBindingsPair( + this.referenceDataSource, + argCollection.intervals, + argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, + argCollection.excludeIntervals); + + GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst(); + GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond(); // if no exclude arguments, can return parseIntervalArguments directly - if ( argCollection.excludeIntervals == null ) + if ( excludeSortedSet == null ) intervals = includeSortedSet; // otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets else { - GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION); intervals = includeSortedSet.subtractRegions(excludeSortedSet); // logging messages only printed when exclude (-XL) arguments are given @@ -613,43 +616,6 @@ public class GenomeAnalysisEngine { } } - /** - * Loads the intervals relevant to the current execution - * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above - * @param rule interval merging rule - * @return A sorted, merged list of all intervals specified in this arg list. 
- */ - protected GenomeLocSortedSet loadIntervals( final List> argList, final IntervalSetRule rule ) { - return loadIntervals(argList, rule, 0); - } - - /** - * Loads the intervals relevant to the current execution - * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above - * @param rule interval merging rule - * @param padding how much to pad the intervals - * @return A sorted, merged list of all intervals specified in this arg list. - */ - protected GenomeLocSortedSet loadIntervals( final List> argList, final IntervalSetRule rule, final int padding ) { - - List allIntervals = new ArrayList(); - for ( IntervalBinding intervalBinding : argList ) { - List intervals = intervalBinding.getIntervals(this.getGenomeLocParser()); - - if ( intervals.isEmpty() ) { - logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed."); - } - - if ( padding > 0 ) { - intervals = IntervalUtils.getIntervalsWithFlanks(this.getGenomeLocParser(), intervals, padding); - } - - allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule); - } - - return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging); - } - /** * Add additional, externally managed IO streams for inputs. 
* diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index c96226405..6ee4af288 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -6,6 +6,8 @@ import net.sf.picard.util.Interval; import net.sf.picard.util.IntervalList; import net.sf.samtools.SAMFileHeader; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.IntervalBinding; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -169,21 +171,23 @@ public class IntervalUtils { */ public static List mergeListsBySetOperator(List setOne, List setTwo, IntervalSetRule rule) { // shortcut, if either set is zero, return the other set - if (setOne == null || setOne.size() == 0 || setTwo == null || setTwo.size() == 0) return (setOne == null || setOne.size() == 0) ? setTwo : setOne; + if (setOne == null || setOne.size() == 0 || setTwo == null || setTwo.size() == 0) + return Collections.unmodifiableList((setOne == null || setOne.size() == 0) ? 
setTwo : setOne); + + // our master list, since we can't guarantee removal time in a generic list + LinkedList retList = new LinkedList(); // if we're set to UNION, just add them all - if (rule == IntervalSetRule.UNION) { - setOne.addAll(setTwo); - return setOne; + if (rule == null || rule == IntervalSetRule.UNION) { + retList.addAll(setOne); + retList.addAll(setTwo); + return Collections.unmodifiableList(retList); } // else we're INTERSECTION, create two indexes into the lists int iOne = 0; int iTwo = 0; - // our master list, since we can't guarantee removal time in a generic list - LinkedList retList = new LinkedList(); - // merge the second into the first using the rule while (iTwo < setTwo.size() && iOne < setOne.size()) // if the first list is ahead, drop items off the second until we overlap @@ -204,7 +208,7 @@ public class IntervalUtils { throw new UserException.BadInput("The INTERSECTION of your -L options produced no intervals."); // we don't need to add the rest of remaining locations, since we know they don't overlap. return what we have - return retList; + return Collections.unmodifiableList(retList); } /** @@ -218,6 +222,8 @@ public class IntervalUtils { * @return A sorted, merged version of the intervals passed in. 
*/ public static GenomeLocSortedSet sortAndMergeIntervals(GenomeLocParser parser, List intervals, IntervalMergingRule mergingRule) { + // Make a copy of the (potentially unmodifiable) list to be sorted + intervals = new ArrayList(intervals); // sort raw interval list Collections.sort(intervals); // now merge raw interval list @@ -481,6 +487,70 @@ public class IntervalUtils { return new SplitLocusRecursive(split, remaining); } + /** + * Parses the include and exclude interval bindings and returns the included intervals with any excluded regions subtracted. + */ + public static GenomeLocSortedSet parseIntervalBindings( + final ReferenceDataSource referenceDataSource, + final List> intervals, + final IntervalSetRule intervalSetRule, final IntervalMergingRule intervalMergingRule, final int intervalPadding, + final List> excludeIntervals) { + + Pair includeExcludePair = parseIntervalBindingsPair( + referenceDataSource, intervals, intervalSetRule, intervalMergingRule, intervalPadding, excludeIntervals); + + GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst(); + GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond(); + + if (excludeSortedSet != null) { + return includeSortedSet.subtractRegions(excludeSortedSet); + } else { + return includeSortedSet; + } + } + + public static Pair parseIntervalBindingsPair( + final ReferenceDataSource referenceDataSource, + final List> intervals, + final IntervalSetRule intervalSetRule, final IntervalMergingRule intervalMergingRule, final int intervalPadding, + final List> excludeIntervals) { + GenomeLocParser genomeLocParser = new GenomeLocParser(referenceDataSource.getReference()); + + // if include argument isn't given, create new set of all possible intervals + GenomeLocSortedSet includeSortedSet = ((intervals == null || intervals.size() == 0) ? 
+ GenomeLocSortedSet.createSetFromSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary()) : + loadIntervals(intervals, intervalSetRule, intervalMergingRule, intervalPadding, genomeLocParser)); + + GenomeLocSortedSet excludeSortedSet = null; + if (excludeIntervals != null && excludeIntervals.size() > 0) { + excludeSortedSet = loadIntervals(excludeIntervals, IntervalSetRule.UNION, intervalMergingRule, 0, genomeLocParser); + } + return new Pair(includeSortedSet, excludeSortedSet); + } + + public static GenomeLocSortedSet loadIntervals( + final List> intervalBindings, + final IntervalSetRule rule, final IntervalMergingRule intervalMergingRule, final int padding, + final GenomeLocParser genomeLocParser) { + List allIntervals = new ArrayList(); + for ( IntervalBinding intervalBinding : intervalBindings) { + @SuppressWarnings("unchecked") + List intervals = intervalBinding.getIntervals(genomeLocParser); + + if ( intervals.isEmpty() ) { + logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed."); + } + + if ( padding > 0 ) { + intervals = getIntervalsWithFlanks(genomeLocParser, intervals, padding); + } + + allIntervals = mergeListsBySetOperator(intervals, allIntervals, rule); + } + + return sortAndMergeIntervals(genomeLocParser, allIntervals, intervalMergingRule); + } + private final static class SplitLocusRecursive { final List split; final LinkedList remaining; @@ -546,7 +616,7 @@ public class IntervalUtils { */ public static List mergeIntervalLocations(final List raw, IntervalMergingRule rule) { if (raw.size() <= 1) - return raw; + return Collections.unmodifiableList(raw); else { ArrayList merged = new ArrayList(); Iterator it = raw.iterator(); @@ -555,7 +625,7 @@ public class IntervalUtils { GenomeLoc curr = it.next(); if (prev.overlapsP(curr)) { prev = prev.merge(curr); - } else if (prev.contiguousP(curr) && rule == IntervalMergingRule.ALL) { + } else if (prev.contiguousP(curr) && (rule == 
null || rule == IntervalMergingRule.ALL)) { prev = prev.merge(curr); } else { merged.add(prev); @@ -563,7 +633,7 @@ public class IntervalUtils { } } merged.add(prev); - return merged; + return Collections.unmodifiableList(merged); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index 3ce62b697..2f8b1e9b5 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -24,32 +24,17 @@ package org.broadinstitute.sting.gatk; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import net.sf.picard.util.Interval; -import net.sf.picard.util.IntervalList; -import net.sf.samtools.SAMFileHeader; -import org.broad.tribble.Feature; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.ArgumentException; -import org.broadinstitute.sting.commandline.IntervalBinding; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.walkers.PrintReadsWalker; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; - -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.interval.IntervalSetRule; -import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; -import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collection; -import java.util.List; - /** * Tests selected functionality in the GenomeAnalysisEngine class @@ -91,65 +76,4 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { 
testEngine.validateSuppliedIntervals(); } - - @DataProvider(name="invalidIntervalTestData") - public Object[][] invalidIntervalDataProvider() throws Exception { - GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); - GATKArgumentCollection argCollection = new GATKArgumentCollection(); - testEngine.setArguments(argCollection); - - File fastaFile = new File("public/testdata/exampleFASTA.fasta"); - GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile)); - testEngine.setGenomeLocParser(genomeLocParser); - - return new Object[][] { - new Object[] {testEngine, genomeLocParser, "chr1", 10000000, 20000000}, - new Object[] {testEngine, genomeLocParser, "chr2", 1, 2}, - new Object[] {testEngine, genomeLocParser, "chr1", -1, 50} - }; - } - - @Test(dataProvider="invalidIntervalTestData") - public void testInvalidPicardIntervalHandling(GenomeAnalysisEngine testEngine, GenomeLocParser genomeLocParser, - String contig, int intervalStart, int intervalEnd ) throws Exception { - - SAMFileHeader picardFileHeader = new SAMFileHeader(); - picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1")); - IntervalList picardIntervals = new IntervalList(picardFileHeader); - picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname")); - - File picardIntervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals"); - picardIntervals.write(picardIntervalFile); - - List> intervalArgs = new ArrayList>(1); - intervalArgs.add(new IntervalBinding(picardIntervalFile.getAbsolutePath())); - - testEngine.loadIntervals(intervalArgs, IntervalSetRule.UNION); - } - - @Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData") - public void testInvalidGATKFileIntervalHandling(GenomeAnalysisEngine testEngine, GenomeLocParser genomeLocParser, - String contig, int intervalStart, int intervalEnd ) throws Exception { - - File gatkIntervalFile = 
createTempFile("testInvalidGATKFileIntervalHandling", ".intervals", - String.format("%s:%d-%d", contig, intervalStart, intervalEnd)); - - List> intervalArgs = new ArrayList>(1); - intervalArgs.add(new IntervalBinding(gatkIntervalFile.getAbsolutePath())); - - testEngine.loadIntervals(intervalArgs, IntervalSetRule.UNION); - } - - private File createTempFile( String tempFilePrefix, String tempFileExtension, String... lines ) throws Exception { - File tempFile = File.createTempFile(tempFilePrefix, tempFileExtension); - tempFile.deleteOnExit(); - - PrintWriter out = new PrintWriter(tempFile); - for ( String line : lines ) { - out.println(line); - } - out.close(); - - return tempFile; - } } diff --git a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java index 28573c600..3a9183e9a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java @@ -1,12 +1,16 @@ package org.broadinstitute.sting.utils.interval; +import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.util.Interval; +import net.sf.picard.util.IntervalList; import net.sf.samtools.SAMFileHeader; import org.apache.commons.io.FileUtils; import org.broad.tribble.Feature; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.IntervalBinding; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -45,7 +49,7 @@ public class IntervalUtilsUnitTest extends BaseTest { List locs = new ArrayList(); for (String 
interval: intervals) locs.add(hg18GenomeLocParser.parseGenomeLoc(interval)); - return locs; + return Collections.unmodifiableList(locs); } @BeforeClass @@ -277,7 +281,10 @@ public class IntervalUtilsUnitTest extends BaseTest { listEveryTwoFromOne.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x)); } - List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION); + List ret; + ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION); + Assert.assertEquals(ret.size(), 100); + ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, null); Assert.assertEquals(ret.size(), 100); ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION); Assert.assertEquals(ret.size(), 0); @@ -296,7 +303,10 @@ public class IntervalUtilsUnitTest extends BaseTest { allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x)); } - List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION); + List ret; + ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION); + Assert.assertEquals(ret.size(), 150); + ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, null); Assert.assertEquals(ret.size(), 150); ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION); Assert.assertEquals(ret.size(), 50); @@ -316,7 +326,10 @@ public class IntervalUtilsUnitTest extends BaseTest { } } - List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION); + List ret; + ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION); + Assert.assertEquals(ret.size(), 40); + ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, null); Assert.assertEquals(ret.size(), 40); ret = 
IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION); Assert.assertEquals(ret.size(), 20); @@ -761,7 +774,13 @@ public class IntervalUtilsUnitTest extends BaseTest { List locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Collections.singletonList(privateTestDir + unmergedIntervals)); Assert.assertEquals(locs.size(), 2); - List merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL); + List merged; + + merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL); + Assert.assertEquals(merged.size(), 1); + + // Test that null means the same as ALL + merged = IntervalUtils.mergeIntervalLocations(locs, null); Assert.assertEquals(merged.size(), 1); } @@ -993,6 +1012,74 @@ public class IntervalUtilsUnitTest extends BaseTest { // Attempting to use the legacy -L "interval1;interval2" syntax should produce an exception: IntervalBinding binding = new IntervalBinding("1;2"); - List intervals = binding.getIntervals(toolkit); + binding.getIntervals(toolkit); + } + + @DataProvider(name="invalidIntervalTestData") + public Object[][] invalidIntervalDataProvider() throws Exception { + GATKArgumentCollection argCollection = new GATKArgumentCollection(); + File fastaFile = new File("public/testdata/exampleFASTA.fasta"); + GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile)); + + return new Object[][] { + new Object[] {argCollection, genomeLocParser, "chr1", 10000000, 20000000}, + new Object[] {argCollection, genomeLocParser, "chr2", 1, 2}, + new Object[] {argCollection, genomeLocParser, "chr1", -1, 50} + }; + } + + @Test(dataProvider="invalidIntervalTestData") + public void testInvalidPicardIntervalHandling(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, + String contig, int intervalStart, int intervalEnd ) throws Exception { + + SAMFileHeader picardFileHeader = new SAMFileHeader(); + 
picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1")); + IntervalList picardIntervals = new IntervalList(picardFileHeader); + picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname")); + + File picardIntervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals"); + picardIntervals.write(picardIntervalFile); + + List> intervalArgs = new ArrayList>(1); + intervalArgs.add(new IntervalBinding(picardIntervalFile.getAbsolutePath())); + + IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser); + } + + @Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData") + public void testInvalidGATKFileIntervalHandling(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, + String contig, int intervalStart, int intervalEnd ) throws Exception { + + File gatkIntervalFile = createTempFile("testInvalidGATKFileIntervalHandling", ".intervals", + String.format("%s:%d-%d", contig, intervalStart, intervalEnd)); + + List> intervalArgs = new ArrayList>(1); + intervalArgs.add(new IntervalBinding(gatkIntervalFile.getAbsolutePath())); + + IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser); + } + + private File createTempFile( String tempFilePrefix, String tempFileExtension, String... 
lines ) throws Exception { + File tempFile = BaseTest.createTempFile(tempFilePrefix, tempFileExtension); + FileUtils.writeLines(tempFile, Arrays.asList(lines)); + return tempFile; + } + + @DataProvider(name = "sortAndMergeIntervals") + public Object[][] getSortAndMergeIntervals() { + return new Object[][] { + new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1:3", "chr1:2"), getLocs("chr1:1", "chr1:2", "chr1:3") }, + new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1:3", "chr1:2"), getLocs("chr1:1-3") }, + new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1:3", "chr2:2"), getLocs("chr1:1", "chr1:3", "chr2:2") }, + new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1:3", "chr2:2"), getLocs("chr1:1", "chr1:3", "chr2:2") }, + new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1"), getLocs("chr1") }, + new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1"), getLocs("chr1") } + }; + } + + @Test(dataProvider = "sortAndMergeIntervals") + public void testSortAndMergeIntervals(IntervalMergingRule merge, List unsorted, List expected) { + List sorted = IntervalUtils.sortAndMergeIntervals(hg18GenomeLocParser, unsorted, merge).toList(); + Assert.assertEquals(sorted, expected); } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunInfo.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunInfo.scala index 2caa4d2aa..078331602 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunInfo.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunInfo.scala @@ -24,7 +24,6 @@ package org.broadinstitute.sting.queue.engine -import java.util.Date import java.text.SimpleDateFormat /** @@ -36,18 +35,21 @@ class JobRunInfo { val formatter = new SimpleDateFormat("yy-MM-dd H:mm:ss:SSS"); /** The start time with millisecond resolution of this job */ - var startTime: Date = _ + var startTime: java.util.Date = _ /** 
The done time with millisecond resolution of this job */ - var doneTime: Date = _ + var doneTime: java.util.Date = _ var exechosts: String = "localhost" - def getStartTime = startTime - def getDoneTime = doneTime - def getFormattedStartTime = formatTime(getStartTime) - def getFormattedDoneTime = formatTime(getDoneTime) + def getStartTime: String = getTime(startTime) + def getDoneTime: String = getTime(doneTime) + def getFormattedStartTime = formatTime(startTime) + def getFormattedDoneTime = formatTime(doneTime) + + /** Helper function that returns the time of the date */ + private def getTime(d: java.util.Date): String = if ( d != null ) d.getTime.toString else "null" /** Helper function that pretty prints the date */ - private def formatTime(d: Date) = if ( d != null ) formatter.format(d) else "null" + private def formatTime(d: java.util.Date): String = if ( d != null ) formatter.format(d) else "null" def getExecHosts = exechosts @@ -55,14 +57,14 @@ class JobRunInfo { * Was any information set for this jobInfo? JobInfo can be unset because * the job never ran or because it already completed. */ - def isFilledIn = startTime != null + def isFilledIn = startTime != null && doneTime != null /** * How long did the job run (in wall time)? 
Returns -1 if this jobInfo isn't filled in */ def getRuntimeInMs: Long = { if ( isFilledIn ) - getDoneTime.getTime - getStartTime.getTime + doneTime.getTime - startTime.getTime else -1 } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala index 239f83482..76cefe2a5 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala @@ -35,7 +35,7 @@ import org.ggf.drmaa.Session class GridEngineJobRunner(session: Session, function: CommandLineFunction) extends DrmaaJobRunner(session, function) with Logging { // Grid Engine disallows certain characters from being in job names. // This replaces all illegal characters with underscores - protected override val jobNameFilter = """[\n\t\r/:@\\*?]""" + protected override val jobNameFilter = """[\n\t\r/:,@\\*?]""" protected override val minRunnerPriority = -1023 protected override val maxRunnerPriority = 0 diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala index 2609c3607..97669030a 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala @@ -38,11 +38,11 @@ class ContigScatterFunction extends GATKScatterFunction with InProcessFunction { override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount protected override def maxIntervals = { - GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).contigs.size + GATKScatterFunction.getGATKIntervals(this.originalGATK).contigs.size } def 
run() { - val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals) + val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK) IntervalUtils.scatterContigIntervals(gi.samFileHeader, gi.locs, this.scatterOutputFiles) } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala index 2f604a809..e619c0a02 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala @@ -26,13 +26,23 @@ package org.broadinstitute.sting.queue.extensions.gatk import java.io.File import collection.JavaConversions._ -import org.broadinstitute.sting.utils.interval.{IntervalMergingRule, IntervalUtils} +import org.broadinstitute.sting.utils.interval.{IntervalSetRule, IntervalMergingRule, IntervalUtils} import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource import net.sf.samtools.SAMFileHeader -import java.util.Collections -import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocSortedSet, GenomeLocParser} +import org.broadinstitute.sting.utils.GenomeLoc +import org.broadinstitute.sting.commandline._ +import org.broad.tribble.Feature + +case class GATKIntervals(reference: File, intervals: Seq[File], intervalsString: Seq[String], + intervalSetRule: IntervalSetRule, intervalMergingRule: IntervalMergingRule, intervalPadding: Option[Int], + excludeIntervals: Seq[File], excludeIntervalsString: Seq[String]) { + + def this(gatk: CommandLineGATK) = this( + gatk.reference_sequence, + gatk.intervals, gatk.intervalsString, + gatk.interval_set_rule, gatk.interval_merging, gatk.interval_padding, + gatk.excludeIntervals, gatk.excludeIntervalsString) -case class GATKIntervals(reference: File, intervals: Seq[String]) { private lazy val referenceDataSource = new ReferenceDataSource(reference) lazy val 
samFileHeader = { @@ -42,16 +52,46 @@ case class GATKIntervals(reference: File, intervals: Seq[String]) { } lazy val locs: java.util.List[GenomeLoc] = { - val parser = new GenomeLocParser(referenceDataSource.getReference) - val parsedLocs = - if (intervals.isEmpty) - GenomeLocSortedSet.createSetFromSequenceDictionary(samFileHeader.getSequenceDictionary).toList - else - IntervalUtils.parseIntervalArguments(parser, intervals) - Collections.sort(parsedLocs) - val mergedLocs = IntervalUtils.mergeIntervalLocations(parsedLocs, IntervalMergingRule.OVERLAPPING_ONLY) - Collections.unmodifiableList(mergedLocs) + val includeIntervalBindings = this.intervals.map(GATKIntervals.createBinding(_, "intervals")) ++ + this.intervalsString.map(GATKIntervals.createBinding(_, "intervalsString")) + val excludeIntervalBindings = this.excludeIntervals.map(GATKIntervals.createBinding(_, "excludeIntervals")) ++ + this.excludeIntervalsString.map(GATKIntervals.createBinding(_, "excludeIntervalsString")) + + IntervalUtils.parseIntervalBindings( + referenceDataSource, + includeIntervalBindings, + intervalSetRule, intervalMergingRule, intervalPadding.getOrElse(0), + excludeIntervalBindings).toList } lazy val contigs = locs.map(_.getContig).distinct.toSeq } + +object GATKIntervals { + def copyIntervalArguments(src: CommandLineGATK, dst: CommandLineGATK) { + dst.reference_sequence = src.reference_sequence + dst.intervals = src.intervals + dst.intervalsString = src.intervalsString + dst.interval_set_rule = src.interval_set_rule + dst.interval_merging = src.interval_merging + dst.interval_padding = src.interval_padding + dst.excludeIntervals = src.excludeIntervals + dst.excludeIntervalsString = src.excludeIntervalsString + } + + private def createBinding(interval: File, argumentName: String): IntervalBinding[Feature] = { + val tags = interval match { + case taggedFile: TaggedFile => ParsingMethod.parseTags(argumentName, taggedFile.tag) + case file: File => new Tags + } + 
createBinding(interval.getAbsolutePath, argumentName, tags) + } + + private def createBinding(interval: String, argumentName: String): IntervalBinding[Feature] = { + createBinding(interval, argumentName, new Tags) + } + + private def createBinding(interval: String, argumentName: String, tags: Tags): IntervalBinding[Feature] = { + ArgumentTypeDescriptor.parseBinding(interval, classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, tags, argumentName).asInstanceOf[IntervalBinding[Feature]] + } +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKScatterFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKScatterFunction.scala index 28c3f41e9..9e79e8f61 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKScatterFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKScatterFunction.scala @@ -28,14 +28,17 @@ import org.broadinstitute.sting.utils.interval.IntervalUtils import java.io.File import org.broadinstitute.sting.utils.io.IOUtils import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, ScatterFunction} -import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.commandline._ trait GATKScatterFunction extends ScatterFunction { - /** The runtime field to set for specifying an interval file. */ + /* The runtime field to set for specifying intervals. */ private final val intervalsField = "intervals" - - /** The runtime field to set for specifying an interval string. 
*/ private final val intervalsStringField = "intervalsString" + private final val excludeIntervalsField = "excludeIntervals" + private final val excludeIntervalsStringField = "excludeIntervalsString" + private final val intervalsSetRuleField = "interval_set_rule" + private final val intervalMergingField = "interval_merging" + private final val intervalPaddingField = "interval_padding" @Output(doc="Scatter function outputs") var scatterOutputFiles: Seq[File] = Nil @@ -43,25 +46,14 @@ trait GATKScatterFunction extends ScatterFunction { /** The original GATK function. */ protected var originalGATK: CommandLineGATK = _ - /** The reference sequence for the GATK function. */ - protected var referenceSequence: File = _ - - /** The list of interval files ("/path/to/interval.list") or interval strings ("chr1", "chr2") to parse into smaller parts. */ - protected var intervals: Seq[String] = Nil - /** Whether the last scatter job should also include any unmapped reads. */ protected var includeUnmapped: Boolean = _ override def init() { this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] - this.referenceSequence = this.originalGATK.reference_sequence - if (this.originalGATK.intervals.isEmpty && (this.originalGATK.intervalsString == null || this.originalGATK.intervalsString.isEmpty)) { - this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, Seq.empty[String]).contigs - } else { - this.intervals ++= this.originalGATK.intervals.map(_.toString) - this.intervals ++= this.originalGATK.intervalsString.filterNot(interval => IntervalUtils.isUnmapped(interval)) + // If intervals have been specified check if unmapped is included + if (this.originalGATK.intervals.size + this.originalGATK.intervalsString.size > 0) this.includeUnmapped = this.originalGATK.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval)) - } } override def isScatterGatherable = { @@ -74,6 +66,12 @@ trait GATKScatterFunction extends ScatterFunction { 
cloneFunction.setFieldValue(this.intervalsStringField, Seq("unmapped")) else cloneFunction.setFieldValue(this.intervalsStringField, Seq.empty[String]) + + cloneFunction.setFieldValue(this.intervalsSetRuleField, null) + cloneFunction.setFieldValue(this.intervalMergingField, null) + cloneFunction.setFieldValue(this.intervalPaddingField, None) + cloneFunction.setFieldValue(this.excludeIntervalsField, Seq.empty[File]) + cloneFunction.setFieldValue(this.excludeIntervalsStringField, Seq.empty[String]) } override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) { @@ -85,29 +83,28 @@ trait GATKScatterFunction extends ScatterFunction { } /** - * Returns true if all interval files exist. + * @return true if all interval files exist. */ protected def intervalFilesExist = { - !this.intervals.exists(interval => IntervalUtils.isIntervalFile(interval, false) && !new File(interval).exists) + !(this.originalGATK.intervals ++ this.originalGATK.excludeIntervals).exists(interval => !interval.exists()) } /** - * Returns the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time. * @return the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time. 
*/ protected def maxIntervals: Int } object GATKScatterFunction { - var gatkIntervals = Seq.empty[GATKIntervals] + var gatkIntervalsCache = Seq.empty[GATKIntervals] - def getGATKIntervals(reference: File, intervals: Seq[String]) = { - gatkIntervals.find(gi => gi.reference == reference && gi.intervals == intervals) match { - case Some(gi) => gi + def getGATKIntervals(originalFunction: CommandLineGATK) = { + val gatkIntervals = new GATKIntervals(originalFunction) + gatkIntervalsCache.find(_ == gatkIntervals) match { + case Some(existingGatkIntervals) => existingGatkIntervals case None => - val gi = new GATKIntervals(reference, intervals) - gatkIntervals :+= gi - gi + gatkIntervalsCache :+= gatkIntervals + gatkIntervals } } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala index 40a6fc4b4..03b142bca 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala @@ -33,12 +33,12 @@ import org.broadinstitute.sting.queue.function.InProcessFunction */ class IntervalScatterFunction extends GATKScatterFunction with InProcessFunction { protected override def maxIntervals = - GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).locs.size + GATKScatterFunction.getGATKIntervals(this.originalGATK).locs.size override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount def run() { - val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals) + val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK) val splits = IntervalUtils.splitFixedIntervals(gi.locs, this.scatterOutputFiles.size) IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles) } diff 
--git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/LocusScatterFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/LocusScatterFunction.scala index 8f52b9b82..150df4e38 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/LocusScatterFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/LocusScatterFunction.scala @@ -31,13 +31,11 @@ import org.broadinstitute.sting.queue.function.InProcessFunction /** * A scatter function that divides down to the locus level. */ -//class LocusScatterFunction extends IntervalScatterFunction { } - class LocusScatterFunction extends GATKScatterFunction with InProcessFunction { protected override def maxIntervals = scatterCount def run() { - val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals) + val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK) val splits = IntervalUtils.splitLocusIntervals(gi.locs, this.scatterOutputFiles.size) IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala index 11a66a37b..7862dec41 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala @@ -37,14 +37,11 @@ class VcfGatherFunction extends CombineVariants with GatherFunction { private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] - override def freezeFieldValues { + override def freezeFieldValues() { this.jarFile = this.originalGATK.jarFile - this.reference_sequence = this.originalGATK.reference_sequence - this.intervals = this.originalGATK.intervals - this.intervalsString = this.originalGATK.intervalsString - this.variant = 
this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) } this.out = this.originalOutput + GATKIntervals.copyIntervalArguments(this.originalGATK, this) // NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK @@ -55,6 +52,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction { val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME) this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean] - super.freezeFieldValues + super.freezeFieldValues() } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala index 534d68069..13448afdd 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala @@ -49,6 +49,11 @@ trait JavaCommandLineFunction extends CommandLineFunction { */ var javaMemoryLimit: Option[Double] = None + /** + * Max number of GC threads + */ + var javaGCThreads: Option[Int] = None + override def freezeFieldValues() { super.freezeFieldValues() @@ -73,6 +78,8 @@ trait JavaCommandLineFunction extends CommandLineFunction { } def javaOpts = optional("-Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m", spaceSeparated=false) + + conditional(javaGCThreads.isDefined, "-XX:+UseParallelOldGC") + + optional("-XX:ParallelGCThreads=", javaGCThreads, spaceSeparated=false) + required("-Djava.io.tmpdir=", jobTempDir, spaceSeparated=false) def commandLine = required("java") + diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala index 73ab7c366..c69a310b3 100644 --- 
a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala @@ -56,8 +56,8 @@ trait QJobReport extends Logging { "jobName" -> QJobReport.workAroundSameJobNames(this), "intermediate" -> self.isIntermediate, "exechosts" -> info.getExecHosts, - "startTime" -> info.getStartTime.getTime, - "doneTime" -> info.getDoneTime.getTime, + "startTime" -> info.getStartTime, + "doneTime" -> info.getDoneTime, "formattedStartTime" -> info.getFormattedStartTime, "formattedDoneTime" -> info.getFormattedDoneTime, "runtime" -> info.getRuntimeInMs).mapValues((x:Any) => if (x != null) x.toString else "null") diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala b/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala index 6b615e6d9..0d8edc25d 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala @@ -70,7 +70,7 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @return The parsed object. 
*/ def parse(parsingEngine: ParsingEngine, source: ArgumentSource, typeType: Type, argumentMatches: ArgumentMatches) = { - parse(parsingEngine,source, makeRawTypeIfNecessary(typeType), argumentMatches) + parse(parsingEngine,source, ArgumentTypeDescriptor.makeRawTypeIfNecessary(typeType), argumentMatches) } def parse(parsingEngine: ParsingEngine, source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = { diff --git a/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala b/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala index b23350557..2c6016c9b 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala @@ -26,19 +26,21 @@ package org.broadinstitute.sting.queue.extensions.gatk import java.io.File import org.testng.Assert -import org.testng.annotations.Test +import org.testng.annotations.{DataProvider, Test} import org.broadinstitute.sting.BaseTest import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile import org.broadinstitute.sting.utils.{GenomeLocSortedSet, GenomeLocParser} import collection.JavaConversions._ import org.broadinstitute.sting.utils.interval.IntervalUtils +import org.broadinstitute.sting.utils.exceptions.UserException class GATKIntervalsUnitTest { private final lazy val hg18Reference = new File(BaseTest.hg18Reference) private final lazy val hg18GenomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(hg18Reference)) private final lazy val hg18ReferenceLocs = GenomeLocSortedSet. 
createSetFromSequenceDictionary(new ReferenceDataSource(hg18Reference).getReference.getSequenceDictionary).toList + private final lazy val hg19GenomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(hg19Reference)) private final lazy val hg19Reference = new File(BaseTest.hg19Reference) @@ -48,14 +50,14 @@ class GATKIntervalsUnitTest { val chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-3") val chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:3-5") - val gi = new GATKIntervals(hg18Reference, Seq("chr1:1-1", "chr2:2-3", "chr3:3-5")) + val gi = createGATKIntervals(hg18Reference, Seq("chr1:1-1", "chr2:2-3", "chr3:3-5")) Assert.assertEquals(gi.locs.toSeq, Seq(chr1, chr2, chr3)) Assert.assertEquals(gi.contigs, Seq("chr1", "chr2", "chr3")) } @Test(timeOut = 30000L) def testIntervalFile() { - var gi = new GATKIntervals(hg19Reference, Seq(BaseTest.hg19Intervals)) + val gi = createGATKIntervals(hg19Reference, Seq(BaseTest.hg19Intervals)) Assert.assertEquals(gi.locs.size, 189894) // Timeout check is because of bad: // for(Item item: javaConvertedScalaList) @@ -67,28 +69,85 @@ class GATKIntervalsUnitTest { @Test def testEmptyIntervals() { - val gi = new GATKIntervals(hg18Reference, Nil) + val gi = createGATKIntervals(hg18Reference, Nil) Assert.assertEquals(gi.locs, hg18ReferenceLocs) Assert.assertEquals(gi.contigs.size, hg18ReferenceLocs.size) } @Test def testContigCounts() { - Assert.assertEquals(new GATKIntervals(hg18Reference, Nil).contigs, hg18ReferenceLocs.map(_.getContig)) - Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1", "chr2", "chr3")).contigs, Seq("chr1", "chr2", "chr3")) - Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, Seq("chr1", "chr2", "chr3")) + Assert.assertEquals(createGATKIntervals(hg18Reference, Nil).contigs, hg18ReferenceLocs.map(_.getContig)) + Assert.assertEquals(createGATKIntervals(hg18Reference, Seq("chr1", "chr2", "chr3")).contigs, Seq("chr1", 
"chr2", "chr3")) + Assert.assertEquals(createGATKIntervals(hg18Reference, Seq("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, Seq("chr1", "chr2", "chr3")) } - @Test - def testSortAndMergeIntervals() { - testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-10", "chr1:1-10"), Seq("chr1:1-10")) - testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-11", "chr1:1-12"), Seq("chr1:1-12")) - testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"), Seq("chr1:1-10", "chr1:11-20", "chr1:21-30")) - testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:10-20", "chr1:21-30"), Seq("chr1:1-20", "chr1:21-30")) - testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:21-30", "chr1:10-20"), Seq("chr1:1-20", "chr1:21-30")) + @DataProvider(name="sortAndMergeIntervals") + def getSortAndMergeIntervals: Array[Array[AnyRef]] = { + Array( + Array(Seq("chr1:1-10", "chr1:1-10", "chr1:1-10"), Seq("chr1:1-10")), + Array(Seq("chr1:1-10", "chr1:1-11", "chr1:1-12"), Seq("chr1:1-12")), + Array(Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"), Seq("chr1:1-30")), + Array(Seq("chr1:1-10", "chr1:10-20", "chr1:21-30"), Seq("chr1:1-30")), + Array(Seq("chr1:1-9", "chr1:21-30", "chr1:11-20"), Seq("chr1:1-9", "chr1:11-30")) + ).asInstanceOf[Array[Array[AnyRef]]] } - private def testSortAndMergeIntervals(actual: Seq[String], expected: Seq[String]) { - Assert.assertEquals(new GATKIntervals(hg18Reference, actual).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_))) + @Test(dataProvider="sortAndMergeIntervals") + def testSortAndMergeIntervals(unmerged: Seq[String], expected: Seq[String]) { + Assert.assertEquals(createGATKIntervals(hg18Reference, unmerged).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_))) + } + + @DataProvider(name="taggedFiles") + def getTaggedFiles: Array[Array[AnyRef]] = { + Array( + Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", null, Seq("chr1:1-10")), + Array(hg18Reference, BaseTest.privateTestDir + 
"small_unmerged_gatk_intervals.list", "", Seq("chr1:1-10")), + Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "myList", Seq("chr1:1-10")), + Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", null, Seq("1:897475-897481", "1:10001292")), + Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "", Seq("1:897475-897481", "1:10001292")), + Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "myVcf", Seq("1:897475-897481", "1:10001292")), + Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "VCF", Seq("1:897475-897481", "1:10001292")), + Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "myVcf,VCF", Seq("1:897475-897481", "1:10001292")), + Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", null, Seq("20:1-999", "20:1002-2000", "22:1001-6000")), + Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "", Seq("20:1-999", "20:1002-2000", "22:1001-6000")), + Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "myBed", Seq("20:1-999", "20:1002-2000", "22:1001-6000")), + Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "BED", Seq("20:1-999", "20:1002-2000", "22:1001-6000")), + Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "myBed,BED", Seq("20:1-999", "20:1002-2000", "22:1001-6000")) + ) + } + + @Test(dataProvider="taggedFiles") + def testTaggedFiles(reference: File, file: String, tags: String, expected: Seq[String]) { + val gatk = new CommandLineGATK + gatk.reference_sequence = reference + gatk.intervals = Seq(new TaggedFile(file, tags)) + val parser = if (reference == hg18Reference) hg18GenomeLocParser else hg19GenomeLocParser + Assert.assertEquals(new GATKIntervals(gatk).locs.toSeq, expected.map(parser.parseGenomeLoc(_))) + } + + @DataProvider(name="badTaggedFiles") + def getBadTaggedFiles: Array[Array[AnyRef]] = { + Array( + Array(hg18Reference, 
BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "VCF"), + Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "too,many,tags"), + Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "BED"), + Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "VCF,myVCF"), + Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "myVCF,VCF,extra"), + Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "VCF"), + Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "BED,myBed"), + Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "myBed,BED,extra") + ).asInstanceOf[Array[Array[AnyRef]]] + } + + @Test(dataProvider = "badTaggedFiles", expectedExceptions = Array(classOf[UserException])) + def testBadTaggedFiles(reference: File, file: String, tags: String) { + testTaggedFiles(reference, file, tags, Nil) + } + + private def createGATKIntervals(reference: File, intervals: Seq[String]) = { + val gatk = new CommandLineGATK + gatk.reference_sequence = reference + gatk.intervalsString = intervals + new GATKIntervals(gatk) } } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala index 6e37ae2a3..c9d8b59c9 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala @@ -42,4 +42,43 @@ class ExampleUnifiedGenotyperPipelineTest { spec.jobRunners = PipelineTest.allJobRunners PipelineTest.executeTest(spec) } + + @Test + def testUnifiedGenotyperWithGatkIntervals() { + val spec = new PipelineTestSpec + spec.name = "unifiedgenotyper_with_gatk_intervals" + spec.args = Array( + " -S 
public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", + " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam", + " -R " + BaseTest.hg18Reference, + " -L " + BaseTest.validationDataLocation + "intervalTest.intervals").mkString + spec.jobRunners = Seq("Lsf706") + PipelineTest.executeTest(spec) + } + + @Test + def testUnifiedGenotyperWithBedIntervals() { + val spec = new PipelineTestSpec + spec.name = "unifiedgenotyper_with_bed_intervals" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", + " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam", + " -R " + BaseTest.hg18Reference, + " -L " + BaseTest.validationDataLocation + "intervalTest.bed").mkString + spec.jobRunners = Seq("Lsf706") + PipelineTest.executeTest(spec) + } + + @Test + def testUnifiedGenotyperWithVcfIntervals() { + val spec = new PipelineTestSpec + spec.name = "unifiedgenotyper_with_vcf_intervals" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", + " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam", + " -R " + BaseTest.hg18Reference, + " -L " + BaseTest.validationDataLocation + "intervalTest.1.vcf").mkString + spec.jobRunners = Seq("Lsf706") + PipelineTest.executeTest(spec) + } }