Refactored parsing of Rod/IntervalBinding. Queue S/G now uses all interval arguments passed to CommandLineGATK QFunctions, including support for BED/Tribble types, -XL, ISR, and padding.

Updated HSP to use new padding arguments instead of flank intervals file, plus latest QC evals.
IntervalUtils now returns unmodifiable lists so that callers don't mutate the collections.
Added a JavaCommandLineFunction.javaGCThreads option to test reducing Java's automatic GC thread allocation, which is based on the number of CPUs.
Added comma to list of characters to convert to underscores in GridEngine job names so that GE JSV doesn't choke on the -N values.
JobRunInfo now handles null done times when jobs crash with unexpected errors.
This commit is contained in:
Khalid Shakir 2012-06-27 01:15:22 -04:00
parent a5df8f1277
commit 746a5e95f3
19 changed files with 550 additions and 352 deletions

View File

@ -289,7 +289,7 @@ public abstract class ArgumentTypeDescriptor {
return field.isAnnotationPresent(Hidden.class);
}
public Class makeRawTypeIfNecessary(Type t) {
public static Class makeRawTypeIfNecessary(Type t) {
if ( t == null )
return null;
else if ( t instanceof ParameterizedType )
@ -300,6 +300,114 @@ public abstract class ArgumentTypeDescriptor {
throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
}
}
/**
* The actual argument parsing method.
* @param source source
* @param type type to check
* @param matches matches
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
*/
protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue(defaultDefinition, matches);
@SuppressWarnings("unchecked")
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
String name = defaultDefinition.fullName;
return parseBinding(value, parameterType, type, name, tags, source.field.getName());
}
/**
 * Constructs a binding object from a raw command-line value plus its tags.
 *
 * Tribble-type resolution order: an explicit second positional tag (-X:name,type),
 * then a single positional tag that names a known type (-X:type), then dynamic
 * detection from the file contents; finally, if no type could be determined, a
 * String-only constructor of the binding class is attempted (used by IntervalBinding,
 * which accepts raw interval strings).
 *
 * @param value The source of the binding
 * @param parameterType The Tribble Feature parameter type
 * @param bindingClass The class type for the binding (ex: RodBinding, IntervalBinding, etc.) Must have the correct constructor for creating the binding.
 * @param bindingName The name of the binding passed to the constructor.
 * @param tags Tags for the binding used for parsing and passed to the constructor.
 * @param fieldName The name of the field that was parsed. Used for error reporting.
 * @return The newly created binding object of type bindingClass.
 */
public static Object parseBinding(String value, Class<? extends Feature> parameterType, Type bindingClass,
                                  String bindingName, Tags tags, String fieldName) {
    try {
        String tribbleType = null;
        // must have one or two tag values here
        if ( tags.getPositionalTags().size() > 2 ) {
            // NOTE(review): the format string reads "for argument %s : %s" but receives
            // (value, fieldName) -- confirm the intended ordering of these two arguments.
            throw new UserException.CommandLineException(
                    String.format("Unexpected number of positional tags for argument %s : %s. " +
                            "Rod bindings only support -X:type and -X:name,type argument styles",
                            value, fieldName));
        } else if ( tags.getPositionalTags().size() == 2 ) {
            // -X:name,type style
            bindingName = tags.getPositionalTags().get(0);
            tribbleType = tags.getPositionalTags().get(1);
            // Validate the explicit type name against the registry of supported Tribble types.
            FeatureManager manager = new FeatureManager();
            if ( manager.getByName(tribbleType) == null )
                throw new UserException.CommandLineException(
                        String.format("Unable to find tribble type '%s' provided on the command line. " +
                                "Please select a correct type from among the supported types:%n%s",
                                tribbleType, manager.userFriendlyListOfAvailableFeatures(parameterType)));
        } else {
            // case with 0 or 1 positional tags
            FeatureManager manager = new FeatureManager();
            // -X:type style is a type when we cannot determine the type dynamically
            String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
            if ( tag1 != null ) {
                if ( manager.getByName(tag1) != null ) // this a type
                    tribbleType = tag1;
                else
                    bindingName = tag1;
            }
            if ( tribbleType == null ) {
                // try to determine the file type dynamically
                File file = new File(value);
                if ( file.canRead() && file.isFile() ) {
                    FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
                    if ( featureDescriptor != null ) {
                        tribbleType = featureDescriptor.getName();
                        logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
                    }
                }
                if ( tribbleType == null ) {
                    // IntervalBinding can be created from a normal String
                    Class rawType = (makeRawTypeIfNecessary(bindingClass));
                    try {
                        return rawType.getConstructor(String.class).newInstance(value);
                    } catch (NoSuchMethodException e) {
                        /* ignore */
                        // No String-only constructor (e.g. a plain RodBinding): fall through
                        // to the file-based error reporting below.
                    }
                    if ( ! file.exists() ) {
                        throw new UserException.CouldNotReadInputFile(file, "file does not exist");
                    } else if ( ! file.canRead() || ! file.isFile() ) {
                        throw new UserException.CouldNotReadInputFile(file, "file could not be read");
                    } else {
                        // The file is readable but its type could not be detected:
                        // require an explicit type tag from the user.
                        throw new UserException.CommandLineException(
                                String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
                                        "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
                                        manager.userFriendlyListOfAvailableFeatures(parameterType)));
                    }
                }
            }
        }
        // Standard path: invoke the (Class, String, String, String, Tags) constructor of the binding class.
        Constructor ctor = (makeRawTypeIfNecessary(bindingClass)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
        return ctor.newInstance(parameterType, bindingName, value, tribbleType, tags);
    } catch (Exception e) {
        // Re-throw UserExceptions untouched; wrap anything else as a command-line failure.
        if ( e instanceof UserException )
            throw ((UserException)e);
        else
            throw new UserException.CommandLineException(
                    String.format("Failed to parse value %s for argument %s. Message: %s",
                            value, fieldName, e.getMessage()));
    }
}
}
/**
@ -324,6 +432,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
// A type default (an unbound RodBinding) is created only for optional arguments.
public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); }
@Override
@SuppressWarnings("unchecked")
public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
Class parameterType = JVMUtils.getParameterizedTypeClass(type);
return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);
@ -336,118 +445,16 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
return parse(parsingEngine, source, type, matches, false);
}
/**
* The actual argument parsing method.
*
* IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
* that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
* this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
* is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
* usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
* side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
* continue to input them out of order (whereas Tribble Features are ordered).
*
* @param parsingEngine parsing engine
* @param source source
* @param type type to check
* @param matches matches
* @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
*/
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches );
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
try {
String name = defaultDefinition.fullName;
String tribbleType = null;
Tags tags = getArgumentTags(matches);
// must have one or two tag values here
if ( tags.getPositionalTags().size() > 2 ) {
throw new UserException.CommandLineException(
String.format("Unexpected number of positional tags for argument %s : %s. " +
"Rod bindings only support -X:type and -X:name,type argument styles",
value, source.field.getName()));
} if ( tags.getPositionalTags().size() == 2 ) {
// -X:name,type style
name = tags.getPositionalTags().get(0);
tribbleType = tags.getPositionalTags().get(1);
} else {
// case with 0 or 1 positional tags
FeatureManager manager = new FeatureManager();
// -X:type style is a type when we cannot determine the type dynamically
String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
if ( tag1 != null ) {
if ( manager.getByName(tag1) != null ) // this a type
tribbleType = tag1;
else
name = tag1;
}
if ( tribbleType == null ) {
// try to determine the file type dynamically
File file = new File(value);
if ( file.canRead() && file.isFile() ) {
FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
if ( featureDescriptor != null ) {
tribbleType = featureDescriptor.getName();
logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
}
}
if ( tribbleType == null ) {
// IntervalBindings allow streaming conversion of Strings
if ( createIntervalBinding ) {
return new IntervalBinding(value);
}
if ( ! file.exists() ) {
throw new UserException.CouldNotReadInputFile(file, "file does not exist");
} else if ( ! file.canRead() || ! file.isFile() ) {
throw new UserException.CouldNotReadInputFile(file, "file could not be read");
} else {
throw new UserException.CommandLineException(
String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
manager.userFriendlyListOfAvailableFeatures(parameterType)));
}
}
}
}
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
Object result;
if ( createIntervalBinding ) {
result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
} else {
RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(rbind, tags);
parsingEngine.addRodBinding(rbind);
result = rbind;
}
return result;
} catch (InvocationTargetException e) {
throw new UserException.CommandLineException(
String.format("Failed to parse value %s for argument %s.",
value, source.field.getName()));
} catch (Exception e) {
if ( e instanceof UserException )
throw ((UserException)e);
else
throw new UserException.CommandLineException(
String.format("Failed to parse value %s for argument %s. Message: %s",
value, source.field.getName(), e.getMessage()));
}
// Parse the binding via the shared ArgumentTypeDescriptor.parseBinding() helper,
// then register the resulting RodBinding with the parsing engine.
Tags tags = getArgumentTags(matches);
RodBinding rbind = (RodBinding)parseBinding(source, type, matches, tags);
// Record the tags and the binding itself so they can be looked up later.
parsingEngine.addTags(rbind, tags);
parsingEngine.addRodBinding(rbind);
return rbind;
}
}
/**
* Parser for RodBinding objects
* Parser for IntervalBinding objects
*/
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
/**
@ -475,7 +482,7 @@ class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
*/
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
return parseBinding(source, type, matches, getArgumentTags(matches));
}
}
@ -783,7 +790,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
Class<? extends Multiplexer> multiplexerType = dependentArgument.field.getAnnotation(Multiplex.class).value();
Constructor<? extends Multiplexer> multiplexerConstructor = null;
Constructor<? extends Multiplexer> multiplexerConstructor;
try {
multiplexerConstructor = multiplexerType.getConstructor(sourceTypes);
multiplexerConstructor.setAccessible(true);
@ -792,7 +799,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
throw new ReviewedStingException(String.format("Unable to find constructor for class %s with parameters %s",multiplexerType.getName(),Arrays.deepToString(sourceFields)),ex);
}
Multiplexer multiplexer = null;
Multiplexer multiplexer;
try {
multiplexer = multiplexerConstructor.newInstance(sourceValues);
}

View File

@ -78,24 +78,7 @@ public abstract class ParsingMethod {
String argument = matcher.group(1).trim();
Tags tags = new Tags();
if(matcher.group(2) != null) {
for(String tag: Utils.split(matcher.group(2),",")) {
// Check for presence of an '=' sign, indicating a key-value pair in the tag line.
int equalDelimiterPos = tag.indexOf('=');
if(equalDelimiterPos >= 0) {
// Sanity check; ensure that there aren't multiple '=' in this key-value pair.
if(tag.indexOf('=',equalDelimiterPos+1) >= 0)
throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. Please ensure that " +
"key-value tags are of the form <key>=<value>, and neither key " +
"nor value contain the '=' character", tag, argument));
tags.addKeyValueTag(tag.substring(0,equalDelimiterPos),tag.substring(equalDelimiterPos+1));
}
else
tags.addPositionalTag(tag);
}
}
Tags tags = parseTags(argument, matcher.group(2));
// Find the most appropriate argument definition for the given argument.
ArgumentDefinition argumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher );
@ -105,6 +88,28 @@ public abstract class ParsingMethod {
return new ArgumentMatch(argument,argumentDefinition,position,tags);
}
/**
 * Parses a comma-separated tag string into a Tags object.
 * Tokens of the form key=value become key-value tags; all other tokens become
 * positional tags. A token containing more than one '=' is rejected.
 * @param argument the argument name the tags belong to (used in error messages)
 * @param tagString the raw comma-separated tag text; may be null (no tags)
 * @return the populated Tags object (empty when tagString is null)
 */
public static Tags parseTags(String argument, String tagString) {
    Tags result = new Tags();
    if (tagString == null)
        return result;
    for (String token : Utils.split(tagString, ",")) {
        int eq = token.indexOf('=');
        if (eq < 0) {
            // No '=' sign: treat the whole token as a positional tag.
            result.addPositionalTag(token);
            continue;
        }
        // key=value pair; reject a second '=' anywhere after the first.
        if (token.indexOf('=', eq + 1) >= 0)
            throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. Please ensure that " +
                    "key-value tags are of the form <key>=<value>, and neither key " +
                    "nor value contain the '=' character", token, argument));
        result.addKeyValueTag(token.substring(0, eq), token.substring(eq + 1));
    }
    return result;
}
/**
* A command-line argument always starts with an alphabetical character or underscore followed by any word character.
*/

View File

@ -30,7 +30,6 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
@ -54,9 +53,9 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
@ -582,7 +581,6 @@ public class GenomeAnalysisEngine {
* Setup the intervals to be processed
*/
protected void initializeIntervals() {
// return if no interval arguments at all
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
return;
@ -590,17 +588,22 @@ public class GenomeAnalysisEngine {
// Note that the use of '-L all' is no longer supported.
// if include argument isn't given, create new set of all possible intervals
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
loadIntervals(argCollection.intervals, argCollection.intervalSetRule, argCollection.intervalPadding));
Pair<GenomeLocSortedSet, GenomeLocSortedSet> includeExcludePair = IntervalUtils.parseIntervalBindingsPair(
this.referenceDataSource,
argCollection.intervals,
argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding,
argCollection.excludeIntervals);
GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst();
GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond();
// if no exclude arguments, can return parseIntervalArguments directly
if ( argCollection.excludeIntervals == null )
if ( excludeSortedSet == null )
intervals = includeSortedSet;
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
else {
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
// logging messages only printed when exclude (-XL) arguments are given
@ -613,43 +616,6 @@ public class GenomeAnalysisEngine {
}
}
/**
 * Loads the intervals relevant to the current execution, with no padding.
 * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
 * @param rule interval set rule used when combining the interval lists
 * @return A sorted, merged list of all intervals specified in this arg list.
 */
protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule ) {
    // Delegate to the padded overload with zero padding.
    return loadIntervals(argList, rule, 0);
}
/**
 * Loads the intervals relevant to the current execution
 * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
 * @param rule interval set rule used when combining the interval lists
 * @param padding how much to pad the intervals
 * @return A sorted, merged list of all intervals specified in this arg list.
 */
protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule, final int padding ) {
    List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
    for ( IntervalBinding intervalBinding : argList ) {
        // Each binding may expand to many intervals (e.g. an interval-list file).
        List<GenomeLoc> intervals = intervalBinding.getIntervals(this.getGenomeLocParser());
        if ( intervals.isEmpty() ) {
            // Warn but continue: an empty binding simply contributes nothing.
            logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
        }
        if ( padding > 0 ) {
            // Presumably extends each interval by 'padding' bases on both flanks
            // (see getIntervalsWithFlanks) -- TODO confirm.
            intervals = IntervalUtils.getIntervalsWithFlanks(this.getGenomeLocParser(), intervals, padding);
        }
        // Fold into the accumulated list using the requested set rule.
        allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
    }
    return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
}
/**
* Add additional, externally managed IO streams for inputs.
*

View File

@ -6,6 +6,8 @@ import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.samtools.SAMFileHeader;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -169,21 +171,23 @@ public class IntervalUtils {
*/
public static List<GenomeLoc> mergeListsBySetOperator(List<GenomeLoc> setOne, List<GenomeLoc> setTwo, IntervalSetRule rule) {
// shortcut, if either set is zero, return the other set
if (setOne == null || setOne.size() == 0 || setTwo == null || setTwo.size() == 0) return (setOne == null || setOne.size() == 0) ? setTwo : setOne;
if (setOne == null || setOne.size() == 0 || setTwo == null || setTwo.size() == 0)
return Collections.unmodifiableList((setOne == null || setOne.size() == 0) ? setTwo : setOne);
// our master list, since we can't guarantee removal time in a generic list
LinkedList<GenomeLoc> retList = new LinkedList<GenomeLoc>();
// if we're set to UNION, just add them all
if (rule == IntervalSetRule.UNION) {
setOne.addAll(setTwo);
return setOne;
if (rule == null || rule == IntervalSetRule.UNION) {
retList.addAll(setOne);
retList.addAll(setTwo);
return Collections.unmodifiableList(retList);
}
// else we're INTERSECTION, create two indexes into the lists
int iOne = 0;
int iTwo = 0;
// our master list, since we can't guarantee removal time in a generic list
LinkedList<GenomeLoc> retList = new LinkedList<GenomeLoc>();
// merge the second into the first using the rule
while (iTwo < setTwo.size() && iOne < setOne.size())
// if the first list is ahead, drop items off the second until we overlap
@ -204,7 +208,7 @@ public class IntervalUtils {
throw new UserException.BadInput("The INTERSECTION of your -L options produced no intervals.");
// we don't need to add the rest of remaining locations, since we know they don't overlap. return what we have
return retList;
return Collections.unmodifiableList(retList);
}
/**
@ -218,6 +222,8 @@ public class IntervalUtils {
* @return A sorted, merged version of the intervals passed in.
*/
public static GenomeLocSortedSet sortAndMergeIntervals(GenomeLocParser parser, List<GenomeLoc> intervals, IntervalMergingRule mergingRule) {
// Make a copy of the (potentially unmodifiable) list to be sorted
intervals = new ArrayList<GenomeLoc>(intervals);
// sort raw interval list
Collections.sort(intervals);
// now merge raw interval list
@ -481,6 +487,70 @@ public class IntervalUtils {
return new SplitLocusRecursive(split, remaining);
}
/**
 * Builds the final set of intervals to process: the include set minus the
 * exclude set, when any exclude intervals were supplied.
 * @param referenceDataSource the reference used to resolve intervals
 * @param intervals include interval bindings (may be null/empty for "whole reference")
 * @param intervalSetRule how multiple include lists are combined
 * @param intervalMergingRule how adjacent/overlapping intervals are merged
 * @param intervalPadding padding in bases applied to the include intervals
 * @param excludeIntervals exclude interval bindings (may be null/empty)
 * @return the sorted set of intervals to process
 */
public static GenomeLocSortedSet parseIntervalBindings(
        final ReferenceDataSource referenceDataSource,
        final List<IntervalBinding<Feature>> intervals,
        final IntervalSetRule intervalSetRule, final IntervalMergingRule intervalMergingRule, final int intervalPadding,
        final List<IntervalBinding<Feature>> excludeIntervals) {
    final Pair<GenomeLocSortedSet, GenomeLocSortedSet> pair = parseIntervalBindingsPair(
            referenceDataSource, intervals, intervalSetRule, intervalMergingRule, intervalPadding, excludeIntervals);
    final GenomeLocSortedSet include = pair.getFirst();
    final GenomeLocSortedSet exclude = pair.getSecond();
    // A null exclude set means no -XL arguments were given: the include set is final.
    return exclude == null ? include : include.subtractRegions(exclude);
}
/**
 * Parses the include and exclude interval bindings into a pair of sorted sets.
 * @param referenceDataSource reference used to build the GenomeLocParser and the full-genome default
 * @param intervals include bindings; null/empty means "the whole reference"
 * @param intervalSetRule how multiple include lists are combined
 * @param intervalMergingRule how adjacent/overlapping intervals are merged
 * @param intervalPadding padding (in bases) applied to the include intervals only
 * @param excludeIntervals exclude bindings; may be null/empty
 * @return a pair of (include set, exclude set); the exclude set is null when no exclusions were given
 */
public static Pair<GenomeLocSortedSet, GenomeLocSortedSet> parseIntervalBindingsPair(
        final ReferenceDataSource referenceDataSource,
        final List<IntervalBinding<Feature>> intervals,
        final IntervalSetRule intervalSetRule, final IntervalMergingRule intervalMergingRule, final int intervalPadding,
        final List<IntervalBinding<Feature>> excludeIntervals) {
    GenomeLocParser genomeLocParser = new GenomeLocParser(referenceDataSource.getReference());
    // if include argument isn't given, create new set of all possible intervals
    GenomeLocSortedSet includeSortedSet = ((intervals == null || intervals.size() == 0) ?
            GenomeLocSortedSet.createSetFromSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary()) :
            loadIntervals(intervals, intervalSetRule, intervalMergingRule, intervalPadding, genomeLocParser));
    GenomeLocSortedSet excludeSortedSet = null;
    if (excludeIntervals != null && excludeIntervals.size() > 0) {
        // Exclusions are always unioned together and never padded.
        excludeSortedSet = loadIntervals(excludeIntervals, IntervalSetRule.UNION, intervalMergingRule, 0, genomeLocParser);
    }
    return new Pair<GenomeLocSortedSet, GenomeLocSortedSet>(includeSortedSet, excludeSortedSet);
}
/**
 * Expands, pads, and combines the intervals from a list of bindings into one sorted set.
 * @param intervalBindings bindings; might be filenames, samtools-notation intervals, or a mix
 * @param rule set rule used to combine the lists (mergeListsBySetOperator treats null as UNION)
 * @param intervalMergingRule merging rule applied to the final sorted list
 * @param padding bases of padding added around each interval (0 = none)
 * @param genomeLocParser parser used to materialize GenomeLocs
 * @return the sorted, merged set of all intervals from all bindings
 */
public static GenomeLocSortedSet loadIntervals(
        final List<IntervalBinding<Feature>> intervalBindings,
        final IntervalSetRule rule, final IntervalMergingRule intervalMergingRule, final int padding,
        final GenomeLocParser genomeLocParser) {
    List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
    for ( IntervalBinding intervalBinding : intervalBindings) {
        @SuppressWarnings("unchecked")
        List<GenomeLoc> intervals = intervalBinding.getIntervals(genomeLocParser);
        if ( intervals.isEmpty() ) {
            // Warn but continue: an empty binding simply contributes nothing.
            logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
        }
        if ( padding > 0 ) {
            intervals = getIntervalsWithFlanks(genomeLocParser, intervals, padding);
        }
        // Fold into the accumulated list using the requested set rule.
        allIntervals = mergeListsBySetOperator(intervals, allIntervals, rule);
    }
    return sortAndMergeIntervals(genomeLocParser, allIntervals, intervalMergingRule);
}
private final static class SplitLocusRecursive {
final List<GenomeLoc> split;
final LinkedList<GenomeLoc> remaining;
@ -546,7 +616,7 @@ public class IntervalUtils {
*/
public static List<GenomeLoc> mergeIntervalLocations(final List<GenomeLoc> raw, IntervalMergingRule rule) {
if (raw.size() <= 1)
return raw;
return Collections.unmodifiableList(raw);
else {
ArrayList<GenomeLoc> merged = new ArrayList<GenomeLoc>();
Iterator<GenomeLoc> it = raw.iterator();
@ -555,7 +625,7 @@ public class IntervalUtils {
GenomeLoc curr = it.next();
if (prev.overlapsP(curr)) {
prev = prev.merge(curr);
} else if (prev.contiguousP(curr) && rule == IntervalMergingRule.ALL) {
} else if (prev.contiguousP(curr) && (rule == null || rule == IntervalMergingRule.ALL)) {
prev = prev.merge(curr);
} else {
merged.add(prev);
@ -563,7 +633,7 @@ public class IntervalUtils {
}
}
merged.add(prev);
return merged;
return Collections.unmodifiableList(merged);
}
}

View File

@ -24,32 +24,17 @@
package org.broadinstitute.sting.gatk;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.ArgumentException;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.walkers.PrintReadsWalker;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
* Tests selected functionality in the GenomeAnalysisEngine class
@ -91,65 +76,4 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
testEngine.validateSuppliedIntervals();
}
@DataProvider(name="invalidIntervalTestData")
public Object[][] invalidIntervalDataProvider() throws Exception {
    // Build a minimal engine + GenomeLocParser over the example FASTA so each test
    // case can evaluate intervals against a real sequence dictionary.
    GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
    GATKArgumentCollection argCollection = new GATKArgumentCollection();
    testEngine.setArguments(argCollection);
    File fastaFile = new File("public/testdata/exampleFASTA.fasta");
    GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
    testEngine.setGenomeLocParser(genomeLocParser);
    // Each row: engine, parser, contig, start, end. The values appear intended to be
    // invalid for the example reference (oversized range, unknown contig "chr2",
    // negative start) -- confirm against exampleFASTA's dictionary.
    return new Object[][] {
            new Object[] {testEngine, genomeLocParser, "chr1", 10000000, 20000000},
            new Object[] {testEngine, genomeLocParser, "chr2", 1, 2},
            new Object[] {testEngine, genomeLocParser, "chr1", -1, 50}
    };
}
@Test(dataProvider="invalidIntervalTestData")
public void testInvalidPicardIntervalHandling(GenomeAnalysisEngine testEngine, GenomeLocParser genomeLocParser,
                                              String contig, int intervalStart, int intervalEnd ) throws Exception {
    // Write the interval into a Picard-style interval list file over a header that
    // only declares chr1.
    SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1"));
    IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));
    File picardIntervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals");
    picardIntervals.write(picardIntervalFile);
    // Feed the file through the engine's interval-loading path.
    // NOTE(review): unlike testInvalidGATKFileIntervalHandling, no expectedExceptions
    // is declared here -- confirm whether loading invalid Picard intervals is
    // expected to succeed silently or to throw.
    List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
    intervalArgs.add(new IntervalBinding<Feature>(picardIntervalFile.getAbsolutePath()));
    testEngine.loadIntervals(intervalArgs, IntervalSetRule.UNION);
}
@Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData")
public void testInvalidGATKFileIntervalHandling(GenomeAnalysisEngine testEngine, GenomeLocParser genomeLocParser,
                                                String contig, int intervalStart, int intervalEnd ) throws Exception {
    // Write the invalid interval in GATK "contig:start-end" text format and expect
    // the engine to reject it with a UserException during loading.
    File gatkIntervalFile = createTempFile("testInvalidGATKFileIntervalHandling", ".intervals",
            String.format("%s:%d-%d", contig, intervalStart, intervalEnd));
    List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
    intervalArgs.add(new IntervalBinding<Feature>(gatkIntervalFile.getAbsolutePath()));
    testEngine.loadIntervals(intervalArgs, IntervalSetRule.UNION);
}
/**
 * Creates a self-deleting temporary file populated with the given lines.
 * @param tempFilePrefix prefix for the temp file name
 * @param tempFileExtension extension (including the dot) for the temp file name
 * @param lines lines to write into the file, one per line (may be empty)
 * @return the created temporary file
 * @throws Exception if the file cannot be created
 */
private File createTempFile( String tempFilePrefix, String tempFileExtension, String... lines ) throws Exception {
    File tempFile = File.createTempFile(tempFilePrefix, tempFileExtension);
    tempFile.deleteOnExit();
    PrintWriter out = new PrintWriter(tempFile);
    try {
        for ( String line : lines ) {
            out.println(line);
        }
    } finally {
        // Always release the file handle, even if writing fails (the original
        // leaked the writer when an exception occurred before close()).
        out.close();
    }
    return tempFile;
}
}

View File

@ -1,12 +1,16 @@
package org.broadinstitute.sting.utils.interval;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.samtools.SAMFileHeader;
import org.apache.commons.io.FileUtils;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -45,7 +49,7 @@ public class IntervalUtilsUnitTest extends BaseTest {
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
for (String interval: intervals)
locs.add(hg18GenomeLocParser.parseGenomeLoc(interval));
return locs;
return Collections.unmodifiableList(locs);
}
@BeforeClass
@ -277,7 +281,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
listEveryTwoFromOne.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
}
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
List<GenomeLoc> ret;
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
Assert.assertEquals(ret.size(), 100);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, null);
Assert.assertEquals(ret.size(), 100);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION);
Assert.assertEquals(ret.size(), 0);
@ -296,7 +303,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
}
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
List<GenomeLoc> ret;
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
Assert.assertEquals(ret.size(), 150);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, null);
Assert.assertEquals(ret.size(), 150);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
Assert.assertEquals(ret.size(), 50);
@ -316,7 +326,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
}
}
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
List<GenomeLoc> ret;
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
Assert.assertEquals(ret.size(), 40);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, null);
Assert.assertEquals(ret.size(), 40);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
Assert.assertEquals(ret.size(), 20);
@ -761,7 +774,13 @@ public class IntervalUtilsUnitTest extends BaseTest {
List<GenomeLoc> locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Collections.singletonList(privateTestDir + unmergedIntervals));
Assert.assertEquals(locs.size(), 2);
List<GenomeLoc> merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
List<GenomeLoc> merged;
merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
Assert.assertEquals(merged.size(), 1);
// Test that null means the same as ALL
merged = IntervalUtils.mergeIntervalLocations(locs, null);
Assert.assertEquals(merged.size(), 1);
}
@ -993,6 +1012,74 @@ public class IntervalUtilsUnitTest extends BaseTest {
// Attempting to use the legacy -L "interval1;interval2" syntax should produce an exception:
IntervalBinding<Feature> binding = new IntervalBinding<Feature>("1;2");
List<GenomeLoc> intervals = binding.getIntervals(toolkit);
binding.getIntervals(toolkit);
}
@DataProvider(name="invalidIntervalTestData")
public Object[][] invalidIntervalDataProvider() throws Exception {
GATKArgumentCollection argCollection = new GATKArgumentCollection();
File fastaFile = new File("public/testdata/exampleFASTA.fasta");
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
return new Object[][] {
new Object[] {argCollection, genomeLocParser, "chr1", 10000000, 20000000},
new Object[] {argCollection, genomeLocParser, "chr2", 1, 2},
new Object[] {argCollection, genomeLocParser, "chr1", -1, 50}
};
}
@Test(dataProvider="invalidIntervalTestData")
public void testInvalidPicardIntervalHandling(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser,
String contig, int intervalStart, int intervalEnd ) throws Exception {
SAMFileHeader picardFileHeader = new SAMFileHeader();
picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1"));
IntervalList picardIntervals = new IntervalList(picardFileHeader);
picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));
File picardIntervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals");
picardIntervals.write(picardIntervalFile);
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
intervalArgs.add(new IntervalBinding<Feature>(picardIntervalFile.getAbsolutePath()));
IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser);
}
@Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData")
public void testInvalidGATKFileIntervalHandling(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser,
String contig, int intervalStart, int intervalEnd ) throws Exception {
File gatkIntervalFile = createTempFile("testInvalidGATKFileIntervalHandling", ".intervals",
String.format("%s:%d-%d", contig, intervalStart, intervalEnd));
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
intervalArgs.add(new IntervalBinding<Feature>(gatkIntervalFile.getAbsolutePath()));
IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser);
}
private File createTempFile( String tempFilePrefix, String tempFileExtension, String... lines ) throws Exception {
File tempFile = BaseTest.createTempFile(tempFilePrefix, tempFileExtension);
FileUtils.writeLines(tempFile, Arrays.asList(lines));
return tempFile;
}
@DataProvider(name = "sortAndMergeIntervals")
public Object[][] getSortAndMergeIntervals() {
return new Object[][] {
new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1:3", "chr1:2"), getLocs("chr1:1", "chr1:2", "chr1:3") },
new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1:3", "chr1:2"), getLocs("chr1:1-3") },
new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1:3", "chr2:2"), getLocs("chr1:1", "chr1:3", "chr2:2") },
new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1:3", "chr2:2"), getLocs("chr1:1", "chr1:3", "chr2:2") },
new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1"), getLocs("chr1") },
new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1"), getLocs("chr1") }
};
}
@Test(dataProvider = "sortAndMergeIntervals")
public void testSortAndMergeIntervals(IntervalMergingRule merge, List<GenomeLoc> unsorted, List<GenomeLoc> expected) {
List<GenomeLoc> sorted = IntervalUtils.sortAndMergeIntervals(hg18GenomeLocParser, unsorted, merge).toList();
Assert.assertEquals(sorted, expected);
}
}

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.queue.engine
import java.util.Date
import java.text.SimpleDateFormat
/**
@ -36,18 +35,21 @@ class JobRunInfo {
val formatter = new SimpleDateFormat("yy-MM-dd H:mm:ss:SSS");
/** The start time with millisecond resolution of this job */
var startTime: Date = _
var startTime: java.util.Date = _
/** The done time with millisecond resolution of this job */
var doneTime: Date = _
var doneTime: java.util.Date = _
var exechosts: String = "localhost"
def getStartTime = startTime
def getDoneTime = doneTime
def getFormattedStartTime = formatTime(getStartTime)
def getFormattedDoneTime = formatTime(getDoneTime)
def getStartTime: String = getTime(startTime)
def getDoneTime: String = getTime(doneTime)
def getFormattedStartTime = formatTime(startTime)
def getFormattedDoneTime = formatTime(doneTime)
/** Helper function that returns the time of the date */
private def getTime(d: java.util.Date): String = if ( d != null ) d.getTime.toString else "null"
/** Helper function that pretty prints the date */
private def formatTime(d: Date) = if ( d != null ) formatter.format(d) else "null"
private def formatTime(d: java.util.Date): String = if ( d != null ) formatter.format(d) else "null"
def getExecHosts = exechosts
@ -55,14 +57,14 @@ class JobRunInfo {
* Was any information set for this jobInfo? JobInfo can be unset because
* the job never ran or because it already completed.
*/
def isFilledIn = startTime != null
def isFilledIn = startTime != null && doneTime != null
/**
* How long did the job run (in wall time)? Returns -1 if this jobInfo isn't filled in
*/
def getRuntimeInMs: Long = {
if ( isFilledIn )
getDoneTime.getTime - getStartTime.getTime
doneTime.getTime - startTime.getTime
else
-1
}

View File

@ -35,7 +35,7 @@ import org.ggf.drmaa.Session
class GridEngineJobRunner(session: Session, function: CommandLineFunction) extends DrmaaJobRunner(session, function) with Logging {
// Grid Engine disallows certain characters from being in job names.
// This replaces all illegal characters with underscores
protected override val jobNameFilter = """[\n\t\r/:@\\*?]"""
protected override val jobNameFilter = """[\n\t\r/:,@\\*?]"""
protected override val minRunnerPriority = -1023
protected override val maxRunnerPriority = 0

View File

@ -38,11 +38,11 @@ class ContigScatterFunction extends GATKScatterFunction with InProcessFunction {
override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount
protected override def maxIntervals = {
GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).contigs.size
GATKScatterFunction.getGATKIntervals(this.originalGATK).contigs.size
}
def run() {
val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK)
IntervalUtils.scatterContigIntervals(gi.samFileHeader, gi.locs, this.scatterOutputFiles)
}
}

View File

@ -26,13 +26,23 @@ package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import collection.JavaConversions._
import org.broadinstitute.sting.utils.interval.{IntervalMergingRule, IntervalUtils}
import org.broadinstitute.sting.utils.interval.{IntervalSetRule, IntervalMergingRule, IntervalUtils}
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
import net.sf.samtools.SAMFileHeader
import java.util.Collections
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocSortedSet, GenomeLocParser}
import org.broadinstitute.sting.utils.GenomeLoc
import org.broadinstitute.sting.commandline._
import org.broad.tribble.Feature
case class GATKIntervals(reference: File, intervals: Seq[File], intervalsString: Seq[String],
intervalSetRule: IntervalSetRule, intervalMergingRule: IntervalMergingRule, intervalPadding: Option[Int],
excludeIntervals: Seq[File], excludeIntervalsString: Seq[String]) {
def this(gatk: CommandLineGATK) = this(
gatk.reference_sequence,
gatk.intervals, gatk.intervalsString,
gatk.interval_set_rule, gatk.interval_merging, gatk.interval_padding,
gatk.excludeIntervals, gatk.excludeIntervalsString)
case class GATKIntervals(reference: File, intervals: Seq[String]) {
private lazy val referenceDataSource = new ReferenceDataSource(reference)
lazy val samFileHeader = {
@ -42,16 +52,46 @@ case class GATKIntervals(reference: File, intervals: Seq[String]) {
}
lazy val locs: java.util.List[GenomeLoc] = {
val parser = new GenomeLocParser(referenceDataSource.getReference)
val parsedLocs =
if (intervals.isEmpty)
GenomeLocSortedSet.createSetFromSequenceDictionary(samFileHeader.getSequenceDictionary).toList
else
IntervalUtils.parseIntervalArguments(parser, intervals)
Collections.sort(parsedLocs)
val mergedLocs = IntervalUtils.mergeIntervalLocations(parsedLocs, IntervalMergingRule.OVERLAPPING_ONLY)
Collections.unmodifiableList(mergedLocs)
val includeIntervalBindings = this.intervals.map(GATKIntervals.createBinding(_, "intervals")) ++
this.intervalsString.map(GATKIntervals.createBinding(_, "intervalsString"))
val excludeIntervalBindings = this.excludeIntervals.map(GATKIntervals.createBinding(_, "excludeIntervals")) ++
this.excludeIntervalsString.map(GATKIntervals.createBinding(_, "excludeIntervalsString"))
IntervalUtils.parseIntervalBindings(
referenceDataSource,
includeIntervalBindings,
intervalSetRule, intervalMergingRule, intervalPadding.getOrElse(0),
excludeIntervalBindings).toList
}
lazy val contigs = locs.map(_.getContig).distinct.toSeq
}
object GATKIntervals {
def copyIntervalArguments(src: CommandLineGATK, dst: CommandLineGATK) {
dst.reference_sequence = src.reference_sequence
dst.intervals = src.intervals
dst.intervalsString = src.intervalsString
dst.interval_set_rule = src.interval_set_rule
dst.interval_merging = src.interval_merging
dst.interval_padding = src.interval_padding
dst.excludeIntervals = src.excludeIntervals
dst.excludeIntervalsString = src.excludeIntervalsString
}
private def createBinding(interval: File, argumentName: String): IntervalBinding[Feature] = {
val tags = interval match {
case taggedFile: TaggedFile => ParsingMethod.parseTags(argumentName, taggedFile.tag)
case file: File => new Tags
}
createBinding(interval.getAbsolutePath, argumentName, tags)
}
private def createBinding(interval: String, argumentName: String): IntervalBinding[Feature] = {
createBinding(interval, argumentName, new Tags)
}
private def createBinding(interval: String, argumentName: String, tags: Tags): IntervalBinding[Feature] = {
ArgumentTypeDescriptor.parseBinding(interval, classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, tags, argumentName).asInstanceOf[IntervalBinding[Feature]]
}
}

View File

@ -28,14 +28,17 @@ import org.broadinstitute.sting.utils.interval.IntervalUtils
import java.io.File
import org.broadinstitute.sting.utils.io.IOUtils
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, ScatterFunction}
import org.broadinstitute.sting.commandline.Output
import org.broadinstitute.sting.commandline._
trait GATKScatterFunction extends ScatterFunction {
/** The runtime field to set for specifying an interval file. */
/* The runtime field to set for specifying intervals. */
private final val intervalsField = "intervals"
/** The runtime field to set for specifying an interval string. */
private final val intervalsStringField = "intervalsString"
private final val excludeIntervalsField = "excludeIntervals"
private final val excludeIntervalsStringField = "excludeIntervalsString"
private final val intervalsSetRuleField = "interval_set_rule"
private final val intervalMergingField = "interval_merging"
private final val intervalPaddingField = "interval_padding"
@Output(doc="Scatter function outputs")
var scatterOutputFiles: Seq[File] = Nil
@ -43,25 +46,14 @@ trait GATKScatterFunction extends ScatterFunction {
/** The original GATK function. */
protected var originalGATK: CommandLineGATK = _
/** The reference sequence for the GATK function. */
protected var referenceSequence: File = _
/** The list of interval files ("/path/to/interval.list") or interval strings ("chr1", "chr2") to parse into smaller parts. */
protected var intervals: Seq[String] = Nil
/** Whether the last scatter job should also include any unmapped reads. */
protected var includeUnmapped: Boolean = _
override def init() {
this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
this.referenceSequence = this.originalGATK.reference_sequence
if (this.originalGATK.intervals.isEmpty && (this.originalGATK.intervalsString == null || this.originalGATK.intervalsString.isEmpty)) {
this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, Seq.empty[String]).contigs
} else {
this.intervals ++= this.originalGATK.intervals.map(_.toString)
this.intervals ++= this.originalGATK.intervalsString.filterNot(interval => IntervalUtils.isUnmapped(interval))
// If intervals have been specified check if unmapped is included
if (this.originalGATK.intervals.size + this.originalGATK.intervalsString.size > 0)
this.includeUnmapped = this.originalGATK.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval))
}
}
override def isScatterGatherable = {
@ -74,6 +66,12 @@ trait GATKScatterFunction extends ScatterFunction {
cloneFunction.setFieldValue(this.intervalsStringField, Seq("unmapped"))
else
cloneFunction.setFieldValue(this.intervalsStringField, Seq.empty[String])
cloneFunction.setFieldValue(this.intervalsSetRuleField, null)
cloneFunction.setFieldValue(this.intervalMergingField, null)
cloneFunction.setFieldValue(this.intervalPaddingField, None)
cloneFunction.setFieldValue(this.excludeIntervalsField, Seq.empty[File])
cloneFunction.setFieldValue(this.excludeIntervalsStringField, Seq.empty[String])
}
override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) {
@ -85,29 +83,28 @@ trait GATKScatterFunction extends ScatterFunction {
}
/**
* Returns true if all interval files exist.
* @return true if all interval files exist.
*/
protected def intervalFilesExist = {
!this.intervals.exists(interval => IntervalUtils.isIntervalFile(interval, false) && !new File(interval).exists)
!(this.originalGATK.intervals ++ this.originalGATK.excludeIntervals).exists(interval => !interval.exists())
}
/**
* Returns the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time.
* @return the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time.
*/
protected def maxIntervals: Int
}
object GATKScatterFunction {
var gatkIntervals = Seq.empty[GATKIntervals]
var gatkIntervalsCache = Seq.empty[GATKIntervals]
def getGATKIntervals(reference: File, intervals: Seq[String]) = {
gatkIntervals.find(gi => gi.reference == reference && gi.intervals == intervals) match {
case Some(gi) => gi
def getGATKIntervals(originalFunction: CommandLineGATK) = {
val gatkIntervals = new GATKIntervals(originalFunction)
gatkIntervalsCache.find(_ == gatkIntervals) match {
case Some(existingGatkIntervals) => existingGatkIntervals
case None =>
val gi = new GATKIntervals(reference, intervals)
gatkIntervals :+= gi
gi
gatkIntervalsCache :+= gatkIntervals
gatkIntervals
}
}
}

View File

@ -33,12 +33,12 @@ import org.broadinstitute.sting.queue.function.InProcessFunction
*/
class IntervalScatterFunction extends GATKScatterFunction with InProcessFunction {
protected override def maxIntervals =
GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).locs.size
GATKScatterFunction.getGATKIntervals(this.originalGATK).locs.size
override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount
def run() {
val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK)
val splits = IntervalUtils.splitFixedIntervals(gi.locs, this.scatterOutputFiles.size)
IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles)
}

View File

@ -31,13 +31,11 @@ import org.broadinstitute.sting.queue.function.InProcessFunction
/**
* A scatter function that divides down to the locus level.
*/
//class LocusScatterFunction extends IntervalScatterFunction { }
class LocusScatterFunction extends GATKScatterFunction with InProcessFunction {
protected override def maxIntervals = scatterCount
def run() {
val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK)
val splits = IntervalUtils.splitLocusIntervals(gi.locs, this.scatterOutputFiles.size)
IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles)
}

View File

@ -37,14 +37,11 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
override def freezeFieldValues {
override def freezeFieldValues() {
this.jarFile = this.originalGATK.jarFile
this.reference_sequence = this.originalGATK.reference_sequence
this.intervals = this.originalGATK.intervals
this.intervalsString = this.originalGATK.intervalsString
this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) }
this.out = this.originalOutput
GATKIntervals.copyIntervalArguments(this.originalGATK, this)
// NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK
@ -55,6 +52,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME)
this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean]
super.freezeFieldValues
super.freezeFieldValues()
}
}

View File

@ -49,6 +49,11 @@ trait JavaCommandLineFunction extends CommandLineFunction {
*/
var javaMemoryLimit: Option[Double] = None
/**
* Max number of GC threads
*/
var javaGCThreads: Option[Int] = None
override def freezeFieldValues() {
super.freezeFieldValues()
@ -73,6 +78,8 @@ trait JavaCommandLineFunction extends CommandLineFunction {
}
def javaOpts = optional("-Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m", spaceSeparated=false) +
conditional(javaGCThreads.isDefined, "-XX:+UseParallelOldGC") +
optional("-XX:ParallelGCThreads=", javaGCThreads, spaceSeparated=false) +
required("-Djava.io.tmpdir=", jobTempDir, spaceSeparated=false)
def commandLine = required("java") +

View File

@ -56,8 +56,8 @@ trait QJobReport extends Logging {
"jobName" -> QJobReport.workAroundSameJobNames(this),
"intermediate" -> self.isIntermediate,
"exechosts" -> info.getExecHosts,
"startTime" -> info.getStartTime.getTime,
"doneTime" -> info.getDoneTime.getTime,
"startTime" -> info.getStartTime,
"doneTime" -> info.getDoneTime,
"formattedStartTime" -> info.getFormattedStartTime,
"formattedDoneTime" -> info.getFormattedDoneTime,
"runtime" -> info.getRuntimeInMs).mapValues((x:Any) => if (x != null) x.toString else "null")

View File

@ -70,7 +70,7 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @return The parsed object.
*/
def parse(parsingEngine: ParsingEngine, source: ArgumentSource, typeType: Type, argumentMatches: ArgumentMatches) = {
parse(parsingEngine,source, makeRawTypeIfNecessary(typeType), argumentMatches)
parse(parsingEngine,source, ArgumentTypeDescriptor.makeRawTypeIfNecessary(typeType), argumentMatches)
}
def parse(parsingEngine: ParsingEngine, source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = {

View File

@ -26,19 +26,21 @@ package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import org.testng.Assert
import org.testng.annotations.Test
import org.testng.annotations.{DataProvider, Test}
import org.broadinstitute.sting.BaseTest
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile
import org.broadinstitute.sting.utils.{GenomeLocSortedSet, GenomeLocParser}
import collection.JavaConversions._
import org.broadinstitute.sting.utils.interval.IntervalUtils
import org.broadinstitute.sting.utils.exceptions.UserException
class GATKIntervalsUnitTest {
private final lazy val hg18Reference = new File(BaseTest.hg18Reference)
private final lazy val hg18GenomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(hg18Reference))
private final lazy val hg18ReferenceLocs = GenomeLocSortedSet.
createSetFromSequenceDictionary(new ReferenceDataSource(hg18Reference).getReference.getSequenceDictionary).toList
private final lazy val hg19GenomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(hg19Reference))
private final lazy val hg19Reference = new File(BaseTest.hg19Reference)
@ -48,14 +50,14 @@ class GATKIntervalsUnitTest {
val chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-3")
val chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:3-5")
val gi = new GATKIntervals(hg18Reference, Seq("chr1:1-1", "chr2:2-3", "chr3:3-5"))
val gi = createGATKIntervals(hg18Reference, Seq("chr1:1-1", "chr2:2-3", "chr3:3-5"))
Assert.assertEquals(gi.locs.toSeq, Seq(chr1, chr2, chr3))
Assert.assertEquals(gi.contigs, Seq("chr1", "chr2", "chr3"))
}
@Test(timeOut = 30000L)
def testIntervalFile() {
var gi = new GATKIntervals(hg19Reference, Seq(BaseTest.hg19Intervals))
val gi = createGATKIntervals(hg19Reference, Seq(BaseTest.hg19Intervals))
Assert.assertEquals(gi.locs.size, 189894)
// Timeout check is because of bad:
// for(Item item: javaConvertedScalaList)
@ -67,28 +69,85 @@ class GATKIntervalsUnitTest {
@Test
def testEmptyIntervals() {
val gi = new GATKIntervals(hg18Reference, Nil)
val gi = createGATKIntervals(hg18Reference, Nil)
Assert.assertEquals(gi.locs, hg18ReferenceLocs)
Assert.assertEquals(gi.contigs.size, hg18ReferenceLocs.size)
}
@Test
def testContigCounts() {
Assert.assertEquals(new GATKIntervals(hg18Reference, Nil).contigs, hg18ReferenceLocs.map(_.getContig))
Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1", "chr2", "chr3")).contigs, Seq("chr1", "chr2", "chr3"))
Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, Seq("chr1", "chr2", "chr3"))
Assert.assertEquals(createGATKIntervals(hg18Reference, Nil).contigs, hg18ReferenceLocs.map(_.getContig))
Assert.assertEquals(createGATKIntervals(hg18Reference, Seq("chr1", "chr2", "chr3")).contigs, Seq("chr1", "chr2", "chr3"))
Assert.assertEquals(createGATKIntervals(hg18Reference, Seq("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, Seq("chr1", "chr2", "chr3"))
}
@Test
def testSortAndMergeIntervals() {
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-10", "chr1:1-10"), Seq("chr1:1-10"))
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-11", "chr1:1-12"), Seq("chr1:1-12"))
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"), Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"))
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:10-20", "chr1:21-30"), Seq("chr1:1-20", "chr1:21-30"))
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:21-30", "chr1:10-20"), Seq("chr1:1-20", "chr1:21-30"))
@DataProvider(name="sortAndMergeIntervals")
def getSortAndMergeIntervals: Array[Array[AnyRef]] = {
Array(
Array(Seq("chr1:1-10", "chr1:1-10", "chr1:1-10"), Seq("chr1:1-10")),
Array(Seq("chr1:1-10", "chr1:1-11", "chr1:1-12"), Seq("chr1:1-12")),
Array(Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"), Seq("chr1:1-30")),
Array(Seq("chr1:1-10", "chr1:10-20", "chr1:21-30"), Seq("chr1:1-30")),
Array(Seq("chr1:1-9", "chr1:21-30", "chr1:11-20"), Seq("chr1:1-9", "chr1:11-30"))
).asInstanceOf[Array[Array[AnyRef]]]
}
private def testSortAndMergeIntervals(actual: Seq[String], expected: Seq[String]) {
Assert.assertEquals(new GATKIntervals(hg18Reference, actual).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_)))
@Test(dataProvider="sortAndMergeIntervals")
def testSortAndMergeIntervals(unmerged: Seq[String], expected: Seq[String]) {
Assert.assertEquals(createGATKIntervals(hg18Reference, unmerged).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_)))
}
@DataProvider(name="taggedFiles")
def getTaggedFiles: Array[Array[AnyRef]] = {
Array(
Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", null, Seq("chr1:1-10")),
Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "", Seq("chr1:1-10")),
Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "myList", Seq("chr1:1-10")),
Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", null, Seq("1:897475-897481", "1:10001292")),
Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "", Seq("1:897475-897481", "1:10001292")),
Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "myVcf", Seq("1:897475-897481", "1:10001292")),
Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "VCF", Seq("1:897475-897481", "1:10001292")),
Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "myVcf,VCF", Seq("1:897475-897481", "1:10001292")),
Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", null, Seq("20:1-999", "20:1002-2000", "22:1001-6000")),
Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "", Seq("20:1-999", "20:1002-2000", "22:1001-6000")),
Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "myBed", Seq("20:1-999", "20:1002-2000", "22:1001-6000")),
Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "BED", Seq("20:1-999", "20:1002-2000", "22:1001-6000")),
Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "myBed,BED", Seq("20:1-999", "20:1002-2000", "22:1001-6000"))
)
}
@Test(dataProvider="taggedFiles")
def testTaggedFiles(reference: File, file: String, tags: String, expected: Seq[String]) {
val gatk = new CommandLineGATK
gatk.reference_sequence = reference
gatk.intervals = Seq(new TaggedFile(file, tags))
val parser = if (reference == hg18Reference) hg18GenomeLocParser else hg19GenomeLocParser
Assert.assertEquals(new GATKIntervals(gatk).locs.toSeq, expected.map(parser.parseGenomeLoc(_)))
}
@DataProvider(name="badTaggedFiles")
def getBadTaggedFiles: Array[Array[AnyRef]] = {
Array(
Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "VCF"),
Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "too,many,tags"),
Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "BED"),
Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "VCF,myVCF"),
Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "myVCF,VCF,extra"),
Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "VCF"),
Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "BED,myBed"),
Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "myBed,BED,extra")
).asInstanceOf[Array[Array[AnyRef]]]
}
@Test(dataProvider = "badTaggedFiles", expectedExceptions = Array(classOf[UserException]))
def testBadTaggedFiles(reference: File, file: String, tags: String) {
testTaggedFiles(reference, file, tags, Nil)
}
private def createGATKIntervals(reference: File, intervals: Seq[String]) = {
val gatk = new CommandLineGATK
gatk.reference_sequence = reference
gatk.intervalsString = intervals
new GATKIntervals(gatk)
}
}

View File

@ -42,4 +42,43 @@ class ExampleUnifiedGenotyperPipelineTest {
spec.jobRunners = PipelineTest.allJobRunners
PipelineTest.executeTest(spec)
}
@Test
def testUnifiedGenotyperWithGatkIntervals() {
val spec = new PipelineTestSpec
spec.name = "unifiedgenotyper_with_gatk_intervals"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
" -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam",
" -R " + BaseTest.hg18Reference,
" -L " + BaseTest.validationDataLocation + "intervalTest.intervals").mkString
spec.jobRunners = Seq("Lsf706")
PipelineTest.executeTest(spec)
}
@Test
def testUnifiedGenotyperWithBedIntervals() {
val spec = new PipelineTestSpec
spec.name = "unifiedgenotyper_with_bed_intervals"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
" -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam",
" -R " + BaseTest.hg18Reference,
" -L " + BaseTest.validationDataLocation + "intervalTest.bed").mkString
spec.jobRunners = Seq("Lsf706")
PipelineTest.executeTest(spec)
}
@Test
def testUnifiedGenotyperWithVcfIntervals() {
val spec = new PipelineTestSpec
spec.name = "unifiedgenotyper_with_vcf_intervals"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
" -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam",
" -R " + BaseTest.hg18Reference,
" -L " + BaseTest.validationDataLocation + "intervalTest.1.vcf").mkString
spec.jobRunners = Seq("Lsf706")
PipelineTest.executeTest(spec)
}
}