Refactored parsing of Rod/IntervalBinding. Queue S/G now uses all interval arguments passed to CommandLineGATK QFunctions including support for BED/tribble types, XL, ISR, and padding.
Updated HSP to use the new padding arguments instead of a flank intervals file, plus the latest QC evals. IntervalUtils methods now return unmodifiable lists so that utilities don't mutate the collections. Added a JavaCommandLineFunction.javaGCThreads option to test reducing Java's automatic GC thread allocation based on the number of CPUs. Added comma to the list of characters converted to underscores in GridEngine job names so that the GE JSV doesn't choke on the -N values. JobRunInfo now handles null done times when jobs crash with strange errors.
This commit is contained in:
parent
a5df8f1277
commit
746a5e95f3
|
|
@ -289,7 +289,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
return field.isAnnotationPresent(Hidden.class);
|
||||
}
|
||||
|
||||
public Class makeRawTypeIfNecessary(Type t) {
|
||||
public static Class makeRawTypeIfNecessary(Type t) {
|
||||
if ( t == null )
|
||||
return null;
|
||||
else if ( t instanceof ParameterizedType )
|
||||
|
|
@ -300,6 +300,114 @@ public abstract class ArgumentTypeDescriptor {
|
|||
throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The actual argument parsing method.
|
||||
* @param source source
|
||||
* @param type type to check
|
||||
* @param matches matches
|
||||
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
|
||||
*/
|
||||
protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) {
|
||||
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||
String value = getArgumentValue(defaultDefinition, matches);
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||
String name = defaultDefinition.fullName;
|
||||
|
||||
return parseBinding(value, parameterType, type, name, tags, source.field.getName());
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param value The source of the binding
|
||||
* @param parameterType The Tribble Feature parameter type
|
||||
* @param bindingClass The class type for the binding (ex: RodBinding, IntervalBinding, etc.) Must have the correct constructor for creating the binding.
|
||||
* @param bindingName The name of the binding passed to the constructor.
|
||||
* @param tags Tags for the binding used for parsing and passed to the constructor.
|
||||
* @param fieldName The name of the field that was parsed. Used for error reporting.
|
||||
* @return The newly created binding object of type bindingClass.
|
||||
*/
|
||||
public static Object parseBinding(String value, Class<? extends Feature> parameterType, Type bindingClass,
|
||||
String bindingName, Tags tags, String fieldName) {
|
||||
try {
|
||||
String tribbleType = null;
|
||||
// must have one or two tag values here
|
||||
if ( tags.getPositionalTags().size() > 2 ) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Unexpected number of positional tags for argument %s : %s. " +
|
||||
"Rod bindings only support -X:type and -X:name,type argument styles",
|
||||
value, fieldName));
|
||||
} else if ( tags.getPositionalTags().size() == 2 ) {
|
||||
// -X:name,type style
|
||||
bindingName = tags.getPositionalTags().get(0);
|
||||
tribbleType = tags.getPositionalTags().get(1);
|
||||
|
||||
FeatureManager manager = new FeatureManager();
|
||||
if ( manager.getByName(tribbleType) == null )
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Unable to find tribble type '%s' provided on the command line. " +
|
||||
"Please select a correct type from among the supported types:%n%s",
|
||||
tribbleType, manager.userFriendlyListOfAvailableFeatures(parameterType)));
|
||||
|
||||
} else {
|
||||
// case with 0 or 1 positional tags
|
||||
FeatureManager manager = new FeatureManager();
|
||||
|
||||
// -X:type style is a type when we cannot determine the type dynamically
|
||||
String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
|
||||
if ( tag1 != null ) {
|
||||
if ( manager.getByName(tag1) != null ) // this a type
|
||||
tribbleType = tag1;
|
||||
else
|
||||
bindingName = tag1;
|
||||
}
|
||||
|
||||
if ( tribbleType == null ) {
|
||||
// try to determine the file type dynamically
|
||||
File file = new File(value);
|
||||
if ( file.canRead() && file.isFile() ) {
|
||||
FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
|
||||
if ( featureDescriptor != null ) {
|
||||
tribbleType = featureDescriptor.getName();
|
||||
logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
|
||||
}
|
||||
}
|
||||
|
||||
if ( tribbleType == null ) {
|
||||
// IntervalBinding can be created from a normal String
|
||||
Class rawType = (makeRawTypeIfNecessary(bindingClass));
|
||||
try {
|
||||
return rawType.getConstructor(String.class).newInstance(value);
|
||||
} catch (NoSuchMethodException e) {
|
||||
/* ignore */
|
||||
}
|
||||
|
||||
if ( ! file.exists() ) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "file does not exist");
|
||||
} else if ( ! file.canRead() || ! file.isFile() ) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "file could not be read");
|
||||
} else {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
|
||||
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
|
||||
manager.userFriendlyListOfAvailableFeatures(parameterType)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Constructor ctor = (makeRawTypeIfNecessary(bindingClass)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
||||
return ctor.newInstance(parameterType, bindingName, value, tribbleType, tags);
|
||||
} catch (Exception e) {
|
||||
if ( e instanceof UserException )
|
||||
throw ((UserException)e);
|
||||
else
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Failed to parse value %s for argument %s. Message: %s",
|
||||
value, fieldName, e.getMessage()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -324,6 +432,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); }
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
|
||||
Class parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||
return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);
|
||||
|
|
@ -336,118 +445,16 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
return parse(parsingEngine, source, type, matches, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* The actual argument parsing method.
|
||||
*
|
||||
* IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
|
||||
* that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
|
||||
* this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
|
||||
* is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
|
||||
* usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
|
||||
* side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
|
||||
* continue to input them out of order (whereas Tribble Features are ordered).
|
||||
*
|
||||
* @param parsingEngine parsing engine
|
||||
* @param source source
|
||||
* @param type type to check
|
||||
* @param matches matches
|
||||
* @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
|
||||
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
|
||||
*/
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
|
||||
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||
String value = getArgumentValue( defaultDefinition, matches );
|
||||
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||
|
||||
try {
|
||||
String name = defaultDefinition.fullName;
|
||||
String tribbleType = null;
|
||||
Tags tags = getArgumentTags(matches);
|
||||
// must have one or two tag values here
|
||||
if ( tags.getPositionalTags().size() > 2 ) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Unexpected number of positional tags for argument %s : %s. " +
|
||||
"Rod bindings only support -X:type and -X:name,type argument styles",
|
||||
value, source.field.getName()));
|
||||
} if ( tags.getPositionalTags().size() == 2 ) {
|
||||
// -X:name,type style
|
||||
name = tags.getPositionalTags().get(0);
|
||||
tribbleType = tags.getPositionalTags().get(1);
|
||||
} else {
|
||||
// case with 0 or 1 positional tags
|
||||
FeatureManager manager = new FeatureManager();
|
||||
|
||||
// -X:type style is a type when we cannot determine the type dynamically
|
||||
String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
|
||||
if ( tag1 != null ) {
|
||||
if ( manager.getByName(tag1) != null ) // this a type
|
||||
tribbleType = tag1;
|
||||
else
|
||||
name = tag1;
|
||||
}
|
||||
|
||||
if ( tribbleType == null ) {
|
||||
// try to determine the file type dynamically
|
||||
File file = new File(value);
|
||||
if ( file.canRead() && file.isFile() ) {
|
||||
FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
|
||||
if ( featureDescriptor != null ) {
|
||||
tribbleType = featureDescriptor.getName();
|
||||
logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
|
||||
}
|
||||
}
|
||||
|
||||
if ( tribbleType == null ) {
|
||||
// IntervalBindings allow streaming conversion of Strings
|
||||
if ( createIntervalBinding ) {
|
||||
return new IntervalBinding(value);
|
||||
}
|
||||
|
||||
if ( ! file.exists() ) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "file does not exist");
|
||||
} else if ( ! file.canRead() || ! file.isFile() ) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "file could not be read");
|
||||
} else {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
|
||||
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
|
||||
manager.userFriendlyListOfAvailableFeatures(parameterType)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
||||
Object result;
|
||||
if ( createIntervalBinding ) {
|
||||
result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
} else {
|
||||
RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
parsingEngine.addTags(rbind, tags);
|
||||
parsingEngine.addRodBinding(rbind);
|
||||
result = rbind;
|
||||
}
|
||||
return result;
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Failed to parse value %s for argument %s.",
|
||||
value, source.field.getName()));
|
||||
} catch (Exception e) {
|
||||
if ( e instanceof UserException )
|
||||
throw ((UserException)e);
|
||||
else
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Failed to parse value %s for argument %s. Message: %s",
|
||||
value, source.field.getName(), e.getMessage()));
|
||||
}
|
||||
Tags tags = getArgumentTags(matches);
|
||||
RodBinding rbind = (RodBinding)parseBinding(source, type, matches, tags);
|
||||
parsingEngine.addTags(rbind, tags);
|
||||
parsingEngine.addRodBinding(rbind);
|
||||
return rbind;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parser for RodBinding objects
|
||||
* Parser for IntervalBinding objects
|
||||
*/
|
||||
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
/**
|
||||
|
|
@ -475,7 +482,7 @@ class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
*/
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
|
||||
return parseBinding(source, type, matches, getArgumentTags(matches));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -783,7 +790,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
Class<? extends Multiplexer> multiplexerType = dependentArgument.field.getAnnotation(Multiplex.class).value();
|
||||
Constructor<? extends Multiplexer> multiplexerConstructor = null;
|
||||
Constructor<? extends Multiplexer> multiplexerConstructor;
|
||||
try {
|
||||
multiplexerConstructor = multiplexerType.getConstructor(sourceTypes);
|
||||
multiplexerConstructor.setAccessible(true);
|
||||
|
|
@ -792,7 +799,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
throw new ReviewedStingException(String.format("Unable to find constructor for class %s with parameters %s",multiplexerType.getName(),Arrays.deepToString(sourceFields)),ex);
|
||||
}
|
||||
|
||||
Multiplexer multiplexer = null;
|
||||
Multiplexer multiplexer;
|
||||
try {
|
||||
multiplexer = multiplexerConstructor.newInstance(sourceValues);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,24 +78,7 @@ public abstract class ParsingMethod {
|
|||
|
||||
String argument = matcher.group(1).trim();
|
||||
|
||||
Tags tags = new Tags();
|
||||
if(matcher.group(2) != null) {
|
||||
for(String tag: Utils.split(matcher.group(2),",")) {
|
||||
// Check for presence of an '=' sign, indicating a key-value pair in the tag line.
|
||||
int equalDelimiterPos = tag.indexOf('=');
|
||||
if(equalDelimiterPos >= 0) {
|
||||
// Sanity check; ensure that there aren't multiple '=' in this key-value pair.
|
||||
if(tag.indexOf('=',equalDelimiterPos+1) >= 0)
|
||||
throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. Please ensure that " +
|
||||
"key-value tags are of the form <key>=<value>, and neither key " +
|
||||
"nor value contain the '=' character", tag, argument));
|
||||
tags.addKeyValueTag(tag.substring(0,equalDelimiterPos),tag.substring(equalDelimiterPos+1));
|
||||
}
|
||||
else
|
||||
tags.addPositionalTag(tag);
|
||||
|
||||
}
|
||||
}
|
||||
Tags tags = parseTags(argument, matcher.group(2));
|
||||
|
||||
// Find the most appropriate argument definition for the given argument.
|
||||
ArgumentDefinition argumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher );
|
||||
|
|
@ -105,6 +88,28 @@ public abstract class ParsingMethod {
|
|||
return new ArgumentMatch(argument,argumentDefinition,position,tags);
|
||||
}
|
||||
|
||||
public static Tags parseTags(String argument, String tagString) {
|
||||
Tags tags = new Tags();
|
||||
if (tagString != null) {
|
||||
for(String tag: Utils.split(tagString, ",")) {
|
||||
// Check for presence of an '=' sign, indicating a key-value pair in the tag line.
|
||||
int equalDelimiterPos = tag.indexOf('=');
|
||||
if(equalDelimiterPos >= 0) {
|
||||
// Sanity check; ensure that there aren't multiple '=' in this key-value pair.
|
||||
if(tag.indexOf('=',equalDelimiterPos+1) >= 0)
|
||||
throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. Please ensure that " +
|
||||
"key-value tags are of the form <key>=<value>, and neither key " +
|
||||
"nor value contain the '=' character", tag, argument));
|
||||
tags.addKeyValueTag(tag.substring(0,equalDelimiterPos),tag.substring(equalDelimiterPos+1));
|
||||
}
|
||||
else
|
||||
tags.addPositionalTag(tag);
|
||||
|
||||
}
|
||||
}
|
||||
return tags;
|
||||
}
|
||||
|
||||
/**
|
||||
* A command-line argument always starts with an alphabetical character or underscore followed by any word character.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
|
|
@ -54,9 +53,9 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
|
@ -582,7 +581,6 @@ public class GenomeAnalysisEngine {
|
|||
* Setup the intervals to be processed
|
||||
*/
|
||||
protected void initializeIntervals() {
|
||||
|
||||
// return if no interval arguments at all
|
||||
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
|
||||
return;
|
||||
|
|
@ -590,17 +588,22 @@ public class GenomeAnalysisEngine {
|
|||
// Note that the use of '-L all' is no longer supported.
|
||||
|
||||
// if include argument isn't given, create new set of all possible intervals
|
||||
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
|
||||
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
|
||||
loadIntervals(argCollection.intervals, argCollection.intervalSetRule, argCollection.intervalPadding));
|
||||
|
||||
Pair<GenomeLocSortedSet, GenomeLocSortedSet> includeExcludePair = IntervalUtils.parseIntervalBindingsPair(
|
||||
this.referenceDataSource,
|
||||
argCollection.intervals,
|
||||
argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding,
|
||||
argCollection.excludeIntervals);
|
||||
|
||||
GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst();
|
||||
GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond();
|
||||
|
||||
// if no exclude arguments, can return parseIntervalArguments directly
|
||||
if ( argCollection.excludeIntervals == null )
|
||||
if ( excludeSortedSet == null )
|
||||
intervals = includeSortedSet;
|
||||
|
||||
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
|
||||
else {
|
||||
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
|
||||
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
|
||||
|
||||
// logging messages only printed when exclude (-XL) arguments are given
|
||||
|
|
@ -613,43 +616,6 @@ public class GenomeAnalysisEngine {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the intervals relevant to the current execution
|
||||
* @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
|
||||
* @param rule interval merging rule
|
||||
* @return A sorted, merged list of all intervals specified in this arg list.
|
||||
*/
|
||||
protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule ) {
|
||||
return loadIntervals(argList, rule, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the intervals relevant to the current execution
|
||||
* @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
|
||||
* @param rule interval merging rule
|
||||
* @param padding how much to pad the intervals
|
||||
* @return A sorted, merged list of all intervals specified in this arg list.
|
||||
*/
|
||||
protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule, final int padding ) {
|
||||
|
||||
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
|
||||
for ( IntervalBinding intervalBinding : argList ) {
|
||||
List<GenomeLoc> intervals = intervalBinding.getIntervals(this.getGenomeLocParser());
|
||||
|
||||
if ( intervals.isEmpty() ) {
|
||||
logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
|
||||
}
|
||||
|
||||
if ( padding > 0 ) {
|
||||
intervals = IntervalUtils.getIntervalsWithFlanks(this.getGenomeLocParser(), intervals, padding);
|
||||
}
|
||||
|
||||
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
|
||||
}
|
||||
|
||||
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add additional, externally managed IO streams for inputs.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ import net.sf.picard.util.Interval;
|
|||
import net.sf.picard.util.IntervalList;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -169,21 +171,23 @@ public class IntervalUtils {
|
|||
*/
|
||||
public static List<GenomeLoc> mergeListsBySetOperator(List<GenomeLoc> setOne, List<GenomeLoc> setTwo, IntervalSetRule rule) {
|
||||
// shortcut, if either set is zero, return the other set
|
||||
if (setOne == null || setOne.size() == 0 || setTwo == null || setTwo.size() == 0) return (setOne == null || setOne.size() == 0) ? setTwo : setOne;
|
||||
if (setOne == null || setOne.size() == 0 || setTwo == null || setTwo.size() == 0)
|
||||
return Collections.unmodifiableList((setOne == null || setOne.size() == 0) ? setTwo : setOne);
|
||||
|
||||
// our master list, since we can't guarantee removal time in a generic list
|
||||
LinkedList<GenomeLoc> retList = new LinkedList<GenomeLoc>();
|
||||
|
||||
// if we're set to UNION, just add them all
|
||||
if (rule == IntervalSetRule.UNION) {
|
||||
setOne.addAll(setTwo);
|
||||
return setOne;
|
||||
if (rule == null || rule == IntervalSetRule.UNION) {
|
||||
retList.addAll(setOne);
|
||||
retList.addAll(setTwo);
|
||||
return Collections.unmodifiableList(retList);
|
||||
}
|
||||
|
||||
// else we're INTERSECTION, create two indexes into the lists
|
||||
int iOne = 0;
|
||||
int iTwo = 0;
|
||||
|
||||
// our master list, since we can't guarantee removal time in a generic list
|
||||
LinkedList<GenomeLoc> retList = new LinkedList<GenomeLoc>();
|
||||
|
||||
// merge the second into the first using the rule
|
||||
while (iTwo < setTwo.size() && iOne < setOne.size())
|
||||
// if the first list is ahead, drop items off the second until we overlap
|
||||
|
|
@ -204,7 +208,7 @@ public class IntervalUtils {
|
|||
throw new UserException.BadInput("The INTERSECTION of your -L options produced no intervals.");
|
||||
|
||||
// we don't need to add the rest of remaining locations, since we know they don't overlap. return what we have
|
||||
return retList;
|
||||
return Collections.unmodifiableList(retList);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -218,6 +222,8 @@ public class IntervalUtils {
|
|||
* @return A sorted, merged version of the intervals passed in.
|
||||
*/
|
||||
public static GenomeLocSortedSet sortAndMergeIntervals(GenomeLocParser parser, List<GenomeLoc> intervals, IntervalMergingRule mergingRule) {
|
||||
// Make a copy of the (potentially unmodifiable) list to be sorted
|
||||
intervals = new ArrayList<GenomeLoc>(intervals);
|
||||
// sort raw interval list
|
||||
Collections.sort(intervals);
|
||||
// now merge raw interval list
|
||||
|
|
@ -481,6 +487,70 @@ public class IntervalUtils {
|
|||
return new SplitLocusRecursive(split, remaining);
|
||||
}
|
||||
|
||||
/**
|
||||
* Setup the intervals to be processed
|
||||
*/
|
||||
public static GenomeLocSortedSet parseIntervalBindings(
|
||||
final ReferenceDataSource referenceDataSource,
|
||||
final List<IntervalBinding<Feature>> intervals,
|
||||
final IntervalSetRule intervalSetRule, final IntervalMergingRule intervalMergingRule, final int intervalPadding,
|
||||
final List<IntervalBinding<Feature>> excludeIntervals) {
|
||||
|
||||
Pair<GenomeLocSortedSet, GenomeLocSortedSet> includeExcludePair = parseIntervalBindingsPair(
|
||||
referenceDataSource, intervals, intervalSetRule, intervalMergingRule, intervalPadding, excludeIntervals);
|
||||
|
||||
GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst();
|
||||
GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond();
|
||||
|
||||
if (excludeSortedSet != null) {
|
||||
return includeSortedSet.subtractRegions(excludeSortedSet);
|
||||
} else {
|
||||
return includeSortedSet;
|
||||
}
|
||||
}
|
||||
|
||||
public static Pair<GenomeLocSortedSet, GenomeLocSortedSet> parseIntervalBindingsPair(
|
||||
final ReferenceDataSource referenceDataSource,
|
||||
final List<IntervalBinding<Feature>> intervals,
|
||||
final IntervalSetRule intervalSetRule, final IntervalMergingRule intervalMergingRule, final int intervalPadding,
|
||||
final List<IntervalBinding<Feature>> excludeIntervals) {
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceDataSource.getReference());
|
||||
|
||||
// if include argument isn't given, create new set of all possible intervals
|
||||
GenomeLocSortedSet includeSortedSet = ((intervals == null || intervals.size() == 0) ?
|
||||
GenomeLocSortedSet.createSetFromSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary()) :
|
||||
loadIntervals(intervals, intervalSetRule, intervalMergingRule, intervalPadding, genomeLocParser));
|
||||
|
||||
GenomeLocSortedSet excludeSortedSet = null;
|
||||
if (excludeIntervals != null && excludeIntervals.size() > 0) {
|
||||
excludeSortedSet = loadIntervals(excludeIntervals, IntervalSetRule.UNION, intervalMergingRule, 0, genomeLocParser);
|
||||
}
|
||||
return new Pair<GenomeLocSortedSet, GenomeLocSortedSet>(includeSortedSet, excludeSortedSet);
|
||||
}
|
||||
|
||||
public static GenomeLocSortedSet loadIntervals(
|
||||
final List<IntervalBinding<Feature>> intervalBindings,
|
||||
final IntervalSetRule rule, final IntervalMergingRule intervalMergingRule, final int padding,
|
||||
final GenomeLocParser genomeLocParser) {
|
||||
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
|
||||
for ( IntervalBinding intervalBinding : intervalBindings) {
|
||||
@SuppressWarnings("unchecked")
|
||||
List<GenomeLoc> intervals = intervalBinding.getIntervals(genomeLocParser);
|
||||
|
||||
if ( intervals.isEmpty() ) {
|
||||
logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
|
||||
}
|
||||
|
||||
if ( padding > 0 ) {
|
||||
intervals = getIntervalsWithFlanks(genomeLocParser, intervals, padding);
|
||||
}
|
||||
|
||||
allIntervals = mergeListsBySetOperator(intervals, allIntervals, rule);
|
||||
}
|
||||
|
||||
return sortAndMergeIntervals(genomeLocParser, allIntervals, intervalMergingRule);
|
||||
}
|
||||
|
||||
private final static class SplitLocusRecursive {
|
||||
final List<GenomeLoc> split;
|
||||
final LinkedList<GenomeLoc> remaining;
|
||||
|
|
@ -546,7 +616,7 @@ public class IntervalUtils {
|
|||
*/
|
||||
public static List<GenomeLoc> mergeIntervalLocations(final List<GenomeLoc> raw, IntervalMergingRule rule) {
|
||||
if (raw.size() <= 1)
|
||||
return raw;
|
||||
return Collections.unmodifiableList(raw);
|
||||
else {
|
||||
ArrayList<GenomeLoc> merged = new ArrayList<GenomeLoc>();
|
||||
Iterator<GenomeLoc> it = raw.iterator();
|
||||
|
|
@ -555,7 +625,7 @@ public class IntervalUtils {
|
|||
GenomeLoc curr = it.next();
|
||||
if (prev.overlapsP(curr)) {
|
||||
prev = prev.merge(curr);
|
||||
} else if (prev.contiguousP(curr) && rule == IntervalMergingRule.ALL) {
|
||||
} else if (prev.contiguousP(curr) && (rule == null || rule == IntervalMergingRule.ALL)) {
|
||||
prev = prev.merge(curr);
|
||||
} else {
|
||||
merged.add(prev);
|
||||
|
|
@ -563,7 +633,7 @@ public class IntervalUtils {
|
|||
}
|
||||
}
|
||||
merged.add(prev);
|
||||
return merged;
|
||||
return Collections.unmodifiableList(merged);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,32 +24,17 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.util.Interval;
|
||||
import net.sf.picard.util.IntervalList;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.commandline.ArgumentException;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.walkers.PrintReadsWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* Tests selected functionality in the GenomeAnalysisEngine class
|
||||
|
|
@ -91,65 +76,4 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
|
||||
testEngine.validateSuppliedIntervals();
|
||||
}
|
||||
|
||||
/**
 * Supplies the rows for the "invalidIntervalTestData" tests. Each row holds a
 * GenomeAnalysisEngine configured with a default GATKArgumentCollection, a GenomeLocParser
 * built from public/testdata/exampleFASTA.fasta (also installed on the engine), and a
 * contig / start / end triple.
 * NOTE(review): the three triples are presumably each invalid against that reference
 * (out-of-range coordinates, unknown contig, negative start) -- confirm against the FASTA.
 */
@DataProvider(name="invalidIntervalTestData")
|
||||
public Object[][] invalidIntervalDataProvider() throws Exception {
|
||||
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
|
||||
GATKArgumentCollection argCollection = new GATKArgumentCollection();
|
||||
testEngine.setArguments(argCollection);
|
||||
|
||||
File fastaFile = new File("public/testdata/exampleFASTA.fasta");
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
|
||||
testEngine.setGenomeLocParser(genomeLocParser);
|
||||
|
||||
return new Object[][] {
|
||||
new Object[] {testEngine, genomeLocParser, "chr1", 10000000, 20000000},
|
||||
new Object[] {testEngine, genomeLocParser, "chr2", 1, 2},
|
||||
new Object[] {testEngine, genomeLocParser, "chr1", -1, 50}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
/**
 * Writes a single-interval Picard IntervalList (its header contains only chr1) to a temp
 * file, wraps the file path in an IntervalBinding and loads it through the engine with
 * IntervalSetRule.UNION.
 * No expected exception is declared, so the test passes only when loading completes
 * without throwing.
 */
@Test(dataProvider="invalidIntervalTestData")
|
||||
public void testInvalidPicardIntervalHandling(GenomeAnalysisEngine testEngine, GenomeLocParser genomeLocParser,
|
||||
String contig, int intervalStart, int intervalEnd ) throws Exception {
|
||||
|
||||
SAMFileHeader picardFileHeader = new SAMFileHeader();
|
||||
picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1"));
|
||||
IntervalList picardIntervals = new IntervalList(picardFileHeader);
|
||||
picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));
|
||||
|
||||
File picardIntervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals");
|
||||
picardIntervals.write(picardIntervalFile);
|
||||
|
||||
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
|
||||
intervalArgs.add(new IntervalBinding<Feature>(picardIntervalFile.getAbsolutePath()));
|
||||
|
||||
testEngine.loadIntervals(intervalArgs, IntervalSetRule.UNION);
|
||||
}
|
||||
|
||||
/**
 * Writes one "contig:start-end" line to a temp .intervals file, wraps the file path in an
 * IntervalBinding and loads it through the engine with IntervalSetRule.UNION.
 * The test passes only when a UserException is thrown.
 */
@Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData")
|
||||
public void testInvalidGATKFileIntervalHandling(GenomeAnalysisEngine testEngine, GenomeLocParser genomeLocParser,
|
||||
String contig, int intervalStart, int intervalEnd ) throws Exception {
|
||||
|
||||
File gatkIntervalFile = createTempFile("testInvalidGATKFileIntervalHandling", ".intervals",
|
||||
String.format("%s:%d-%d", contig, intervalStart, intervalEnd));
|
||||
|
||||
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
|
||||
intervalArgs.add(new IntervalBinding<Feature>(gatkIntervalFile.getAbsolutePath()));
|
||||
|
||||
testEngine.loadIntervals(intervalArgs, IntervalSetRule.UNION);
|
||||
}
|
||||
|
||||
/**
 * Creates a temp file that is deleted on JVM exit and writes the given lines to it,
 * one per line.
 *
 * @param tempFilePrefix    prefix passed to File.createTempFile
 * @param tempFileExtension suffix passed to File.createTempFile
 * @param lines             lines to write; may be empty, leaving the file empty
 * @return the temp file
 */
private File createTempFile( String tempFilePrefix, String tempFileExtension, String... lines ) throws Exception {
|
||||
File tempFile = File.createTempFile(tempFilePrefix, tempFileExtension);
|
||||
tempFile.deleteOnExit();
|
||||
|
||||
PrintWriter out = new PrintWriter(tempFile);
|
||||
for ( String line : lines ) {
|
||||
out.println(line);
|
||||
}
|
||||
out.close();
|
||||
|
||||
return tempFile;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,12 +1,16 @@
|
|||
package org.broadinstitute.sting.utils.interval;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.util.Interval;
|
||||
import net.sf.picard.util.IntervalList;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -45,7 +49,7 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
|
||||
for (String interval: intervals)
|
||||
locs.add(hg18GenomeLocParser.parseGenomeLoc(interval));
|
||||
return locs;
|
||||
return Collections.unmodifiableList(locs);
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
|
|
@ -277,7 +281,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
listEveryTwoFromOne.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||
}
|
||||
|
||||
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
|
||||
List<GenomeLoc> ret;
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
|
||||
Assert.assertEquals(ret.size(), 100);
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, null);
|
||||
Assert.assertEquals(ret.size(), 100);
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION);
|
||||
Assert.assertEquals(ret.size(), 0);
|
||||
|
|
@ -296,7 +303,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||
}
|
||||
|
||||
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
||||
List<GenomeLoc> ret;
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
||||
Assert.assertEquals(ret.size(), 150);
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, null);
|
||||
Assert.assertEquals(ret.size(), 150);
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
||||
Assert.assertEquals(ret.size(), 50);
|
||||
|
|
@ -316,7 +326,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
||||
List<GenomeLoc> ret;
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
||||
Assert.assertEquals(ret.size(), 40);
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, null);
|
||||
Assert.assertEquals(ret.size(), 40);
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
||||
Assert.assertEquals(ret.size(), 20);
|
||||
|
|
@ -761,7 +774,13 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
List<GenomeLoc> locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Collections.singletonList(privateTestDir + unmergedIntervals));
|
||||
Assert.assertEquals(locs.size(), 2);
|
||||
|
||||
List<GenomeLoc> merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
|
||||
List<GenomeLoc> merged;
|
||||
|
||||
merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
|
||||
Assert.assertEquals(merged.size(), 1);
|
||||
|
||||
// Test that null means the same as ALL
|
||||
merged = IntervalUtils.mergeIntervalLocations(locs, null);
|
||||
Assert.assertEquals(merged.size(), 1);
|
||||
}
|
||||
|
||||
|
|
@ -993,6 +1012,74 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
|
||||
// Attempting to use the legacy -L "interval1;interval2" syntax should produce an exception:
|
||||
IntervalBinding<Feature> binding = new IntervalBinding<Feature>("1;2");
|
||||
List<GenomeLoc> intervals = binding.getIntervals(toolkit);
|
||||
binding.getIntervals(toolkit);
|
||||
}
|
||||
|
||||
/**
 * Supplies the rows for the "invalidIntervalTestData" tests. Each row holds a default
 * GATKArgumentCollection, a GenomeLocParser built from public/testdata/exampleFASTA.fasta,
 * and a contig / start / end triple.
 * NOTE(review): the three triples are presumably each invalid against that reference
 * (out-of-range coordinates, unknown contig, negative start) -- confirm against the FASTA.
 */
@DataProvider(name="invalidIntervalTestData")
|
||||
public Object[][] invalidIntervalDataProvider() throws Exception {
|
||||
GATKArgumentCollection argCollection = new GATKArgumentCollection();
|
||||
File fastaFile = new File("public/testdata/exampleFASTA.fasta");
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
|
||||
|
||||
return new Object[][] {
|
||||
new Object[] {argCollection, genomeLocParser, "chr1", 10000000, 20000000},
|
||||
new Object[] {argCollection, genomeLocParser, "chr2", 1, 2},
|
||||
new Object[] {argCollection, genomeLocParser, "chr1", -1, 50}
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Writes a single-interval Picard IntervalList (its header contains only chr1) to a temp
 * file, wraps the file path in an IntervalBinding and loads it with
 * IntervalUtils.loadIntervals, using the set rule, merging rule and padding from the
 * supplied argument collection.
 * No expected exception is declared, so the test passes only when loading completes
 * without throwing.
 */
@Test(dataProvider="invalidIntervalTestData")
|
||||
public void testInvalidPicardIntervalHandling(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser,
|
||||
String contig, int intervalStart, int intervalEnd ) throws Exception {
|
||||
|
||||
SAMFileHeader picardFileHeader = new SAMFileHeader();
|
||||
picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1"));
|
||||
IntervalList picardIntervals = new IntervalList(picardFileHeader);
|
||||
picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));
|
||||
|
||||
File picardIntervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals");
|
||||
picardIntervals.write(picardIntervalFile);
|
||||
|
||||
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
|
||||
intervalArgs.add(new IntervalBinding<Feature>(picardIntervalFile.getAbsolutePath()));
|
||||
|
||||
IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser);
|
||||
}
|
||||
|
||||
/**
 * Writes one "contig:start-end" line to a temp .intervals file, wraps the file path in an
 * IntervalBinding and loads it with IntervalUtils.loadIntervals, using the set rule,
 * merging rule and padding from the supplied argument collection.
 * The test passes only when a UserException is thrown.
 */
@Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData")
|
||||
public void testInvalidGATKFileIntervalHandling(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser,
|
||||
String contig, int intervalStart, int intervalEnd ) throws Exception {
|
||||
|
||||
File gatkIntervalFile = createTempFile("testInvalidGATKFileIntervalHandling", ".intervals",
|
||||
String.format("%s:%d-%d", contig, intervalStart, intervalEnd));
|
||||
|
||||
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
|
||||
intervalArgs.add(new IntervalBinding<Feature>(gatkIntervalFile.getAbsolutePath()));
|
||||
|
||||
IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser);
|
||||
}
|
||||
|
||||
/**
 * Creates a temp file through BaseTest.createTempFile and writes the given lines to it
 * with FileUtils.writeLines.
 *
 * @param tempFilePrefix    prefix handed to BaseTest.createTempFile
 * @param tempFileExtension extension handed to BaseTest.createTempFile
 * @param lines             lines to write; may be empty
 * @return the temp file
 */
private File createTempFile( String tempFilePrefix, String tempFileExtension, String... lines ) throws Exception {
|
||||
File tempFile = BaseTest.createTempFile(tempFilePrefix, tempFileExtension);
|
||||
FileUtils.writeLines(tempFile, Arrays.asList(lines));
|
||||
return tempFile;
|
||||
}
|
||||
|
||||
/**
 * Supplies the rows for testSortAndMergeIntervals. Each row is
 * { merging rule, input locs in arbitrary order, expected locs after sort-and-merge }.
 * The rows pair OVERLAPPING_ONLY and ALL on the same input: adjacent single-base locs on
 * one contig, locs spread over two contigs, and a single base together with its whole contig.
 */
@DataProvider(name = "sortAndMergeIntervals")
|
||||
public Object[][] getSortAndMergeIntervals() {
|
||||
return new Object[][] {
|
||||
new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1:3", "chr1:2"), getLocs("chr1:1", "chr1:2", "chr1:3") },
|
||||
new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1:3", "chr1:2"), getLocs("chr1:1-3") },
|
||||
new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1:3", "chr2:2"), getLocs("chr1:1", "chr1:3", "chr2:2") },
|
||||
new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1:3", "chr2:2"), getLocs("chr1:1", "chr1:3", "chr2:2") },
|
||||
new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1"), getLocs("chr1") },
|
||||
new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1"), getLocs("chr1") }
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Runs IntervalUtils.sortAndMergeIntervals on the unsorted locs with the given merging
 * rule (using the hg18 parser) and asserts that the resulting list equals the expected one.
 *
 * @param merge    merging rule under test
 * @param unsorted input locs
 * @param expected locs expected after sorting and merging
 */
@Test(dataProvider = "sortAndMergeIntervals")
|
||||
public void testSortAndMergeIntervals(IntervalMergingRule merge, List<GenomeLoc> unsorted, List<GenomeLoc> expected) {
|
||||
List<GenomeLoc> sorted = IntervalUtils.sortAndMergeIntervals(hg18GenomeLocParser, unsorted, merge).toList();
|
||||
Assert.assertEquals(sorted, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.queue.engine
|
||||
|
||||
import java.util.Date
|
||||
import java.text.SimpleDateFormat
|
||||
|
||||
/**
|
||||
|
|
@ -36,18 +35,21 @@ class JobRunInfo {
|
|||
val formatter = new SimpleDateFormat("yy-MM-dd H:mm:ss:SSS");
|
||||
|
||||
/** The start time with millisecond resolution of this job */
|
||||
var startTime: Date = _
|
||||
var startTime: java.util.Date = _
|
||||
/** The done time with millisecond resolution of this job */
|
||||
var doneTime: Date = _
|
||||
var doneTime: java.util.Date = _
|
||||
var exechosts: String = "localhost"
|
||||
|
||||
def getStartTime = startTime
|
||||
def getDoneTime = doneTime
|
||||
def getFormattedStartTime = formatTime(getStartTime)
|
||||
def getFormattedDoneTime = formatTime(getDoneTime)
|
||||
def getStartTime: String = getTime(startTime)
|
||||
def getDoneTime: String = getTime(doneTime)
|
||||
def getFormattedStartTime = formatTime(startTime)
|
||||
def getFormattedDoneTime = formatTime(doneTime)
|
||||
|
||||
/** Helper function that returns the time of the date */
|
||||
private def getTime(d: java.util.Date): String = if ( d != null ) d.getTime.toString else "null"
|
||||
|
||||
/** Helper function that pretty prints the date */
|
||||
private def formatTime(d: Date) = if ( d != null ) formatter.format(d) else "null"
|
||||
private def formatTime(d: java.util.Date): String = if ( d != null ) formatter.format(d) else "null"
|
||||
|
||||
def getExecHosts = exechosts
|
||||
|
||||
|
|
@ -55,14 +57,14 @@ class JobRunInfo {
|
|||
* Was any information set for this jobInfo? JobInfo can be unset because
|
||||
* the job never ran or because it already completed.
|
||||
*/
|
||||
def isFilledIn = startTime != null
|
||||
def isFilledIn = startTime != null && doneTime != null
|
||||
|
||||
/**
|
||||
* How long did the job run (in wall time)? Returns -1 if this jobInfo isn't filled in
|
||||
*/
|
||||
def getRuntimeInMs: Long = {
|
||||
if ( isFilledIn )
|
||||
getDoneTime.getTime - getStartTime.getTime
|
||||
doneTime.getTime - startTime.getTime
|
||||
else
|
||||
-1
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.ggf.drmaa.Session
|
|||
class GridEngineJobRunner(session: Session, function: CommandLineFunction) extends DrmaaJobRunner(session, function) with Logging {
|
||||
// Grid Engine disallows certain characters from being in job names.
|
||||
// This replaces all illegal characters with underscores
|
||||
protected override val jobNameFilter = """[\n\t\r/:@\\*?]"""
|
||||
protected override val jobNameFilter = """[\n\t\r/:,@\\*?]"""
|
||||
protected override val minRunnerPriority = -1023
|
||||
protected override val maxRunnerPriority = 0
|
||||
|
||||
|
|
|
|||
|
|
@ -38,11 +38,11 @@ class ContigScatterFunction extends GATKScatterFunction with InProcessFunction {
|
|||
override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount
|
||||
|
||||
protected override def maxIntervals = {
|
||||
GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).contigs.size
|
||||
GATKScatterFunction.getGATKIntervals(this.originalGATK).contigs.size
|
||||
}
|
||||
|
||||
def run() {
|
||||
val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
|
||||
val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK)
|
||||
IntervalUtils.scatterContigIntervals(gi.samFileHeader, gi.locs, this.scatterOutputFiles)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,13 +26,23 @@ package org.broadinstitute.sting.queue.extensions.gatk
|
|||
|
||||
import java.io.File
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.interval.{IntervalMergingRule, IntervalUtils}
|
||||
import org.broadinstitute.sting.utils.interval.{IntervalSetRule, IntervalMergingRule, IntervalUtils}
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
|
||||
import net.sf.samtools.SAMFileHeader
|
||||
import java.util.Collections
|
||||
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocSortedSet, GenomeLocParser}
|
||||
import org.broadinstitute.sting.utils.GenomeLoc
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import org.broad.tribble.Feature
|
||||
|
||||
case class GATKIntervals(reference: File, intervals: Seq[File], intervalsString: Seq[String],
|
||||
intervalSetRule: IntervalSetRule, intervalMergingRule: IntervalMergingRule, intervalPadding: Option[Int],
|
||||
excludeIntervals: Seq[File], excludeIntervalsString: Seq[String]) {
|
||||
|
||||
def this(gatk: CommandLineGATK) = this(
|
||||
gatk.reference_sequence,
|
||||
gatk.intervals, gatk.intervalsString,
|
||||
gatk.interval_set_rule, gatk.interval_merging, gatk.interval_padding,
|
||||
gatk.excludeIntervals, gatk.excludeIntervalsString)
|
||||
|
||||
case class GATKIntervals(reference: File, intervals: Seq[String]) {
|
||||
private lazy val referenceDataSource = new ReferenceDataSource(reference)
|
||||
|
||||
lazy val samFileHeader = {
|
||||
|
|
@ -42,16 +52,46 @@ case class GATKIntervals(reference: File, intervals: Seq[String]) {
|
|||
}
|
||||
|
||||
lazy val locs: java.util.List[GenomeLoc] = {
|
||||
val parser = new GenomeLocParser(referenceDataSource.getReference)
|
||||
val parsedLocs =
|
||||
if (intervals.isEmpty)
|
||||
GenomeLocSortedSet.createSetFromSequenceDictionary(samFileHeader.getSequenceDictionary).toList
|
||||
else
|
||||
IntervalUtils.parseIntervalArguments(parser, intervals)
|
||||
Collections.sort(parsedLocs)
|
||||
val mergedLocs = IntervalUtils.mergeIntervalLocations(parsedLocs, IntervalMergingRule.OVERLAPPING_ONLY)
|
||||
Collections.unmodifiableList(mergedLocs)
|
||||
val includeIntervalBindings = this.intervals.map(GATKIntervals.createBinding(_, "intervals")) ++
|
||||
this.intervalsString.map(GATKIntervals.createBinding(_, "intervalsString"))
|
||||
val excludeIntervalBindings = this.excludeIntervals.map(GATKIntervals.createBinding(_, "excludeIntervals")) ++
|
||||
this.excludeIntervalsString.map(GATKIntervals.createBinding(_, "excludeIntervalsString"))
|
||||
|
||||
IntervalUtils.parseIntervalBindings(
|
||||
referenceDataSource,
|
||||
includeIntervalBindings,
|
||||
intervalSetRule, intervalMergingRule, intervalPadding.getOrElse(0),
|
||||
excludeIntervalBindings).toList
|
||||
}
|
||||
|
||||
lazy val contigs = locs.map(_.getContig).distinct.toSeq
|
||||
}
|
||||
|
||||
/**
 * Helpers for GATKIntervals: copying the interval-related arguments between two
 * CommandLineGATK functions, and turning interval files or strings into IntervalBindings.
 */
object GATKIntervals {
|
||||
/**
 * Copies the reference sequence and every interval argument (include and exclude
 * files and strings, set rule, merging rule, padding) from src to dst.
 */
def copyIntervalArguments(src: CommandLineGATK, dst: CommandLineGATK) {
|
||||
dst.reference_sequence = src.reference_sequence
|
||||
dst.intervals = src.intervals
|
||||
dst.intervalsString = src.intervalsString
|
||||
dst.interval_set_rule = src.interval_set_rule
|
||||
dst.interval_merging = src.interval_merging
|
||||
dst.interval_padding = src.interval_padding
|
||||
dst.excludeIntervals = src.excludeIntervals
|
||||
dst.excludeIntervalsString = src.excludeIntervalsString
|
||||
}
|
||||
|
||||
/**
 * Creates a binding for an interval file, using its absolute path as the value.
 * A TaggedFile contributes tags parsed from its tag; any other File gets a new, empty Tags.
 */
private def createBinding(interval: File, argumentName: String): IntervalBinding[Feature] = {
|
||||
val tags = interval match {
|
||||
case taggedFile: TaggedFile => ParsingMethod.parseTags(argumentName, taggedFile.tag)
|
||||
case file: File => new Tags
|
||||
}
|
||||
createBinding(interval.getAbsolutePath, argumentName, tags)
|
||||
}
|
||||
|
||||
/** Creates a binding for an interval string with a new, empty Tags. */
private def createBinding(interval: String, argumentName: String): IntervalBinding[Feature] = {
|
||||
createBinding(interval, argumentName, new Tags)
|
||||
}
|
||||
|
||||
/**
 * Delegates to ArgumentTypeDescriptor.parseBinding and casts the result to
 * IntervalBinding[Feature]. argumentName is passed twice, as the fourth and sixth arguments.
 */
private def createBinding(interval: String, argumentName: String, tags: Tags): IntervalBinding[Feature] = {
|
||||
ArgumentTypeDescriptor.parseBinding(interval, classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, tags, argumentName).asInstanceOf[IntervalBinding[Feature]]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,14 +28,17 @@ import org.broadinstitute.sting.utils.interval.IntervalUtils
|
|||
import java.io.File
|
||||
import org.broadinstitute.sting.utils.io.IOUtils
|
||||
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, ScatterFunction}
|
||||
import org.broadinstitute.sting.commandline.Output
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
||||
trait GATKScatterFunction extends ScatterFunction {
|
||||
/** The runtime field to set for specifying an interval file. */
|
||||
/* The runtime field to set for specifying intervals. */
|
||||
private final val intervalsField = "intervals"
|
||||
|
||||
/** The runtime field to set for specifying an interval string. */
|
||||
private final val intervalsStringField = "intervalsString"
|
||||
private final val excludeIntervalsField = "excludeIntervals"
|
||||
private final val excludeIntervalsStringField = "excludeIntervalsString"
|
||||
private final val intervalsSetRuleField = "interval_set_rule"
|
||||
private final val intervalMergingField = "interval_merging"
|
||||
private final val intervalPaddingField = "interval_padding"
|
||||
|
||||
@Output(doc="Scatter function outputs")
|
||||
var scatterOutputFiles: Seq[File] = Nil
|
||||
|
|
@ -43,25 +46,14 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
/** The original GATK function. */
|
||||
protected var originalGATK: CommandLineGATK = _
|
||||
|
||||
/** The reference sequence for the GATK function. */
|
||||
protected var referenceSequence: File = _
|
||||
|
||||
/** The list of interval files ("/path/to/interval.list") or interval strings ("chr1", "chr2") to parse into smaller parts. */
|
||||
protected var intervals: Seq[String] = Nil
|
||||
|
||||
/** Whether the last scatter job should also include any unmapped reads. */
|
||||
protected var includeUnmapped: Boolean = _
|
||||
|
||||
override def init() {
|
||||
this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
|
||||
this.referenceSequence = this.originalGATK.reference_sequence
|
||||
if (this.originalGATK.intervals.isEmpty && (this.originalGATK.intervalsString == null || this.originalGATK.intervalsString.isEmpty)) {
|
||||
this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, Seq.empty[String]).contigs
|
||||
} else {
|
||||
this.intervals ++= this.originalGATK.intervals.map(_.toString)
|
||||
this.intervals ++= this.originalGATK.intervalsString.filterNot(interval => IntervalUtils.isUnmapped(interval))
|
||||
// If intervals have been specified check if unmapped is included
|
||||
if (this.originalGATK.intervals.size + this.originalGATK.intervalsString.size > 0)
|
||||
this.includeUnmapped = this.originalGATK.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval))
|
||||
}
|
||||
}
|
||||
|
||||
override def isScatterGatherable = {
|
||||
|
|
@ -74,6 +66,12 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
cloneFunction.setFieldValue(this.intervalsStringField, Seq("unmapped"))
|
||||
else
|
||||
cloneFunction.setFieldValue(this.intervalsStringField, Seq.empty[String])
|
||||
|
||||
cloneFunction.setFieldValue(this.intervalsSetRuleField, null)
|
||||
cloneFunction.setFieldValue(this.intervalMergingField, null)
|
||||
cloneFunction.setFieldValue(this.intervalPaddingField, None)
|
||||
cloneFunction.setFieldValue(this.excludeIntervalsField, Seq.empty[File])
|
||||
cloneFunction.setFieldValue(this.excludeIntervalsStringField, Seq.empty[String])
|
||||
}
|
||||
|
||||
override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) {
|
||||
|
|
@ -85,29 +83,28 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns true if all interval files exist.
|
||||
* @return true if all interval files exist.
|
||||
*/
|
||||
protected def intervalFilesExist = {
|
||||
!this.intervals.exists(interval => IntervalUtils.isIntervalFile(interval, false) && !new File(interval).exists)
|
||||
!(this.originalGATK.intervals ++ this.originalGATK.excludeIntervals).exists(interval => !interval.exists())
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time.
|
||||
* @return the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time.
|
||||
*/
|
||||
protected def maxIntervals: Int
|
||||
}
|
||||
|
||||
object GATKScatterFunction {
|
||||
var gatkIntervals = Seq.empty[GATKIntervals]
|
||||
var gatkIntervalsCache = Seq.empty[GATKIntervals]
|
||||
|
||||
def getGATKIntervals(reference: File, intervals: Seq[String]) = {
|
||||
gatkIntervals.find(gi => gi.reference == reference && gi.intervals == intervals) match {
|
||||
case Some(gi) => gi
|
||||
def getGATKIntervals(originalFunction: CommandLineGATK) = {
|
||||
val gatkIntervals = new GATKIntervals(originalFunction)
|
||||
gatkIntervalsCache.find(_ == gatkIntervals) match {
|
||||
case Some(existingGatkIntervals) => existingGatkIntervals
|
||||
case None =>
|
||||
val gi = new GATKIntervals(reference, intervals)
|
||||
gatkIntervals :+= gi
|
||||
gi
|
||||
gatkIntervalsCache :+= gatkIntervals
|
||||
gatkIntervals
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,12 +33,12 @@ import org.broadinstitute.sting.queue.function.InProcessFunction
|
|||
*/
|
||||
class IntervalScatterFunction extends GATKScatterFunction with InProcessFunction {
|
||||
protected override def maxIntervals =
|
||||
GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).locs.size
|
||||
GATKScatterFunction.getGATKIntervals(this.originalGATK).locs.size
|
||||
|
||||
override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount
|
||||
|
||||
def run() {
|
||||
val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
|
||||
val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK)
|
||||
val splits = IntervalUtils.splitFixedIntervals(gi.locs, this.scatterOutputFiles.size)
|
||||
IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,13 +31,11 @@ import org.broadinstitute.sting.queue.function.InProcessFunction
|
|||
/**
|
||||
* A scatter function that divides down to the locus level.
|
||||
*/
|
||||
//class LocusScatterFunction extends IntervalScatterFunction { }
|
||||
|
||||
class LocusScatterFunction extends GATKScatterFunction with InProcessFunction {
|
||||
protected override def maxIntervals = scatterCount
|
||||
|
||||
def run() {
|
||||
val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
|
||||
val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK)
|
||||
val splits = IntervalUtils.splitLocusIntervals(gi.locs, this.scatterOutputFiles.size)
|
||||
IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,14 +37,11 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
|
|||
|
||||
private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
|
||||
|
||||
override def freezeFieldValues {
|
||||
override def freezeFieldValues() {
|
||||
this.jarFile = this.originalGATK.jarFile
|
||||
this.reference_sequence = this.originalGATK.reference_sequence
|
||||
this.intervals = this.originalGATK.intervals
|
||||
this.intervalsString = this.originalGATK.intervalsString
|
||||
|
||||
this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) }
|
||||
this.out = this.originalOutput
|
||||
GATKIntervals.copyIntervalArguments(this.originalGATK, this)
|
||||
|
||||
// NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor
|
||||
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK
|
||||
|
|
@ -55,6 +52,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
|
|||
val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME)
|
||||
this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean]
|
||||
|
||||
super.freezeFieldValues
|
||||
super.freezeFieldValues()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,6 +49,11 @@ trait JavaCommandLineFunction extends CommandLineFunction {
|
|||
*/
|
||||
var javaMemoryLimit: Option[Double] = None
|
||||
|
||||
/**
|
||||
* Max number of GC threads
|
||||
*/
|
||||
var javaGCThreads: Option[Int] = None
|
||||
|
||||
override def freezeFieldValues() {
|
||||
super.freezeFieldValues()
|
||||
|
||||
|
|
@ -73,6 +78,8 @@ trait JavaCommandLineFunction extends CommandLineFunction {
|
|||
}
|
||||
|
||||
/**
 * Builds the JVM options portion of the command line:
 * -Xmx in megabytes (javaMemoryLimit is multiplied by 1024 and rounded up),
 * -XX:+UseParallelOldGC when javaGCThreads is defined,
 * -XX:ParallelGCThreads= from javaGCThreads, and -Djava.io.tmpdir= from jobTempDir.
 * NOTE(review): optional(...) presumably emits nothing for None -- confirm in CommandLineFunction.
 */
def javaOpts = optional("-Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m", spaceSeparated=false) +
|
||||
conditional(javaGCThreads.isDefined, "-XX:+UseParallelOldGC") +
|
||||
optional("-XX:ParallelGCThreads=", javaGCThreads, spaceSeparated=false) +
|
||||
required("-Djava.io.tmpdir=", jobTempDir, spaceSeparated=false)
|
||||
|
||||
def commandLine = required("java") +
|
||||
|
|
|
|||
|
|
@ -56,8 +56,8 @@ trait QJobReport extends Logging {
|
|||
"jobName" -> QJobReport.workAroundSameJobNames(this),
|
||||
"intermediate" -> self.isIntermediate,
|
||||
"exechosts" -> info.getExecHosts,
|
||||
"startTime" -> info.getStartTime.getTime,
|
||||
"doneTime" -> info.getDoneTime.getTime,
|
||||
"startTime" -> info.getStartTime,
|
||||
"doneTime" -> info.getDoneTime,
|
||||
"formattedStartTime" -> info.getFormattedStartTime,
|
||||
"formattedDoneTime" -> info.getFormattedDoneTime,
|
||||
"runtime" -> info.getRuntimeInMs).mapValues((x:Any) => if (x != null) x.toString else "null")
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
* @return The parsed object.
|
||||
*/
|
||||
def parse(parsingEngine: ParsingEngine, source: ArgumentSource, typeType: Type, argumentMatches: ArgumentMatches) = {
|
||||
parse(parsingEngine,source, makeRawTypeIfNecessary(typeType), argumentMatches)
|
||||
parse(parsingEngine,source, ArgumentTypeDescriptor.makeRawTypeIfNecessary(typeType), argumentMatches)
|
||||
}
|
||||
|
||||
def parse(parsingEngine: ParsingEngine, source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = {
|
||||
|
|
|
|||
|
|
@ -26,19 +26,21 @@ package org.broadinstitute.sting.queue.extensions.gatk
|
|||
|
||||
import java.io.File
|
||||
import org.testng.Assert
|
||||
import org.testng.annotations.Test
|
||||
import org.testng.annotations.{DataProvider, Test}
|
||||
import org.broadinstitute.sting.BaseTest
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile
|
||||
import org.broadinstitute.sting.utils.{GenomeLocSortedSet, GenomeLocParser}
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException
|
||||
|
||||
class GATKIntervalsUnitTest {
|
||||
private final lazy val hg18Reference = new File(BaseTest.hg18Reference)
|
||||
private final lazy val hg18GenomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(hg18Reference))
|
||||
private final lazy val hg18ReferenceLocs = GenomeLocSortedSet.
|
||||
createSetFromSequenceDictionary(new ReferenceDataSource(hg18Reference).getReference.getSequenceDictionary).toList
|
||||
private final lazy val hg19GenomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(hg19Reference))
|
||||
|
||||
private final lazy val hg19Reference = new File(BaseTest.hg19Reference)
|
||||
|
||||
|
|
@ -48,14 +50,14 @@ class GATKIntervalsUnitTest {
|
|||
val chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-3")
|
||||
val chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:3-5")
|
||||
|
||||
val gi = new GATKIntervals(hg18Reference, Seq("chr1:1-1", "chr2:2-3", "chr3:3-5"))
|
||||
val gi = createGATKIntervals(hg18Reference, Seq("chr1:1-1", "chr2:2-3", "chr3:3-5"))
|
||||
Assert.assertEquals(gi.locs.toSeq, Seq(chr1, chr2, chr3))
|
||||
Assert.assertEquals(gi.contigs, Seq("chr1", "chr2", "chr3"))
|
||||
}
|
||||
|
||||
@Test(timeOut = 30000L)
|
||||
def testIntervalFile() {
|
||||
var gi = new GATKIntervals(hg19Reference, Seq(BaseTest.hg19Intervals))
|
||||
val gi = createGATKIntervals(hg19Reference, Seq(BaseTest.hg19Intervals))
|
||||
Assert.assertEquals(gi.locs.size, 189894)
|
||||
// Timeout check is because of bad:
|
||||
// for(Item item: javaConvertedScalaList)
|
||||
|
|
@ -67,28 +69,85 @@ class GATKIntervalsUnitTest {
|
|||
|
||||
@Test
|
||||
def testEmptyIntervals() {
|
||||
val gi = new GATKIntervals(hg18Reference, Nil)
|
||||
val gi = createGATKIntervals(hg18Reference, Nil)
|
||||
Assert.assertEquals(gi.locs, hg18ReferenceLocs)
|
||||
Assert.assertEquals(gi.contigs.size, hg18ReferenceLocs.size)
|
||||
}
|
||||
|
||||
@Test
|
||||
def testContigCounts() {
|
||||
Assert.assertEquals(new GATKIntervals(hg18Reference, Nil).contigs, hg18ReferenceLocs.map(_.getContig))
|
||||
Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1", "chr2", "chr3")).contigs, Seq("chr1", "chr2", "chr3"))
|
||||
Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, Seq("chr1", "chr2", "chr3"))
|
||||
Assert.assertEquals(createGATKIntervals(hg18Reference, Nil).contigs, hg18ReferenceLocs.map(_.getContig))
|
||||
Assert.assertEquals(createGATKIntervals(hg18Reference, Seq("chr1", "chr2", "chr3")).contigs, Seq("chr1", "chr2", "chr3"))
|
||||
Assert.assertEquals(createGATKIntervals(hg18Reference, Seq("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, Seq("chr1", "chr2", "chr3"))
|
||||
}
|
||||
|
||||
@Test
|
||||
def testSortAndMergeIntervals() {
|
||||
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-10", "chr1:1-10"), Seq("chr1:1-10"))
|
||||
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-11", "chr1:1-12"), Seq("chr1:1-12"))
|
||||
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"), Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"))
|
||||
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:10-20", "chr1:21-30"), Seq("chr1:1-20", "chr1:21-30"))
|
||||
testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:21-30", "chr1:10-20"), Seq("chr1:1-20", "chr1:21-30"))
|
||||
/**
 * Supplies the rows for the "sortAndMergeIntervals" data provider.
 *
 * Each row holds two values: the interval strings handed to the test as input,
 * and the interval strings the test expects back.
 */
@DataProvider(name="sortAndMergeIntervals")
def getSortAndMergeIntervals: Array[Array[AnyRef]] = {
  // (input intervals, expected intervals) pairs, one per test invocation.
  val cases: Seq[(Seq[String], Seq[String])] = Seq(
    (Seq("chr1:1-10", "chr1:1-10", "chr1:1-10"), Seq("chr1:1-10")),
    (Seq("chr1:1-10", "chr1:1-11", "chr1:1-12"), Seq("chr1:1-12")),
    (Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"), Seq("chr1:1-30")),
    (Seq("chr1:1-10", "chr1:10-20", "chr1:21-30"), Seq("chr1:1-30")),
    (Seq("chr1:1-9", "chr1:21-30", "chr1:11-20"), Seq("chr1:1-9", "chr1:11-30"))
  )

  // Flatten each pair into the two-element row shape the test method's parameter list needs.
  cases.map { case (unmerged, expected) => Array[AnyRef](unmerged, expected) }.toArray
}
|
||||
|
||||
private def testSortAndMergeIntervals(actual: Seq[String], expected: Seq[String]) {
|
||||
Assert.assertEquals(new GATKIntervals(hg18Reference, actual).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_)))
|
||||
/**
 * Builds GATKIntervals over the hg18 reference from the given interval strings and
 * checks that the resulting locs equal the expected strings parsed with the hg18 parser.
 *
 * @param unmerged interval strings passed to createGATKIntervals
 * @param expected interval strings the resulting locs must match, in order
 */
@Test(dataProvider="sortAndMergeIntervals")
def testSortAndMergeIntervals(unmerged: Seq[String], expected: Seq[String]) {
  // Actual side is evaluated first, matching the argument order of the assertion.
  val actualLocs = createGATKIntervals(hg18Reference, unmerged).locs.toSeq
  val expectedLocs = expected.map(interval => hg18GenomeLocParser.parseGenomeLoc(interval))
  Assert.assertEquals(actualLocs, expectedLocs)
}
|
||||
|
||||
/**
 * Supplies the rows for the "taggedFiles" data provider.
 *
 * Each row is (reference file, interval file path, tag string or null, expected interval strings).
 * Three interval files are covered, each with several tag strings, and every tag
 * variant of a given file shares the same expected intervals.
 */
@DataProvider(name="taggedFiles")
def getTaggedFiles: Array[Array[AnyRef]] = {
  // Interval sources under the private test directory.
  val intervalList = BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list"
  val vcf = BaseTest.privateTestDir + "small.indel.test.vcf"
  val bed = BaseTest.privateTestDir + "sampleBedFile.bed"

  // Expected intervals per source file.
  val intervalListLocs = Seq("chr1:1-10")
  val vcfLocs = Seq("1:897475-897481", "1:10001292")
  val bedLocs = Seq("20:1-999", "20:1002-2000", "22:1001-6000")

  Array(
    Array(hg18Reference, intervalList, null, intervalListLocs),
    Array(hg18Reference, intervalList, "", intervalListLocs),
    Array(hg18Reference, intervalList, "myList", intervalListLocs),
    Array(hg19Reference, vcf, null, vcfLocs),
    Array(hg19Reference, vcf, "", vcfLocs),
    Array(hg19Reference, vcf, "myVcf", vcfLocs),
    Array(hg19Reference, vcf, "VCF", vcfLocs),
    Array(hg19Reference, vcf, "myVcf,VCF", vcfLocs),
    Array(hg19Reference, bed, null, bedLocs),
    Array(hg19Reference, bed, "", bedLocs),
    Array(hg19Reference, bed, "myBed", bedLocs),
    Array(hg19Reference, bed, "BED", bedLocs),
    Array(hg19Reference, bed, "myBed,BED", bedLocs)
  )
}
|
||||
|
||||
/**
 * Configures a CommandLineGATK with one tagged interval file and checks the locs
 * that GATKIntervals produces from it.
 *
 * @param reference reference file; also selects which GenomeLocParser parses `expected`
 * @param file path of the interval file
 * @param tags tag string attached to the file via TaggedFile (null in some data rows)
 * @param expected interval strings the resulting locs must match, in order
 */
@Test(dataProvider="taggedFiles")
def testTaggedFiles(reference: File, file: String, tags: String, expected: Seq[String]) {
  val commandLine = new CommandLineGATK
  commandLine.reference_sequence = reference
  commandLine.intervals = Seq(new TaggedFile(file, tags))

  // Expected strings must be parsed against the same reference the intervals were resolved on.
  val locParser =
    if (reference == hg18Reference)
      hg18GenomeLocParser
    else
      hg19GenomeLocParser

  // Actual side is evaluated first, matching the argument order of the assertion.
  val actualLocs = new GATKIntervals(commandLine).locs.toSeq
  val expectedLocs = expected.map(interval => locParser.parseGenomeLoc(interval))
  Assert.assertEquals(actualLocs, expectedLocs)
}
|
||||
|
||||
/**
 * Supplies the rows for the "badTaggedFiles" data provider.
 *
 * Each row is (reference file, interval file path, tag string). The consuming test
 * expects a UserException for every row.
 * NOTE(review): the rows appear to cover a type tag that does not match the file,
 * a type tag in the first position, and more than two tags -- confirm against the
 * tag parsing rules.
 */
@DataProvider(name="badTaggedFiles")
def getBadTaggedFiles: Array[Array[AnyRef]] = {
  // Interval sources under the private test directory.
  val intervalList = BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list"
  val vcf = BaseTest.privateTestDir + "small.indel.test.vcf"
  val bed = BaseTest.privateTestDir + "sampleBedFile.bed"

  Array(
    Array(hg18Reference, intervalList, "VCF"),
    Array(hg18Reference, intervalList, "too,many,tags"),
    Array(hg19Reference, vcf, "BED"),
    Array(hg19Reference, vcf, "VCF,myVCF"),
    Array(hg19Reference, vcf, "myVCF,VCF,extra"),
    Array(hg19Reference, bed, "VCF"),
    Array(hg19Reference, bed, "BED,myBed"),
    Array(hg19Reference, bed, "myBed,BED,extra")
  ).asInstanceOf[Array[Array[AnyRef]]]
}
|
||||
|
||||
/**
 * Runs testTaggedFiles on a file/tag combination and expects it to raise a UserException.
 *
 * @param reference reference file for the run
 * @param file path of the interval file
 * @param tags tag string attached to the file
 */
@Test(dataProvider = "badTaggedFiles", expectedExceptions = Array(classOf[UserException]))
def testBadTaggedFiles(reference: File, file: String, tags: String) {
  // The expected list is only a placeholder: the test passes only if an exception is thrown.
  testTaggedFiles(reference, file, tags, expected = Nil)
}
|
||||
|
||||
/**
 * Creates a GATKIntervals from a fresh CommandLineGATK configured with the given
 * reference and interval strings.
 *
 * @param reference value assigned to reference_sequence
 * @param intervals value assigned to intervalsString
 * @return the GATKIntervals built from that command line
 */
private def createGATKIntervals(reference: File, intervals: Seq[String]): GATKIntervals = {
  val commandLine = new CommandLineGATK
  commandLine.reference_sequence = reference
  commandLine.intervalsString = intervals
  new GATKIntervals(commandLine)
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,4 +42,43 @@ class ExampleUnifiedGenotyperPipelineTest {
|
|||
spec.jobRunners = PipelineTest.allJobRunners
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
/**
 * Executes the ExampleUnifiedGenotyper QScript with -L pointing at intervalTest.intervals,
 * using the "Lsf706" job runner only.
 */
@Test
def testUnifiedGenotyperWithGatkIntervals() {
  val spec = new PipelineTestSpec
  spec.name = "unifiedgenotyper_with_gatk_intervals"

  // Each fragment carries its own leading space, so plain concatenation yields the command line.
  val qscriptArg = " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala"
  val bamArg = " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam"
  val referenceArg = " -R " + BaseTest.hg18Reference
  val intervalsArg = " -L " + BaseTest.validationDataLocation + "intervalTest.intervals"
  spec.args = qscriptArg + bamArg + referenceArg + intervalsArg

  spec.jobRunners = Seq("Lsf706")
  PipelineTest.executeTest(spec)
}
|
||||
|
||||
/**
 * Executes the ExampleUnifiedGenotyper QScript with -L pointing at intervalTest.bed,
 * using the "Lsf706" job runner only.
 */
@Test
def testUnifiedGenotyperWithBedIntervals() {
  val spec = new PipelineTestSpec
  spec.name = "unifiedgenotyper_with_bed_intervals"

  // Each fragment carries its own leading space, so plain concatenation yields the command line.
  val qscriptArg = " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala"
  val bamArg = " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam"
  val referenceArg = " -R " + BaseTest.hg18Reference
  val intervalsArg = " -L " + BaseTest.validationDataLocation + "intervalTest.bed"
  spec.args = qscriptArg + bamArg + referenceArg + intervalsArg

  spec.jobRunners = Seq("Lsf706")
  PipelineTest.executeTest(spec)
}
|
||||
|
||||
/**
 * Executes the ExampleUnifiedGenotyper QScript with -L pointing at intervalTest.1.vcf,
 * using the "Lsf706" job runner only.
 */
@Test
def testUnifiedGenotyperWithVcfIntervals() {
  val spec = new PipelineTestSpec
  spec.name = "unifiedgenotyper_with_vcf_intervals"

  // Each fragment carries its own leading space, so plain concatenation yields the command line.
  val qscriptArg = " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala"
  val bamArg = " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam"
  val referenceArg = " -R " + BaseTest.hg18Reference
  val intervalsArg = " -L " + BaseTest.validationDataLocation + "intervalTest.1.vcf"
  spec.args = qscriptArg + bamArg + referenceArg + intervalsArg

  spec.jobRunners = Seq("Lsf706")
  PipelineTest.executeTest(spec)
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue