From 9424e8b2caa7fa08de3867b881754628419fce71 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 26 Oct 2011 14:11:49 -0400 Subject: [PATCH] Initial working version of new interval system in which the argument for -L (and -XL) is allowed to be a rod file (e.g. VCF). Old samtools-style intervals still behave as before. BTI is no longer supported. The merging (union or intersection) of intervals is now consistently applied to all -L (or -XL) intervals, which is nice. More testing needed. --- .../commandline/ArgumentTypeDescriptor.java | 79 ++++++++++++++-- .../sting/commandline/IntervalBinding.java | 93 +++++++++++++++++++ .../sting/commandline/ParsingEngine.java | 1 + .../sting/gatk/GenomeAnalysisEngine.java | 80 ++++++---------- .../arguments/GATKArgumentCollection.java | 36 +++---- .../reads/utilities/FindLargeShards.java | 2 +- .../gatk/refdata/tracks/RMDTrackBuilder.java | 2 +- .../refdata/utils/RMDIntervalGenerator.java | 57 ------------ .../gatk/walkers/indels/IndelRealigner.java | 5 +- .../indels/SomaticIndelDetectorWalker.java | 2 +- .../sting/utils/interval/IntervalUtils.java | 82 ++++++++-------- 11 files changed, 252 insertions(+), 187 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index d1d9cf7fe..31212a46f 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -336,6 +336,28 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { + return 
parse(parsingEngine, source, type, matches, false); + } + + /** + * The actual argument parsing method. + * + * IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided + * that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of + * this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding + * is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the + * usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other + * side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically + * continue to input them out of order (whereas Tribble Features are ordered). + * + * @param parsingEngine parsing engine + * @param source source + * @param type type to check + * @param matches matches + * @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding? + * @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding. + */ + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); Class parameterType = JVMUtils.getParameterizedTypeClass(type); @@ -348,7 +370,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { if ( tags.getPositionalTags().size() > 2 ) { throw new UserException.CommandLineException( String.format("Unexpected number of positional tags for argument %s : %s. 
" + - "Rod bindings only suport -X:type and -X:name,type argument styles", + "Rod bindings only support -X:type and -X:name,type argument styles", value, source.field.getName())); } if ( tags.getPositionalTags().size() == 2 ) { // -X:name,type style @@ -378,7 +400,12 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { } } - if ( tribbleType == null ) + if ( tribbleType == null ) { + // IntervalBindings allow streaming conversion of Strings + if ( createIntervalBinding ) { + return new IntervalBinding(value); + } + if ( ! file.exists() ) { throw new UserException.CouldNotReadInputFile(file, "file does not exist"); } else if ( ! file.canRead() || ! file.isFile() ) { @@ -389,13 +416,20 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s", manager.userFriendlyListOfAvailableFeatures(parameterType))); } + } } } Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); - RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); - parsingEngine.addTags(result,tags); - parsingEngine.addRodBinding(result); + Object result; + if ( createIntervalBinding ) { + result = ctor.newInstance(parameterType, name, value, tribbleType, tags); + } else { + RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); + parsingEngine.addTags(rbind, tags); + parsingEngine.addRodBinding(rbind); + result = rbind; + } return result; } catch (InvocationTargetException e) { throw new UserException.CommandLineException( @@ -409,6 +443,39 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { } } +/** + * Parser for IntervalBinding objects + */ +class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { + /** + * We only want IntervalBinding class objects + * @param type The 
type to check. + * @return true if the provided class is an IntervalBinding.class + */ + @Override + public boolean supports( Class type ) { + return isIntervalBinding(type); + } + + public static boolean isIntervalBinding( Class type ) { + return IntervalBinding.class.isAssignableFrom(type); + } + + /** + * See note from RodBindingArgumentTypeDescriptor.parse(). + * + * @param parsingEngine parsing engine + * @param source source + * @param type type to check + * @param matches matches + * @return the IntervalBinding object. + */ + @Override + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { + return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true); + } +} + /** * Parse simple argument types: java primitives, wrapper classes, and anything that has * a simple String constructor. @@ -416,7 +483,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public boolean supports( Class type ) { - if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false; + if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false; if ( type.isPrimitive() ) return true; if ( type.isEnum() ) return true; if ( primitiveToWrapperMap.containsValue(type) ) return true; diff --git a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java new file mode 100644 index 000000000..7fcf50e9e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the 
Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import com.google.java.contract.Requires; +import net.sf.samtools.util.CloseableIterator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.interval.IntervalUtils; + +import java.io.File; +import java.util.*; + +/** + * An IntervalBinding representing a walker argument that gets bound to either a ROD track or interval string. + * + * The IntervalBinding is a formal GATK argument that bridges between a walker and + * the engine to construct intervals for traversal at runtime. The IntervalBinding can + * either be a RodBinding, a string of one or more intervals, or a file with interval strings. 
+ * The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it. + * + * Note that this class is immutable. + */ +public final class IntervalBinding { + + private RodBinding featureIntervals; + private String stringIntervals; + + @Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"}) + public IntervalBinding(Class type, final String rawName, final String source, final String tribbleType, final Tags tags) { + featureIntervals = new RodBinding(type, rawName, source, tribbleType, tags); + } + + @Requires({"intervalArgument != null"}) + public IntervalBinding(String intervalArgument) { + stringIntervals = intervalArgument; + } + + public String getSource() { + if ( featureIntervals != null ) + return featureIntervals.getSource(); + return stringIntervals; + } + + public List getIntervals(GenomeAnalysisEngine toolkit) { + List intervals; + + if ( featureIntervals != null ) { + intervals = new ArrayList(); + + RMDTrackBuilder builder = new RMDTrackBuilder(toolkit.getReferenceDataSource().getReference().getSequenceDictionary(), + toolkit.getGenomeLocParser(), + toolkit.getArguments().unsafe); + FeatureManager.FeatureDescriptor descriptor = new FeatureManager().getByName(featureIntervals.getTribbleType()); + CloseableIterator iterator = builder.createInstanceOfTrack(descriptor.getCodecClass(), new File(featureIntervals.getSource())).getIterator(); + while ( iterator.hasNext() ) { + intervals.add(iterator.next().getLocation()); + } + iterator.close(); + + } else { + intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals); + } + + return intervals; + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index fbf8c6516..ad58553c1 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ 
b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -75,6 +75,7 @@ public class ParsingEngine { * The type of set used must be ordered (but not necessarily sorted). */ private static final Set STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet( Arrays.asList(new SimpleArgumentTypeDescriptor(), + new IntervalBindingArgumentTypeDescriptor(), new RodBindingArgumentTypeDescriptor(), new CompoundArgumentTypeDescriptor(), new MultiplexArgumentTypeDescriptor()) ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 7bc3daa9a..2b6c280c8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -28,6 +28,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.*; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; @@ -42,7 +43,6 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.samples.SampleDBBuilder; import org.broadinstitute.sting.gatk.walkers.*; @@ -50,6 +50,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import 
org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; import java.io.File; @@ -296,7 +297,7 @@ public class GenomeAnalysisEngine { else if(WalkerManager.getDownsamplingMethod(walker) != null) method = WalkerManager.getDownsamplingMethod(walker); else - method = argCollection.getDefaultDownsamplingMethod(); + method = GATKArgumentCollection.getDefaultDownsamplingMethod(); return method; } @@ -563,34 +564,23 @@ public class GenomeAnalysisEngine { protected void initializeIntervals() { // return if no interval arguments at all - if ((argCollection.intervals == null) && (argCollection.excludeIntervals == null) && (argCollection.RODToInterval == null)) + if ( argCollection.intervals == null && argCollection.excludeIntervals == null ) return; - // if '-L all' was specified, verify that it was the only -L specified and return if so. - if(argCollection.intervals != null) { - for(String interval: argCollection.intervals) { - if(interval.trim().equals("all")) { - if(argCollection.intervals.size() > 1) - throw new UserException("'-L all' was specified along with other intervals or interval lists; the GATK cannot combine '-L all' with other intervals."); - - // '-L all' was specified and seems valid. Return. - return; - } - } - } + // Note that the use of '-L all' is no longer supported. // if include argument isn't given, create new set of all possible intervals - GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null && argCollection.RODToInterval == null ? + GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ? 
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) : - loadIntervals(argCollection.intervals, IntervalUtils.mergeIntervalLocations(getRODIntervals(), argCollection.intervalMerging))); + loadIntervals(argCollection.intervals, argCollection.intervalSetRule)); // if no exclude arguments, can return parseIntervalArguments directly - if (argCollection.excludeIntervals == null) + if ( argCollection.excludeIntervals == null ) intervals = includeSortedSet; - // otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets + // otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets else { - GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, null); + GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION); intervals = includeSortedSet.subtractRegions(excludeSortedSet); // logging messages only printed when exclude (-XL) arguments are given @@ -601,51 +591,39 @@ public class GenomeAnalysisEngine { logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)", toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize))); } + + // DEBUGGING OUTPUT + for ( GenomeLoc loc : intervals ) + logger.info("Including -L interval: " + loc); } /** * Loads the intervals relevant to the current execution - * @param argList String representation of arguments; might include 'all', filenames, intervals in samtools - * notation, or a combination of the above - * @param rodIntervals a list of ROD intervals to add to the returned set. Can be empty or null. + * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above + * @param rule interval merging rule * @return A sorted, merged list of all intervals specified in this arg list. 
*/ - protected GenomeLocSortedSet loadIntervals( List argList, List rodIntervals ) { + protected GenomeLocSortedSet loadIntervals( List> argList, IntervalSetRule rule ) { boolean allowEmptyIntervalList = (argCollection.unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST || argCollection.unsafe == ValidationExclusion.TYPE.ALL); - List nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList); - List allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule); + List allIntervals = new ArrayList(0); + for ( IntervalBinding intervalBinding : argList ) { + List intervals = intervalBinding.getIntervals(this); + + if ( !allowEmptyIntervalList && intervals.isEmpty() ) { + throw new UserException("The interval file " + intervalBinding.getSource() + " contains no intervals " + + "that could be parsed, and the unsafe operation ALLOW_EMPTY_INTERVAL_LIST has " + + "not been enabled"); + } + + allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule); + } return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging); } - /** - * if we have a ROD specified as a 'rodToIntervalTrackName', convert its records to RODs - * @return ROD intervals as GenomeLocs - */ - private List getRODIntervals() { - Map rodNames = RMDIntervalGenerator.getRMDTrackNames(rodDataSources); - // Do we have any RODs that overloaded as interval lists with the 'rodToIntervalTrackName' flag? 
- List ret = new ArrayList(); - if (rodNames != null && argCollection.RODToInterval != null) { - String rodName = argCollection.RODToInterval; - - // check to make sure we have a rod of that name - if (!rodNames.containsKey(rodName)) - throw new UserException.CommandLineException("--rodToIntervalTrackName (-BTI) was passed the name '"+rodName+"', which wasn't given as a ROD name in the -B option"); - - for (String str : rodNames.keySet()) - if (str.equals(rodName)) { - logger.info("Adding interval list from track (ROD) named " + rodName); - RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str)); - ret.addAll(intervalGenerator.toGenomeLocList()); - } - } - return ret; - } - /** * Add additional, externally managed IO streams for inputs. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 486868dc2..70819a092 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -26,9 +26,11 @@ package org.broadinstitute.sting.gatk.arguments; import net.sf.samtools.SAMFileReader; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.IntervalBinding; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; @@ -84,11 +86,20 @@ public class GATKArgumentCollection { @ElementList(required = false) @Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. 
Can be explicitly specified on the command line or in a file.", required = false) - public List intervals = null; + public List> intervals = null; @ElementList(required = false) @Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false) - public List excludeIntervals = null; + public List> excludeIntervals = null; + + @Element(required = false) + @Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L inputs", required = false) + public IntervalSetRule intervalSetRule = IntervalSetRule.UNION; + + /** What rule should we use when merging intervals */ + @Element(required = false) + @Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false) + public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL; @Element(required = false) @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) @@ -100,14 +111,6 @@ public class GATKArgumentCollection { @Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :, ", required = false) public ArrayList RODBindings = new ArrayList(); - @Element(required = false) - @Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false) - public String RODToInterval = null; - - @Element(required = false) - @Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false) - public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION; - 
@Element(required = false) @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false) public boolean nonDeterministicRandomSeed = false; @@ -197,11 +200,6 @@ public class GATKArgumentCollection { @Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false) public int numberOfThreads = 1; - /** What rule should we use when merging intervals */ - @Element(required = false) - @Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false) - public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL; - @ElementList(required = false) @Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching : or a .txt file containing the filter strings one per line.", required = false) public List readGroupBlackList = null; @@ -442,19 +440,15 @@ public class GATKArgumentCollection { if (other.intervalMerging != this.intervalMerging) { return false; } - if ((other.RODToInterval == null && RODToInterval != null) || - (other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) { - return false; - } if (other.phoneHomeType != this.phoneHomeType) { return false; } - if (BTIMergeRule != other.BTIMergeRule) + if (intervalSetRule != other.intervalSetRule) return false; - if ( BAQMode != other.BAQMode) return false; + if ( BAQMode != other.BAQMode ) return false; if ( BAQGOP != other.BAQGOP ) return false; if ((other.performanceLog == null && this.performanceLog != null) || diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java index 24d8bc6c5..673df6dfa 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java @@ -97,7 +97,7 @@ public class FindLargeShards extends CommandLineProgram { // intervals GenomeLocSortedSet intervalSortedSet = null; if(intervals != null) - intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, true), IntervalMergingRule.ALL); + intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals), IntervalMergingRule.ALL); else { intervalSortedSet = new GenomeLocSortedSet(genomeLocParser); for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences()) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index 3b4558579..edb514984 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -140,7 +140,7 @@ public class RMDTrackBuilder { // extends PluginManager { final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass); if (descriptor == null) - throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName()); + throw new ReviewedStingException("Unable to find type name for codec class " + codecClass.getName()); return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java deleted file mode 100644 index 
a7666981c..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java +++ /dev/null @@ -1,57 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata.utils; - -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.util.*; - - -/** - * - * @author aaron - * - * Class RMDIntervalGenerator - * - * Creates an interval list, given an RMDTrack - */ -public class RMDIntervalGenerator { - public ReferenceOrderedDataSource dataSource; - - /** - * create a interval representation of a ROD track - * @param dataSource the track - */ - public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) { - if (dataSource == null) throw new IllegalArgumentException("Data source cannot be null"); - this.dataSource = dataSource; - } - - /** - * create a genome location list from the interval track - * @return a list of genome locations - */ - public List toGenomeLocList() { - Iterator iter = dataSource.seek((GenomeLoc)null); - List locations = new ArrayList(); - while (iter.hasNext()) { - RODRecordList feature = iter.next(); - GenomeLoc loc = feature.getLocation(); - if (loc != null) locations.add(loc); - } - return locations; - } - - /** - * return a map of reference meta data track names to RODS - * @param sources the reference ordered data sources to get the names from - * @return a map of reference meta data names to RODS - */ - public static Map getRMDTrackNames(List sources) { - // get a list of the current rod names we're working with - Map rodNames = new HashMap(); - for (ReferenceOrderedDataSource rod : sources) { - rodNames.put(rod.getName(),rod); - } - return rodNames; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 41fa755b8..a17956008 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -32,7 +32,6 @@ import net.sf.samtools.util.SequenceUtil; import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; @@ -386,10 +385,8 @@ public class IndelRealigner extends ReadWalker { intervals = merger; } else { // read in the whole list of intervals for cleaning - boolean allowEmptyIntervalList = (getToolkit().getArguments().unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST || - getToolkit().getArguments().unsafe == ValidationExclusion.TYPE.ALL); GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(), - IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile),allowEmptyIntervalList), + IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY); intervals = locs.iterator(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 74cbfa05f..434cbec52 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -372,7 +372,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } else { // read in the whole list of intervals for cleaning GenomeLocSortedSet locs = 
IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(), - IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY); + IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY); genotypeIntervalIterator = locs.iterator(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 41cbbe59f..c9fc39aa6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -35,62 +35,60 @@ public class IntervalUtils { * * @param parser Genome loc parser. * @param argList A list of strings containing interval data. - * @param allowEmptyIntervalList If false instead of an empty interval list will return null. - * @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used. + * @return an unsorted, unmerged representation of the given intervals; never null (the special '-L all' value is no longer supported). */ - public static List parseIntervalArguments(GenomeLocParser parser, List argList, boolean allowEmptyIntervalList) { + public static List parseIntervalArguments(GenomeLocParser parser, List argList) { List rawIntervals = new ArrayList(); // running list of raw GenomeLocs if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to // ensure that the arg list isn't null before looping. 
for (String argument : argList) { - - // separate argument on semicolon first - for (String fileOrInterval : argument.split(";")) { - // if any interval argument is '-L all', consider all loci by returning no intervals - if (fileOrInterval.trim().toLowerCase().equals("all")) { - if (argList.size() != 1) { - // throw error if '-L all' is not only interval - potentially conflicting commands - throw new UserException.CommandLineException(String.format("Conflicting arguments: Intervals given along with \"-L all\"")); - } - return null; - } - // if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'. - else if (isUnmapped(fileOrInterval)) - rawIntervals.add(GenomeLoc.UNMAPPED); - // if it's a file, add items to raw interval list - else if (isIntervalFile(fileOrInterval)) { - try { - rawIntervals.addAll(intervalFileToList(parser, fileOrInterval, allowEmptyIntervalList)); - } - catch ( UserException.MalformedGenomeLoc e ) { - throw e; - } - catch ( Exception e ) { - throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in any supported format.", e); - } - } - - // otherwise treat as an interval -> parse and add to raw interval list - else { - rawIntervals.add(parser.parseGenomeLoc(fileOrInterval)); - } - } + rawIntervals.addAll(parseIntervalArguments(parser, argument)); } } return rawIntervals; } - /** + public static List parseIntervalArguments(GenomeLocParser parser, String arg) { + List rawIntervals = new ArrayList(); // running list of raw GenomeLocs + + // separate argument on semicolon first + for (String fileOrInterval : arg.split(";")) { + // if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'. 
+ if (isUnmapped(fileOrInterval)) + rawIntervals.add(GenomeLoc.UNMAPPED); + // if it's a file, add items to raw interval list + else if (isIntervalFile(fileOrInterval)) { + try { + rawIntervals.addAll(intervalFileToList(parser, fileOrInterval)); + } + catch ( UserException.MalformedGenomeLoc e ) { + throw e; + } + catch ( Exception e ) { + throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in any supported format.", e); + } + } + + // otherwise treat as an interval -> parse and add to raw interval list + else { + rawIntervals.add(parser.parseGenomeLoc(fileOrInterval)); + } + } + + return rawIntervals; + } + + /** * Read a file of genome locations to process. The file may be in BED, Picard, * or GATK interval format. * - * @param file_name interval file - * @param allowEmptyIntervalList if false an exception will be thrown for files that contain no intervals + * @param glParser GenomeLocParser + * @param file_name interval file * @return List List of Genome Locs that have been parsed from file */ - public static List intervalFileToList(final GenomeLocParser glParser, final String file_name, boolean allowEmptyIntervalList) { + public static List intervalFileToList(final GenomeLocParser glParser, final String file_name) { // try to open file File inputFile = new File(file_name); List ret = new ArrayList(); @@ -145,12 +143,6 @@ public class IntervalUtils { } } - if ( ret.isEmpty() && ! allowEmptyIntervalList ) { - throw new UserException("The interval file " + inputFile.getAbsolutePath() + " contains no intervals " + - "that could be parsed, and the unsafe operation ALLOW_EMPTY_INTERVAL_LIST has " + - "not been enabled"); - } - return ret; }