Initial working version of new interval system in which the argument for -L (and -XL) is allowed to be a rod file (e.g. VCF). Old samtools-style intervals still behave as before. BTI is no longer supported. The merging (union or intersection) of intervals is now consistently applied to all -L (or -XL) intervals, which is nice. More testing needed.

This commit is contained in:
Eric Banks 2011-10-26 14:11:49 -04:00
parent f6ccac889b
commit 9424e8b2ca
11 changed files with 252 additions and 187 deletions

View File

@ -336,6 +336,28 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
return parse(parsingEngine, source, type, matches, false);
}
/**
* The actual argument parsing method.
*
* IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
* that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
* this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
* is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
* usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
* side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
* continue to input them out of order (whereas Tribble Features are ordered).
*
* @param parsingEngine parsing engine
* @param source source
* @param type type to check
* @param matches matches
* @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
*/
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches );
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
@ -348,7 +370,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if ( tags.getPositionalTags().size() > 2 ) {
throw new UserException.CommandLineException(
String.format("Unexpected number of positional tags for argument %s : %s. " +
"Rod bindings only suport -X:type and -X:name,type argument styles",
"Rod bindings only support -X:type and -X:name,type argument styles",
value, source.field.getName()));
} if ( tags.getPositionalTags().size() == 2 ) {
// -X:name,type style
@ -378,7 +400,12 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
}
if ( tribbleType == null )
if ( tribbleType == null ) {
// IntervalBindings allow streaming conversion of Strings
if ( createIntervalBinding ) {
return new IntervalBinding(value);
}
if ( ! file.exists() ) {
throw new UserException.CouldNotReadInputFile(file, "file does not exist");
} else if ( ! file.canRead() || ! file.isFile() ) {
@ -389,13 +416,20 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
manager.userFriendlyListOfAvailableFeatures(parameterType)));
}
}
}
}
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(result,tags);
parsingEngine.addRodBinding(result);
Object result;
if ( createIntervalBinding ) {
result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
} else {
RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(rbind, tags);
parsingEngine.addRodBinding(rbind);
result = rbind;
}
return result;
} catch (InvocationTargetException e) {
throw new UserException.CommandLineException(
@ -409,6 +443,39 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
}
/**
* Parser for IntervalBinding objects.
*
* Claims every argument whose declared type is assignable to IntervalBinding and hands the actual
* parsing to RodBindingArgumentTypeDescriptor with its createIntervalBinding flag set to true.
*/
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
/**
* We only want IntervalBinding class objects
* @param type The type to check.
* @return true if the provided class is IntervalBinding.class or assignable to it
*/
@Override
public boolean supports( Class type ) {
return isIntervalBinding(type);
}
/**
* Static form of the supports() check, usable without an instance of this descriptor.
*
* @param type The type to check.
* @return true if IntervalBinding.class is assignable from the provided class
*/
public static boolean isIntervalBinding( Class type ) {
return IntervalBinding.class.isAssignableFrom(type);
}
/**
* Parses the argument by delegating to a fresh RodBindingArgumentTypeDescriptor with
* createIntervalBinding == true. See note from RodBindingArgumentTypeDescriptor.parse().
*
* @param parsingEngine parsing engine
* @param source source
* @param type type to check
* @param matches matches
* @return the IntervalBinding object.
*/
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
// the trailing 'true' is the createIntervalBinding flag of the five-argument parse() overload
return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
}
}
/**
* Parse simple argument types: java primitives, wrapper classes, and anything that has
* a simple String constructor.
@ -416,7 +483,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public boolean supports( Class type ) {
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false;
if ( type.isPrimitive() ) return true;
if ( type.isEnum() ) return true;
if ( primitiveToWrapperMap.containsValue(type) ) return true;

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.io.File;
import java.util.*;
/**
 * An IntervalBinding representing a walker argument that gets bound to either a ROD track or interval string.
 *
 * The IntervalBinding<T> is a formal GATK argument that bridges between a walker and
 * the engine to construct intervals for traversal at runtime.  The IntervalBinding can
 * either be a RodBinding<T>, a string of one or more intervals, or a file with interval strings.
 * The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it.
 *
 * Exactly one of the two backing fields is non-null, depending on which constructor was used.
 * Both fields are final, so a binding can never be re-pointed at a different source after construction.
 */
public final class IntervalBinding<T extends Feature> {

    /** The ROD track backing this binding, or null when the binding was built from a raw string. */
    private final RodBinding<T> featureIntervals;

    /** The raw interval argument (interval string or interval file name), or null when ROD-backed. */
    private final String stringIntervals;

    /**
     * Creates a binding backed by a ROD track; the arguments are forwarded unchanged to the RodBinding constructor.
     *
     * @param type        the Feature class of the track
     * @param rawName     the name given to the binding
     * @param source      the location of the track; it is later opened as a file by getIntervals()
     * @param tribbleType the name of the Tribble feature type used to look up the codec
     * @param tags        the tags attached to the command-line argument
     */
    @Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
    public IntervalBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
        featureIntervals = new RodBinding<T>(type, rawName, source, tribbleType, tags);
        stringIntervals = null;
    }

    /**
     * Creates a binding backed by a raw interval argument.
     *
     * @param intervalArgument the interval argument exactly as given; it is parsed lazily by getIntervals()
     */
    @Requires({"intervalArgument != null"})
    public IntervalBinding(String intervalArgument) {
        featureIntervals = null;
        stringIntervals = intervalArgument;
    }

    /**
     * @return the source of the underlying RodBinding when this binding is ROD-backed, otherwise the raw interval argument
     */
    public String getSource() {
        if ( featureIntervals != null )
            return featureIntervals.getSource();
        return stringIntervals;
    }

    /**
     * Determines the intervals described by this binding.
     *
     * For a ROD-backed binding the track is opened from its source file and the location of every
     * feature is collected in iteration order; otherwise the raw argument is handed to
     * IntervalUtils.parseIntervalArguments(). This method itself does no sorting or merging.
     *
     * @param toolkit the engine supplying the genome loc parser and, for ROD-backed bindings, the
     *                reference sequence dictionary and the 'unsafe' validation setting
     * @return the list of intervals read from the track or parsed from the raw argument
     */
    public List<GenomeLoc> getIntervals(GenomeAnalysisEngine toolkit) {
        List<GenomeLoc> intervals;

        if ( featureIntervals != null ) {
            intervals = new ArrayList<GenomeLoc>();

            RMDTrackBuilder builder = new RMDTrackBuilder(toolkit.getReferenceDataSource().getReference().getSequenceDictionary(),
                    toolkit.getGenomeLocParser(),
                    toolkit.getArguments().unsafe);

            // NOTE(review): if getByName() can return null for an unknown type name, the next line throws a
            // NullPointerException -- confirm against FeatureManager before relying on it.
            FeatureManager.FeatureDescriptor descriptor = new FeatureManager().getByName(featureIntervals.getTribbleType());
            final CloseableIterator<GATKFeature> iterator = builder.createInstanceOfTrack(descriptor.getCodecClass(), new File(featureIntervals.getSource())).getIterator();
            try {
                while ( iterator.hasNext() ) {
                    intervals.add(iterator.next().getLocation());
                }
            } finally {
                // release the underlying track even when reading a feature fails part-way through
                iterator.close();
            }
        } else {
            intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals);
        }

        return intervals;
    }
}

View File

@ -75,6 +75,7 @@ public class ParsingEngine {
* The type of set used must be ordered (but not necessarily sorted).
*/
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
new IntervalBindingArgumentTypeDescriptor(),
new RodBindingArgumentTypeDescriptor(),
new CompoundArgumentTypeDescriptor(),
new MultiplexArgumentTypeDescriptor()) );

View File

@ -28,6 +28,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
@ -42,7 +43,6 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*;
@ -50,6 +50,7 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.io.File;
@ -296,7 +297,7 @@ public class GenomeAnalysisEngine {
else if(WalkerManager.getDownsamplingMethod(walker) != null)
method = WalkerManager.getDownsamplingMethod(walker);
else
method = argCollection.getDefaultDownsamplingMethod();
method = GATKArgumentCollection.getDefaultDownsamplingMethod();
return method;
}
@ -563,34 +564,23 @@ public class GenomeAnalysisEngine {
protected void initializeIntervals() {
// return if no interval arguments at all
if ((argCollection.intervals == null) && (argCollection.excludeIntervals == null) && (argCollection.RODToInterval == null))
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
return;
// if '-L all' was specified, verify that it was the only -L specified and return if so.
if(argCollection.intervals != null) {
for(String interval: argCollection.intervals) {
if(interval.trim().equals("all")) {
if(argCollection.intervals.size() > 1)
throw new UserException("'-L all' was specified along with other intervals or interval lists; the GATK cannot combine '-L all' with other intervals.");
// '-L all' was specified and seems valid. Return.
return;
}
}
}
// Note that the use of '-L all' is no longer supported.
// if include argument isn't given, create new set of all possible intervals
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null && argCollection.RODToInterval == null ?
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
loadIntervals(argCollection.intervals, IntervalUtils.mergeIntervalLocations(getRODIntervals(), argCollection.intervalMerging)));
loadIntervals(argCollection.intervals, argCollection.intervalSetRule));
// if no exclude arguments, can return parseIntervalArguments directly
if (argCollection.excludeIntervals == null)
if ( argCollection.excludeIntervals == null )
intervals = includeSortedSet;
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
else {
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, null);
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
// logging messages only printed when exclude (-XL) arguments are given
@ -601,51 +591,39 @@ public class GenomeAnalysisEngine {
logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)",
toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
}
// DEBUGGING OUTPUT
for ( GenomeLoc loc : intervals )
logger.info("Including -L interval: " + loc);
}
/**
* Loads the intervals relevant to the current execution
* @param argList String representation of arguments; might include 'all', filenames, intervals in samtools
* notation, or a combination of the above
* @param rodIntervals a list of ROD intervals to add to the returned set. Can be empty or null.
* @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
* @param rule interval merging rule
* @return A sorted, merged list of all intervals specified in this arg list.
*/
protected GenomeLocSortedSet loadIntervals( List<String> argList, List<GenomeLoc> rodIntervals ) {
protected GenomeLocSortedSet loadIntervals( List<IntervalBinding<Feature>> argList, IntervalSetRule rule ) {
boolean allowEmptyIntervalList = (argCollection.unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST ||
argCollection.unsafe == ValidationExclusion.TYPE.ALL);
List<GenomeLoc> nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList);
List<GenomeLoc> allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule);
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
for ( IntervalBinding intervalBinding : argList ) {
List<GenomeLoc> intervals = intervalBinding.getIntervals(this);
if ( !allowEmptyIntervalList && intervals.isEmpty() ) {
throw new UserException("The interval file " + intervalBinding.getSource() + " contains no intervals " +
"that could be parsed, and the unsafe operation ALLOW_EMPTY_INTERVAL_LIST has " +
"not been enabled");
}
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
}
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
}
/**
* if we have a ROD specified as a 'rodToIntervalTrackName', convert its records to RODs
* @return ROD intervals as GenomeLocs
*/
private List<GenomeLoc> getRODIntervals() {
Map<String, ReferenceOrderedDataSource> rodNames = RMDIntervalGenerator.getRMDTrackNames(rodDataSources);
// Do we have any RODs that overloaded as interval lists with the 'rodToIntervalTrackName' flag?
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
if (rodNames != null && argCollection.RODToInterval != null) {
String rodName = argCollection.RODToInterval;
// check to make sure we have a rod of that name
if (!rodNames.containsKey(rodName))
throw new UserException.CommandLineException("--rodToIntervalTrackName (-BTI) was passed the name '"+rodName+"', which wasn't given as a ROD name in the -B option");
for (String str : rodNames.keySet())
if (str.equals(rodName)) {
logger.info("Adding interval list from track (ROD) named " + rodName);
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str));
ret.addAll(intervalGenerator.toGenomeLocList());
}
}
return ret;
}
/**
* Add additional, externally managed IO streams for inputs.
*

View File

@ -26,9 +26,11 @@
package org.broadinstitute.sting.gatk.arguments;
import net.sf.samtools.SAMFileReader;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
@ -84,11 +86,20 @@ public class GATKArgumentCollection {
@ElementList(required = false)
@Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
public List<String> intervals = null;
public List<IntervalBinding<Feature>> intervals = null;
@ElementList(required = false)
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
public List<String> excludeIntervals = null;
public List<IntervalBinding<Feature>> excludeIntervals = null;
@Element(required = false)
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L inputs", required = false)
public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
/** What rule should we use when merging intervals */
@Element(required = false)
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
@Element(required = false)
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
@ -100,14 +111,6 @@ public class GATKArgumentCollection {
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
public ArrayList<String> RODBindings = new ArrayList<String>();
@Element(required = false)
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
public String RODToInterval = null;
@Element(required = false)
@Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false)
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
@Element(required = false)
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
public boolean nonDeterministicRandomSeed = false;
@ -197,11 +200,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
public int numberOfThreads = 1;
/** What rule should we use when merging intervals */
@Element(required = false)
@Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false)
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
@ElementList(required = false)
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
public List<String> readGroupBlackList = null;
@ -442,19 +440,15 @@ public class GATKArgumentCollection {
if (other.intervalMerging != this.intervalMerging) {
return false;
}
if ((other.RODToInterval == null && RODToInterval != null) ||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
return false;
}
if (other.phoneHomeType != this.phoneHomeType) {
return false;
}
if (BTIMergeRule != other.BTIMergeRule)
if (intervalSetRule != other.intervalSetRule)
return false;
if ( BAQMode != other.BAQMode) return false;
if ( BAQMode != other.BAQMode ) return false;
if ( BAQGOP != other.BAQGOP ) return false;
if ((other.performanceLog == null && this.performanceLog != null) ||

View File

@ -97,7 +97,7 @@ public class FindLargeShards extends CommandLineProgram {
// intervals
GenomeLocSortedSet intervalSortedSet = null;
if(intervals != null)
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, true), IntervalMergingRule.ALL);
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals), IntervalMergingRule.ALL);
else {
intervalSortedSet = new GenomeLocSortedSet(genomeLocParser);
for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences())

View File

@ -140,7 +140,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
if (descriptor == null)
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
throw new ReviewedStingException("Unable to find type name for codec class " + codecClass.getName());
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
}

View File

@ -1,57 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.*;
/**
 * Class RMDIntervalGenerator
 *
 * Turns a reference-ordered data source into a list of genome locations, and offers a
 * helper for indexing a collection of such sources by name.
 *
 * @author aaron
 */
public class RMDIntervalGenerator {

    /** The data source whose records are converted to intervals. */
    public ReferenceOrderedDataSource dataSource;

    /**
     * Wraps the given ROD track so it can be converted to intervals.
     *
     * @param dataSource the track; must not be null
     * @throws IllegalArgumentException if dataSource is null
     */
    public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) {
        if (dataSource == null) throw new IllegalArgumentException("Data source cannot be null");
        this.dataSource = dataSource;
    }

    /**
     * Collects the location of every record returned by seeking the data source with a null location.
     * Records that report a null location are skipped.
     *
     * @return the non-null record locations, in iteration order
     */
    public List<GenomeLoc> toGenomeLocList() {
        final Iterator<RODRecordList> records = dataSource.seek((GenomeLoc)null);
        final List<GenomeLoc> collected = new ArrayList<GenomeLoc>();
        while (records.hasNext()) {
            final GenomeLoc location = records.next().getLocation();
            if (location == null) {
                continue;
            }
            collected.add(location);
        }
        return collected;
    }

    /**
     * Indexes the given data sources by their names. When two sources share a name, the later one wins.
     *
     * @param sources the reference ordered data sources to index
     * @return a map from source name to source
     */
    public static Map<String,ReferenceOrderedDataSource> getRMDTrackNames(List<ReferenceOrderedDataSource> sources) {
        final Map<String,ReferenceOrderedDataSource> sourcesByName = new HashMap<String,ReferenceOrderedDataSource>();
        final Iterator<ReferenceOrderedDataSource> cursor = sources.iterator();
        while (cursor.hasNext()) {
            final ReferenceOrderedDataSource current = cursor.next();
            sourcesByName.put(current.getName(), current);
        }
        return sourcesByName;
    }
}

View File

@ -32,7 +32,6 @@ import net.sf.samtools.util.SequenceUtil;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
@ -386,10 +385,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
intervals = merger;
} else {
// read in the whole list of intervals for cleaning
boolean allowEmptyIntervalList = (getToolkit().getArguments().unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST ||
getToolkit().getArguments().unsafe == ValidationExclusion.TYPE.ALL);
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile),allowEmptyIntervalList),
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile)),
IntervalMergingRule.OVERLAPPING_ONLY);
intervals = locs.iterator();
}

View File

@ -372,7 +372,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
} else {
// read in the whole list of intervals for cleaning
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY);
genotypeIntervalIterator = locs.iterator();
}

View File

@ -35,62 +35,60 @@ public class IntervalUtils {
*
* @param parser Genome loc parser.
* @param argList A list of strings containing interval data.
* @param allowEmptyIntervalList If false instead of an empty interval list will return null.
* @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used.
* @return an unsorted, unmerged representation of the given intervals; never null (an empty list is returned when argList is null or empty).
*/
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, List<String> argList, boolean allowEmptyIntervalList) {
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, List<String> argList) {
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to
// ensure that the arg list isn't null before looping.
for (String argument : argList) {
// separate argument on semicolon first
for (String fileOrInterval : argument.split(";")) {
// if any interval argument is '-L all', consider all loci by returning no intervals
if (fileOrInterval.trim().toLowerCase().equals("all")) {
if (argList.size() != 1) {
// throw error if '-L all' is not only interval - potentially conflicting commands
throw new UserException.CommandLineException(String.format("Conflicting arguments: Intervals given along with \"-L all\""));
}
return null;
}
// if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'.
else if (isUnmapped(fileOrInterval))
rawIntervals.add(GenomeLoc.UNMAPPED);
// if it's a file, add items to raw interval list
else if (isIntervalFile(fileOrInterval)) {
try {
rawIntervals.addAll(intervalFileToList(parser, fileOrInterval, allowEmptyIntervalList));
}
catch ( UserException.MalformedGenomeLoc e ) {
throw e;
}
catch ( Exception e ) {
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in any supported format.", e);
}
}
// otherwise treat as an interval -> parse and add to raw interval list
else {
rawIntervals.add(parser.parseGenomeLoc(fileOrInterval));
}
}
rawIntervals.addAll(parseIntervalArguments(parser, argument));
}
}
return rawIntervals;
}
/**
/**
 * Parses a single interval argument into raw genome locations.
 *
 * The argument is split on semicolons and each piece is handled independently: an 'unmapped'
 * piece contributes the GenomeLoc.UNMAPPED sentinel, a piece recognized as an interval file
 * contributes every interval read from that file, and anything else is parsed as one interval.
 *
 * @param parser Genome loc parser.
 * @param arg    A string containing interval data.
 * @return an unsorted, unmerged list of the intervals described by the argument.
 */
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, String arg) {
    final List<GenomeLoc> parsed = new ArrayList<GenomeLoc>();

    for (final String piece : arg.split(";")) {
        // 'unmapped' stands for the reads with no alignment data
        if (isUnmapped(piece)) {
            parsed.add(GenomeLoc.UNMAPPED);
            continue;
        }

        // not a file => a single interval given directly on the command line
        if (!isIntervalFile(piece)) {
            parsed.add(parser.parseGenomeLoc(piece));
            continue;
        }

        // an interval file: malformed-interval errors pass through untouched, anything else is
        // reported as a malformed file
        try {
            parsed.addAll(intervalFileToList(parser, piece));
        }
        catch ( UserException.MalformedGenomeLoc e ) {
            throw e;
        }
        catch ( Exception e ) {
            throw new UserException.MalformedFile(piece, "Interval file could not be parsed in any supported format.", e);
        }
    }

    return parsed;
}
/**
* Read a file of genome locations to process. The file may be in BED, Picard,
* or GATK interval format.
*
* @param file_name interval file
* @param allowEmptyIntervalList if false an exception will be thrown for files that contain no intervals
* @param glParser GenomeLocParser
* @param file_name interval file
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file
*/
public static List<GenomeLoc> intervalFileToList(final GenomeLocParser glParser, final String file_name, boolean allowEmptyIntervalList) {
public static List<GenomeLoc> intervalFileToList(final GenomeLocParser glParser, final String file_name) {
// try to open file
File inputFile = new File(file_name);
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
@ -145,12 +143,6 @@ public class IntervalUtils {
}
}
if ( ret.isEmpty() && ! allowEmptyIntervalList ) {
throw new UserException("The interval file " + inputFile.getAbsolutePath() + " contains no intervals " +
"that could be parsed, and the unsafe operation ALLOW_EMPTY_INTERVAL_LIST has " +
"not been enabled");
}
return ret;
}