Initial working version of new interval system in which the argument for -L (and -XL) is allowed to be a rod file (e.g. VCF). Old samtools-style intervals still behave as before. BTI is no longer supported. The merging (union or intersection) of intervals is now consistently applied to all -L (or -XL) intervals, which is nice. More testing needed.
This commit is contained in:
parent
f6ccac889b
commit
9424e8b2ca
|
|
@ -336,6 +336,28 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
return parse(parsingEngine, source, type, matches, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* The actual argument parsing method.
|
||||
*
|
||||
* IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
|
||||
* that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
|
||||
* this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
|
||||
* is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
|
||||
* usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
|
||||
* side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
|
||||
* continue to input them out of order (whereas Tribble Features are ordered).
|
||||
*
|
||||
* @param parsingEngine parsing engine
|
||||
* @param source source
|
||||
* @param type type to check
|
||||
* @param matches matches
|
||||
* @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
|
||||
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
|
||||
*/
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
|
||||
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||
String value = getArgumentValue( defaultDefinition, matches );
|
||||
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||
|
|
@ -348,7 +370,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
if ( tags.getPositionalTags().size() > 2 ) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Unexpected number of positional tags for argument %s : %s. " +
|
||||
"Rod bindings only suport -X:type and -X:name,type argument styles",
|
||||
"Rod bindings only support -X:type and -X:name,type argument styles",
|
||||
value, source.field.getName()));
|
||||
} if ( tags.getPositionalTags().size() == 2 ) {
|
||||
// -X:name,type style
|
||||
|
|
@ -378,7 +400,12 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
}
|
||||
|
||||
if ( tribbleType == null )
|
||||
if ( tribbleType == null ) {
|
||||
// IntervalBindings allow streaming conversion of Strings
|
||||
if ( createIntervalBinding ) {
|
||||
return new IntervalBinding(value);
|
||||
}
|
||||
|
||||
if ( ! file.exists() ) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "file does not exist");
|
||||
} else if ( ! file.canRead() || ! file.isFile() ) {
|
||||
|
|
@ -389,13 +416,20 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
|
||||
manager.userFriendlyListOfAvailableFeatures(parameterType)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
||||
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
parsingEngine.addTags(result,tags);
|
||||
parsingEngine.addRodBinding(result);
|
||||
Object result;
|
||||
if ( createIntervalBinding ) {
|
||||
result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
} else {
|
||||
RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
parsingEngine.addTags(rbind, tags);
|
||||
parsingEngine.addRodBinding(rbind);
|
||||
result = rbind;
|
||||
}
|
||||
return result;
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new UserException.CommandLineException(
|
||||
|
|
@ -409,6 +443,39 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parser for RodBinding objects
|
||||
*/
|
||||
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
/**
|
||||
* We only want IntervalBinding class objects
|
||||
* @param type The type to check.
|
||||
* @return true if the provided class is an IntervalBinding.class
|
||||
*/
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
return isIntervalBinding(type);
|
||||
}
|
||||
|
||||
public static boolean isIntervalBinding( Class type ) {
|
||||
return IntervalBinding.class.isAssignableFrom(type);
|
||||
}
|
||||
|
||||
/**
|
||||
* See note from RodBindingArgumentTypeDescriptor.parse().
|
||||
*
|
||||
* @param parsingEngine parsing engine
|
||||
* @param source source
|
||||
* @param type type to check
|
||||
* @param matches matches
|
||||
* @return the IntervalBinding object.
|
||||
*/
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse simple argument types: java primitives, wrapper classes, and anything that has
|
||||
* a simple String constructor.
|
||||
|
|
@ -416,7 +483,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
|
||||
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false;
|
||||
if ( type.isPrimitive() ) return true;
|
||||
if ( type.isEnum() ) return true;
|
||||
if ( primitiveToWrapperMap.containsValue(type) ) return true;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An IntervalBinding representing a walker argument that gets bound to either a ROD track or interval string.
|
||||
*
|
||||
* The IntervalBinding<T> is a formal GATK argument that bridges between a walker and
|
||||
* the engine to construct intervals for traversal at runtime. The IntervalBinding can
|
||||
* either be a RodBinding<T>, a string of one or more intervals, or a file with interval strings.
|
||||
* The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it.
|
||||
*
|
||||
* Note that this class is immutable.
|
||||
*/
|
||||
public final class IntervalBinding<T extends Feature> {
|
||||
|
||||
private RodBinding<T> featureIntervals;
|
||||
private String stringIntervals;
|
||||
|
||||
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
|
||||
public IntervalBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
|
||||
featureIntervals = new RodBinding<T>(type, rawName, source, tribbleType, tags);
|
||||
}
|
||||
|
||||
@Requires({"intervalArgument != null"})
|
||||
public IntervalBinding(String intervalArgument) {
|
||||
stringIntervals = intervalArgument;
|
||||
}
|
||||
|
||||
public String getSource() {
|
||||
if ( featureIntervals != null )
|
||||
return featureIntervals.getSource();
|
||||
return stringIntervals;
|
||||
}
|
||||
|
||||
public List<GenomeLoc> getIntervals(GenomeAnalysisEngine toolkit) {
|
||||
List<GenomeLoc> intervals;
|
||||
|
||||
if ( featureIntervals != null ) {
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(toolkit.getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
toolkit.getGenomeLocParser(),
|
||||
toolkit.getArguments().unsafe);
|
||||
FeatureManager.FeatureDescriptor descriptor = new FeatureManager().getByName(featureIntervals.getTribbleType());
|
||||
CloseableIterator<GATKFeature> iterator = builder.createInstanceOfTrack(descriptor.getCodecClass(), new File(featureIntervals.getSource())).getIterator();
|
||||
while ( iterator.hasNext() ) {
|
||||
intervals.add(iterator.next().getLocation());
|
||||
}
|
||||
iterator.close();
|
||||
|
||||
} else {
|
||||
intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals);
|
||||
}
|
||||
|
||||
return intervals;
|
||||
}
|
||||
}
|
||||
|
|
@ -75,6 +75,7 @@ public class ParsingEngine {
|
|||
* The type of set used must be ordered (but not necessarily sorted).
|
||||
*/
|
||||
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
|
||||
new IntervalBindingArgumentTypeDescriptor(),
|
||||
new RodBindingArgumentTypeDescriptor(),
|
||||
new CompoundArgumentTypeDescriptor(),
|
||||
new MultiplexArgumentTypeDescriptor()) );
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
|
|
@ -42,7 +43,6 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
|||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -50,6 +50,7 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -296,7 +297,7 @@ public class GenomeAnalysisEngine {
|
|||
else if(WalkerManager.getDownsamplingMethod(walker) != null)
|
||||
method = WalkerManager.getDownsamplingMethod(walker);
|
||||
else
|
||||
method = argCollection.getDefaultDownsamplingMethod();
|
||||
method = GATKArgumentCollection.getDefaultDownsamplingMethod();
|
||||
return method;
|
||||
}
|
||||
|
||||
|
|
@ -563,34 +564,23 @@ public class GenomeAnalysisEngine {
|
|||
protected void initializeIntervals() {
|
||||
|
||||
// return if no interval arguments at all
|
||||
if ((argCollection.intervals == null) && (argCollection.excludeIntervals == null) && (argCollection.RODToInterval == null))
|
||||
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
|
||||
return;
|
||||
|
||||
// if '-L all' was specified, verify that it was the only -L specified and return if so.
|
||||
if(argCollection.intervals != null) {
|
||||
for(String interval: argCollection.intervals) {
|
||||
if(interval.trim().equals("all")) {
|
||||
if(argCollection.intervals.size() > 1)
|
||||
throw new UserException("'-L all' was specified along with other intervals or interval lists; the GATK cannot combine '-L all' with other intervals.");
|
||||
|
||||
// '-L all' was specified and seems valid. Return.
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Note that the use of '-L all' is no longer supported.
|
||||
|
||||
// if include argument isn't given, create new set of all possible intervals
|
||||
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null && argCollection.RODToInterval == null ?
|
||||
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
|
||||
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
|
||||
loadIntervals(argCollection.intervals, IntervalUtils.mergeIntervalLocations(getRODIntervals(), argCollection.intervalMerging)));
|
||||
loadIntervals(argCollection.intervals, argCollection.intervalSetRule));
|
||||
|
||||
// if no exclude arguments, can return parseIntervalArguments directly
|
||||
if (argCollection.excludeIntervals == null)
|
||||
if ( argCollection.excludeIntervals == null )
|
||||
intervals = includeSortedSet;
|
||||
|
||||
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
|
||||
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
|
||||
else {
|
||||
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, null);
|
||||
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
|
||||
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
|
||||
|
||||
// logging messages only printed when exclude (-XL) arguments are given
|
||||
|
|
@ -601,51 +591,39 @@ public class GenomeAnalysisEngine {
|
|||
logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)",
|
||||
toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
|
||||
}
|
||||
|
||||
// DEBUGGING OUTPUT
|
||||
for ( GenomeLoc loc : intervals )
|
||||
logger.info("Including -L interval: " + loc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the intervals relevant to the current execution
|
||||
* @param argList String representation of arguments; might include 'all', filenames, intervals in samtools
|
||||
* notation, or a combination of the above
|
||||
* @param rodIntervals a list of ROD intervals to add to the returned set. Can be empty or null.
|
||||
* @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
|
||||
* @param rule interval merging rule
|
||||
* @return A sorted, merged list of all intervals specified in this arg list.
|
||||
*/
|
||||
protected GenomeLocSortedSet loadIntervals( List<String> argList, List<GenomeLoc> rodIntervals ) {
|
||||
protected GenomeLocSortedSet loadIntervals( List<IntervalBinding<Feature>> argList, IntervalSetRule rule ) {
|
||||
|
||||
boolean allowEmptyIntervalList = (argCollection.unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST ||
|
||||
argCollection.unsafe == ValidationExclusion.TYPE.ALL);
|
||||
|
||||
List<GenomeLoc> nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList);
|
||||
List<GenomeLoc> allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule);
|
||||
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
|
||||
for ( IntervalBinding intervalBinding : argList ) {
|
||||
List<GenomeLoc> intervals = intervalBinding.getIntervals(this);
|
||||
|
||||
if ( !allowEmptyIntervalList && intervals.isEmpty() ) {
|
||||
throw new UserException("The interval file " + intervalBinding.getSource() + " contains no intervals " +
|
||||
"that could be parsed, and the unsafe operation ALLOW_EMPTY_INTERVAL_LIST has " +
|
||||
"not been enabled");
|
||||
}
|
||||
|
||||
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
|
||||
}
|
||||
|
||||
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
|
||||
}
|
||||
|
||||
/**
|
||||
* if we have a ROD specified as a 'rodToIntervalTrackName', convert its records to intervals
|
||||
* @return ROD intervals as GenomeLocs
|
||||
*/
|
||||
private List<GenomeLoc> getRODIntervals() {
|
||||
Map<String, ReferenceOrderedDataSource> rodNames = RMDIntervalGenerator.getRMDTrackNames(rodDataSources);
|
||||
// Do we have any RODs that overloaded as interval lists with the 'rodToIntervalTrackName' flag?
|
||||
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
|
||||
if (rodNames != null && argCollection.RODToInterval != null) {
|
||||
String rodName = argCollection.RODToInterval;
|
||||
|
||||
// check to make sure we have a rod of that name
|
||||
if (!rodNames.containsKey(rodName))
|
||||
throw new UserException.CommandLineException("--rodToIntervalTrackName (-BTI) was passed the name '"+rodName+"', which wasn't given as a ROD name in the -B option");
|
||||
|
||||
for (String str : rodNames.keySet())
|
||||
if (str.equals(rodName)) {
|
||||
logger.info("Adding interval list from track (ROD) named " + rodName);
|
||||
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str));
|
||||
ret.addAll(intervalGenerator.toGenomeLocList());
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add additional, externally managed IO streams for inputs.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -26,9 +26,11 @@
|
|||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
|
|
@ -84,11 +86,20 @@ public class GATKArgumentCollection {
|
|||
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
|
||||
public List<String> intervals = null;
|
||||
public List<IntervalBinding<Feature>> intervals = null;
|
||||
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
|
||||
public List<String> excludeIntervals = null;
|
||||
public List<IntervalBinding<Feature>> excludeIntervals = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L inputs", required = false)
|
||||
public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
|
||||
|
||||
/** What rule should we use when merging intervals */
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
|
||||
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
|
||||
|
||||
@Element(required = false)
|
||||
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
||||
|
|
@ -100,14 +111,6 @@ public class GATKArgumentCollection {
|
|||
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
|
||||
public ArrayList<String> RODBindings = new ArrayList<String>();
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
|
||||
public String RODToInterval = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false)
|
||||
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
|
||||
public boolean nonDeterministicRandomSeed = false;
|
||||
|
|
@ -197,11 +200,6 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||
public int numberOfThreads = 1;
|
||||
|
||||
/** What rule should we use when merging intervals */
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false)
|
||||
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
|
||||
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
||||
public List<String> readGroupBlackList = null;
|
||||
|
|
@ -442,19 +440,15 @@ public class GATKArgumentCollection {
|
|||
if (other.intervalMerging != this.intervalMerging) {
|
||||
return false;
|
||||
}
|
||||
if ((other.RODToInterval == null && RODToInterval != null) ||
|
||||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (other.phoneHomeType != this.phoneHomeType) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (BTIMergeRule != other.BTIMergeRule)
|
||||
if (intervalSetRule != other.intervalSetRule)
|
||||
return false;
|
||||
|
||||
if ( BAQMode != other.BAQMode) return false;
|
||||
if ( BAQMode != other.BAQMode ) return false;
|
||||
if ( BAQGOP != other.BAQGOP ) return false;
|
||||
|
||||
if ((other.performanceLog == null && this.performanceLog != null) ||
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ public class FindLargeShards extends CommandLineProgram {
|
|||
// intervals
|
||||
GenomeLocSortedSet intervalSortedSet = null;
|
||||
if(intervals != null)
|
||||
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, true), IntervalMergingRule.ALL);
|
||||
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals), IntervalMergingRule.ALL);
|
||||
else {
|
||||
intervalSortedSet = new GenomeLocSortedSet(genomeLocParser);
|
||||
for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences())
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
|
||||
|
||||
if (descriptor == null)
|
||||
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
|
||||
throw new ReviewedStingException("Unable to find type name for codec class " + codecClass.getName());
|
||||
|
||||
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,57 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class RMDIntervalGenerator
|
||||
*
|
||||
* Creates an interval list, given an RMDTrack
|
||||
*/
|
||||
public class RMDIntervalGenerator {
|
||||
public ReferenceOrderedDataSource dataSource;
|
||||
|
||||
/**
|
||||
* create a interval representation of a ROD track
|
||||
* @param dataSource the track
|
||||
*/
|
||||
public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) {
|
||||
if (dataSource == null) throw new IllegalArgumentException("Data source cannot be null");
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genome location list from the interval track
|
||||
* @return a list of genome locations
|
||||
*/
|
||||
public List<GenomeLoc> toGenomeLocList() {
|
||||
Iterator<RODRecordList> iter = dataSource.seek((GenomeLoc)null);
|
||||
List<GenomeLoc> locations = new ArrayList<GenomeLoc>();
|
||||
while (iter.hasNext()) {
|
||||
RODRecordList feature = iter.next();
|
||||
GenomeLoc loc = feature.getLocation();
|
||||
if (loc != null) locations.add(loc);
|
||||
}
|
||||
return locations;
|
||||
}
|
||||
|
||||
/**
|
||||
* return a map of reference meta data track names to RODS
|
||||
* @param sources the reference ordered data sources to get the names from
|
||||
* @return a map of reference meta data names to RODS
|
||||
*/
|
||||
public static Map<String,ReferenceOrderedDataSource> getRMDTrackNames(List<ReferenceOrderedDataSource> sources) {
|
||||
// get a list of the current rod names we're working with
|
||||
Map<String,ReferenceOrderedDataSource> rodNames = new HashMap<String,ReferenceOrderedDataSource>();
|
||||
for (ReferenceOrderedDataSource rod : sources) {
|
||||
rodNames.put(rod.getName(),rod);
|
||||
}
|
||||
return rodNames;
|
||||
}
|
||||
}
|
||||
|
|
@ -32,7 +32,6 @@ import net.sf.samtools.util.SequenceUtil;
|
|||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
|
|
@ -386,10 +385,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
intervals = merger;
|
||||
} else {
|
||||
// read in the whole list of intervals for cleaning
|
||||
boolean allowEmptyIntervalList = (getToolkit().getArguments().unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST ||
|
||||
getToolkit().getArguments().unsafe == ValidationExclusion.TYPE.ALL);
|
||||
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
|
||||
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile),allowEmptyIntervalList),
|
||||
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile)),
|
||||
IntervalMergingRule.OVERLAPPING_ONLY);
|
||||
intervals = locs.iterator();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -372,7 +372,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
} else {
|
||||
// read in the whole list of intervals for cleaning
|
||||
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
|
||||
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
|
||||
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY);
|
||||
genotypeIntervalIterator = locs.iterator();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,62 +35,60 @@ public class IntervalUtils {
|
|||
*
|
||||
* @param parser Genome loc parser.
|
||||
* @param argList A list of strings containing interval data.
|
||||
* @param allowEmptyIntervalList If false instead of an empty interval list will return null.
|
||||
* @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used.
|
||||
* @return an unsorted, unmerged representation of the given intervals.
|
||||
*/
|
||||
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, List<String> argList, boolean allowEmptyIntervalList) {
|
||||
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, List<String> argList) {
|
||||
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
|
||||
|
||||
if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to
|
||||
// ensure that the arg list isn't null before looping.
|
||||
for (String argument : argList) {
|
||||
|
||||
// separate argument on semicolon first
|
||||
for (String fileOrInterval : argument.split(";")) {
|
||||
// if any interval argument is '-L all', consider all loci by returning no intervals
|
||||
if (fileOrInterval.trim().toLowerCase().equals("all")) {
|
||||
if (argList.size() != 1) {
|
||||
// throw error if '-L all' is not only interval - potentially conflicting commands
|
||||
throw new UserException.CommandLineException(String.format("Conflicting arguments: Intervals given along with \"-L all\""));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
// if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'.
|
||||
else if (isUnmapped(fileOrInterval))
|
||||
rawIntervals.add(GenomeLoc.UNMAPPED);
|
||||
// if it's a file, add items to raw interval list
|
||||
else if (isIntervalFile(fileOrInterval)) {
|
||||
try {
|
||||
rawIntervals.addAll(intervalFileToList(parser, fileOrInterval, allowEmptyIntervalList));
|
||||
}
|
||||
catch ( UserException.MalformedGenomeLoc e ) {
|
||||
throw e;
|
||||
}
|
||||
catch ( Exception e ) {
|
||||
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in any supported format.", e);
|
||||
}
|
||||
}
|
||||
|
||||
// otherwise treat as an interval -> parse and add to raw interval list
|
||||
else {
|
||||
rawIntervals.add(parser.parseGenomeLoc(fileOrInterval));
|
||||
}
|
||||
}
|
||||
rawIntervals.addAll(parseIntervalArguments(parser, argument));
|
||||
}
|
||||
}
|
||||
|
||||
return rawIntervals;
|
||||
}
|
||||
|
||||
/**
|
||||
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, String arg) {
|
||||
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
|
||||
|
||||
// separate argument on semicolon first
|
||||
for (String fileOrInterval : arg.split(";")) {
|
||||
// if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'.
|
||||
if (isUnmapped(fileOrInterval))
|
||||
rawIntervals.add(GenomeLoc.UNMAPPED);
|
||||
// if it's a file, add items to raw interval list
|
||||
else if (isIntervalFile(fileOrInterval)) {
|
||||
try {
|
||||
rawIntervals.addAll(intervalFileToList(parser, fileOrInterval));
|
||||
}
|
||||
catch ( UserException.MalformedGenomeLoc e ) {
|
||||
throw e;
|
||||
}
|
||||
catch ( Exception e ) {
|
||||
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in any supported format.", e);
|
||||
}
|
||||
}
|
||||
|
||||
// otherwise treat as an interval -> parse and add to raw interval list
|
||||
else {
|
||||
rawIntervals.add(parser.parseGenomeLoc(fileOrInterval));
|
||||
}
|
||||
}
|
||||
|
||||
return rawIntervals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a file of genome locations to process. The file may be in BED, Picard,
|
||||
* or GATK interval format.
|
||||
*
|
||||
* @param file_name interval file
|
||||
* @param allowEmptyIntervalList if false an exception will be thrown for files that contain no intervals
|
||||
* @param glParser GenomeLocParser
|
||||
* @param file_name interval file
|
||||
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file
|
||||
*/
|
||||
public static List<GenomeLoc> intervalFileToList(final GenomeLocParser glParser, final String file_name, boolean allowEmptyIntervalList) {
|
||||
public static List<GenomeLoc> intervalFileToList(final GenomeLocParser glParser, final String file_name) {
|
||||
// try to open file
|
||||
File inputFile = new File(file_name);
|
||||
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
|
||||
|
|
@ -145,12 +143,6 @@ public class IntervalUtils {
|
|||
}
|
||||
}
|
||||
|
||||
if ( ret.isEmpty() && ! allowEmptyIntervalList ) {
|
||||
throw new UserException("The interval file " + inputFile.getAbsolutePath() + " contains no intervals " +
|
||||
"that could be parsed, and the unsafe operation ALLOW_EMPTY_INTERVAL_LIST has " +
|
||||
"not been enabled");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue