Initial working version of the new interval system, in which the argument for -L (and -XL) is allowed to be a ROD file (e.g. VCF). Old samtools-style intervals still behave as before. BTI (--rodToIntervalTrackName) is no longer supported. The merging rule (union or intersection) is now applied consistently across all -L intervals, which is nice; -XL intervals are always combined by union. More testing needed.
This commit is contained in:
parent
f6ccac889b
commit
9424e8b2ca
|
|
@ -336,6 +336,28 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||||
|
return parse(parsingEngine, source, type, matches, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The actual argument parsing method.
|
||||||
|
*
|
||||||
|
* IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
|
||||||
|
* that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
|
||||||
|
* this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
|
||||||
|
* is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
|
||||||
|
* usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
|
||||||
|
* side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
|
||||||
|
* continue to input them out of order (whereas Tribble Features are ordered).
|
||||||
|
*
|
||||||
|
* @param parsingEngine parsing engine
|
||||||
|
* @param source source
|
||||||
|
* @param type type to check
|
||||||
|
* @param matches matches
|
||||||
|
* @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
|
||||||
|
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
|
||||||
|
*/
|
||||||
|
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
|
||||||
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||||
String value = getArgumentValue( defaultDefinition, matches );
|
String value = getArgumentValue( defaultDefinition, matches );
|
||||||
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
|
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||||
|
|
@ -348,7 +370,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
if ( tags.getPositionalTags().size() > 2 ) {
|
if ( tags.getPositionalTags().size() > 2 ) {
|
||||||
throw new UserException.CommandLineException(
|
throw new UserException.CommandLineException(
|
||||||
String.format("Unexpected number of positional tags for argument %s : %s. " +
|
String.format("Unexpected number of positional tags for argument %s : %s. " +
|
||||||
"Rod bindings only suport -X:type and -X:name,type argument styles",
|
"Rod bindings only support -X:type and -X:name,type argument styles",
|
||||||
value, source.field.getName()));
|
value, source.field.getName()));
|
||||||
} if ( tags.getPositionalTags().size() == 2 ) {
|
} if ( tags.getPositionalTags().size() == 2 ) {
|
||||||
// -X:name,type style
|
// -X:name,type style
|
||||||
|
|
@ -378,7 +400,12 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( tribbleType == null )
|
if ( tribbleType == null ) {
|
||||||
|
// IntervalBindings allow streaming conversion of Strings
|
||||||
|
if ( createIntervalBinding ) {
|
||||||
|
return new IntervalBinding(value);
|
||||||
|
}
|
||||||
|
|
||||||
if ( ! file.exists() ) {
|
if ( ! file.exists() ) {
|
||||||
throw new UserException.CouldNotReadInputFile(file, "file does not exist");
|
throw new UserException.CouldNotReadInputFile(file, "file does not exist");
|
||||||
} else if ( ! file.canRead() || ! file.isFile() ) {
|
} else if ( ! file.canRead() || ! file.isFile() ) {
|
||||||
|
|
@ -389,13 +416,20 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
|
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
|
||||||
manager.userFriendlyListOfAvailableFeatures(parameterType)));
|
manager.userFriendlyListOfAvailableFeatures(parameterType)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
||||||
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
Object result;
|
||||||
parsingEngine.addTags(result,tags);
|
if ( createIntervalBinding ) {
|
||||||
parsingEngine.addRodBinding(result);
|
result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||||
|
} else {
|
||||||
|
RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||||
|
parsingEngine.addTags(rbind, tags);
|
||||||
|
parsingEngine.addRodBinding(rbind);
|
||||||
|
result = rbind;
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
} catch (InvocationTargetException e) {
|
} catch (InvocationTargetException e) {
|
||||||
throw new UserException.CommandLineException(
|
throw new UserException.CommandLineException(
|
||||||
|
|
@ -409,6 +443,39 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parser for IntervalBinding objects
|
||||||
|
*/
|
||||||
|
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
|
/**
|
||||||
|
* We only want IntervalBinding class objects
|
||||||
|
* @param type The type to check.
|
||||||
|
* @return true if the provided class is an IntervalBinding.class
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean supports( Class type ) {
|
||||||
|
return isIntervalBinding(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isIntervalBinding( Class type ) {
|
||||||
|
return IntervalBinding.class.isAssignableFrom(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* See note from RodBindingArgumentTypeDescriptor.parse().
|
||||||
|
*
|
||||||
|
* @param parsingEngine parsing engine
|
||||||
|
* @param source source
|
||||||
|
* @param type type to check
|
||||||
|
* @param matches matches
|
||||||
|
* @return the IntervalBinding object.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||||
|
return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse simple argument types: java primitives, wrapper classes, and anything that has
|
* Parse simple argument types: java primitives, wrapper classes, and anything that has
|
||||||
* a simple String constructor.
|
* a simple String constructor.
|
||||||
|
|
@ -416,7 +483,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
@Override
|
@Override
|
||||||
public boolean supports( Class type ) {
|
public boolean supports( Class type ) {
|
||||||
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
|
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false;
|
||||||
if ( type.isPrimitive() ) return true;
|
if ( type.isPrimitive() ) return true;
|
||||||
if ( type.isEnum() ) return true;
|
if ( type.isEnum() ) return true;
|
||||||
if ( primitiveToWrapperMap.containsValue(type) ) return true;
|
if ( primitiveToWrapperMap.containsValue(type) ) return true;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,93 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An IntervalBinding representing a walker argument that gets bound to either a ROD track or interval string.
|
||||||
|
*
|
||||||
|
* The IntervalBinding<T> is a formal GATK argument that bridges between a walker and
|
||||||
|
* the engine to construct intervals for traversal at runtime. The IntervalBinding can
|
||||||
|
* either be a RodBinding<T>, a string of one or more intervals, or a file with interval strings.
|
||||||
|
* The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it.
|
||||||
|
*
|
||||||
|
* Note that this class is intended to be immutable: its fields are assigned only in the constructors (they are not declared final).
|
||||||
|
*/
|
||||||
|
public final class IntervalBinding<T extends Feature> {
|
||||||
|
|
||||||
|
private RodBinding<T> featureIntervals;
|
||||||
|
private String stringIntervals;
|
||||||
|
|
||||||
|
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
|
||||||
|
public IntervalBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
|
||||||
|
featureIntervals = new RodBinding<T>(type, rawName, source, tribbleType, tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires({"intervalArgument != null"})
|
||||||
|
public IntervalBinding(String intervalArgument) {
|
||||||
|
stringIntervals = intervalArgument;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSource() {
|
||||||
|
if ( featureIntervals != null )
|
||||||
|
return featureIntervals.getSource();
|
||||||
|
return stringIntervals;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<GenomeLoc> getIntervals(GenomeAnalysisEngine toolkit) {
|
||||||
|
List<GenomeLoc> intervals;
|
||||||
|
|
||||||
|
if ( featureIntervals != null ) {
|
||||||
|
intervals = new ArrayList<GenomeLoc>();
|
||||||
|
|
||||||
|
RMDTrackBuilder builder = new RMDTrackBuilder(toolkit.getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||||
|
toolkit.getGenomeLocParser(),
|
||||||
|
toolkit.getArguments().unsafe);
|
||||||
|
FeatureManager.FeatureDescriptor descriptor = new FeatureManager().getByName(featureIntervals.getTribbleType());
|
||||||
|
CloseableIterator<GATKFeature> iterator = builder.createInstanceOfTrack(descriptor.getCodecClass(), new File(featureIntervals.getSource())).getIterator();
|
||||||
|
while ( iterator.hasNext() ) {
|
||||||
|
intervals.add(iterator.next().getLocation());
|
||||||
|
}
|
||||||
|
iterator.close();
|
||||||
|
|
||||||
|
} else {
|
||||||
|
intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals);
|
||||||
|
}
|
||||||
|
|
||||||
|
return intervals;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -75,6 +75,7 @@ public class ParsingEngine {
|
||||||
* The type of set used must be ordered (but not necessarily sorted).
|
* The type of set used must be ordered (but not necessarily sorted).
|
||||||
*/
|
*/
|
||||||
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
|
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
|
||||||
|
new IntervalBindingArgumentTypeDescriptor(),
|
||||||
new RodBindingArgumentTypeDescriptor(),
|
new RodBindingArgumentTypeDescriptor(),
|
||||||
new CompoundArgumentTypeDescriptor(),
|
new CompoundArgumentTypeDescriptor(),
|
||||||
new MultiplexArgumentTypeDescriptor()) );
|
new MultiplexArgumentTypeDescriptor()) );
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
|
|
@ -42,7 +43,6 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
||||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
|
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
|
@ -50,6 +50,7 @@ import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -296,7 +297,7 @@ public class GenomeAnalysisEngine {
|
||||||
else if(WalkerManager.getDownsamplingMethod(walker) != null)
|
else if(WalkerManager.getDownsamplingMethod(walker) != null)
|
||||||
method = WalkerManager.getDownsamplingMethod(walker);
|
method = WalkerManager.getDownsamplingMethod(walker);
|
||||||
else
|
else
|
||||||
method = argCollection.getDefaultDownsamplingMethod();
|
method = GATKArgumentCollection.getDefaultDownsamplingMethod();
|
||||||
return method;
|
return method;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -563,34 +564,23 @@ public class GenomeAnalysisEngine {
|
||||||
protected void initializeIntervals() {
|
protected void initializeIntervals() {
|
||||||
|
|
||||||
// return if no interval arguments at all
|
// return if no interval arguments at all
|
||||||
if ((argCollection.intervals == null) && (argCollection.excludeIntervals == null) && (argCollection.RODToInterval == null))
|
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// if '-L all' was specified, verify that it was the only -L specified and return if so.
|
// Note that the use of '-L all' is no longer supported.
|
||||||
if(argCollection.intervals != null) {
|
|
||||||
for(String interval: argCollection.intervals) {
|
|
||||||
if(interval.trim().equals("all")) {
|
|
||||||
if(argCollection.intervals.size() > 1)
|
|
||||||
throw new UserException("'-L all' was specified along with other intervals or interval lists; the GATK cannot combine '-L all' with other intervals.");
|
|
||||||
|
|
||||||
// '-L all' was specified and seems valid. Return.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// if include argument isn't given, create new set of all possible intervals
|
// if include argument isn't given, create new set of all possible intervals
|
||||||
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null && argCollection.RODToInterval == null ?
|
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
|
||||||
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
|
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
|
||||||
loadIntervals(argCollection.intervals, IntervalUtils.mergeIntervalLocations(getRODIntervals(), argCollection.intervalMerging)));
|
loadIntervals(argCollection.intervals, argCollection.intervalSetRule));
|
||||||
|
|
||||||
// if no exclude arguments, can return parseIntervalArguments directly
|
// if no exclude arguments, can return parseIntervalArguments directly
|
||||||
if (argCollection.excludeIntervals == null)
|
if ( argCollection.excludeIntervals == null )
|
||||||
intervals = includeSortedSet;
|
intervals = includeSortedSet;
|
||||||
|
|
||||||
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
|
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
|
||||||
else {
|
else {
|
||||||
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, null);
|
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
|
||||||
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
|
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
|
||||||
|
|
||||||
// logging messages only printed when exclude (-XL) arguments are given
|
// logging messages only printed when exclude (-XL) arguments are given
|
||||||
|
|
@ -601,51 +591,39 @@ public class GenomeAnalysisEngine {
|
||||||
logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)",
|
logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)",
|
||||||
toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
|
toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DEBUGGING OUTPUT
|
||||||
|
for ( GenomeLoc loc : intervals )
|
||||||
|
logger.info("Including -L interval: " + loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads the intervals relevant to the current execution
|
* Loads the intervals relevant to the current execution
|
||||||
* @param argList String representation of arguments; might include 'all', filenames, intervals in samtools
|
* @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
|
||||||
* notation, or a combination of the above
|
* @param rule interval merging rule
|
||||||
* @param rodIntervals a list of ROD intervals to add to the returned set. Can be empty or null.
|
|
||||||
* @return A sorted, merged list of all intervals specified in this arg list.
|
* @return A sorted, merged list of all intervals specified in this arg list.
|
||||||
*/
|
*/
|
||||||
protected GenomeLocSortedSet loadIntervals( List<String> argList, List<GenomeLoc> rodIntervals ) {
|
protected GenomeLocSortedSet loadIntervals( List<IntervalBinding<Feature>> argList, IntervalSetRule rule ) {
|
||||||
|
|
||||||
boolean allowEmptyIntervalList = (argCollection.unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST ||
|
boolean allowEmptyIntervalList = (argCollection.unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST ||
|
||||||
argCollection.unsafe == ValidationExclusion.TYPE.ALL);
|
argCollection.unsafe == ValidationExclusion.TYPE.ALL);
|
||||||
|
|
||||||
List<GenomeLoc> nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList);
|
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
|
||||||
List<GenomeLoc> allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule);
|
for ( IntervalBinding intervalBinding : argList ) {
|
||||||
|
List<GenomeLoc> intervals = intervalBinding.getIntervals(this);
|
||||||
|
|
||||||
|
if ( !allowEmptyIntervalList && intervals.isEmpty() ) {
|
||||||
|
throw new UserException("The interval file " + intervalBinding.getSource() + " contains no intervals " +
|
||||||
|
"that could be parsed, and the unsafe operation ALLOW_EMPTY_INTERVAL_LIST has " +
|
||||||
|
"not been enabled");
|
||||||
|
}
|
||||||
|
|
||||||
|
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
|
||||||
|
}
|
||||||
|
|
||||||
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
|
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* if we have a ROD specified as a 'rodToIntervalTrackName', convert its records to RODs
|
|
||||||
* @return ROD intervals as GenomeLocs
|
|
||||||
*/
|
|
||||||
private List<GenomeLoc> getRODIntervals() {
|
|
||||||
Map<String, ReferenceOrderedDataSource> rodNames = RMDIntervalGenerator.getRMDTrackNames(rodDataSources);
|
|
||||||
// Do we have any RODs that overloaded as interval lists with the 'rodToIntervalTrackName' flag?
|
|
||||||
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
|
|
||||||
if (rodNames != null && argCollection.RODToInterval != null) {
|
|
||||||
String rodName = argCollection.RODToInterval;
|
|
||||||
|
|
||||||
// check to make sure we have a rod of that name
|
|
||||||
if (!rodNames.containsKey(rodName))
|
|
||||||
throw new UserException.CommandLineException("--rodToIntervalTrackName (-BTI) was passed the name '"+rodName+"', which wasn't given as a ROD name in the -B option");
|
|
||||||
|
|
||||||
for (String str : rodNames.keySet())
|
|
||||||
if (str.equals(rodName)) {
|
|
||||||
logger.info("Adding interval list from track (ROD) named " + rodName);
|
|
||||||
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str));
|
|
||||||
ret.addAll(intervalGenerator.toGenomeLocList());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add additional, externally managed IO streams for inputs.
|
* Add additional, externally managed IO streams for inputs.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -26,9 +26,11 @@
|
||||||
package org.broadinstitute.sting.gatk.arguments;
|
package org.broadinstitute.sting.gatk.arguments;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
|
|
@ -84,11 +86,20 @@ public class GATKArgumentCollection {
|
||||||
|
|
||||||
@ElementList(required = false)
|
@ElementList(required = false)
|
||||||
@Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
|
@Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
|
||||||
public List<String> intervals = null;
|
public List<IntervalBinding<Feature>> intervals = null;
|
||||||
|
|
||||||
@ElementList(required = false)
|
@ElementList(required = false)
|
||||||
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
|
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
|
||||||
public List<String> excludeIntervals = null;
|
public List<IntervalBinding<Feature>> excludeIntervals = null;
|
||||||
|
|
||||||
|
@Element(required = false)
|
||||||
|
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L inputs", required = false)
|
||||||
|
public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
|
||||||
|
|
||||||
|
/** What rule should we use when merging intervals */
|
||||||
|
@Element(required = false)
|
||||||
|
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
|
||||||
|
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
|
||||||
|
|
||||||
@Element(required = false)
|
@Element(required = false)
|
||||||
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
||||||
|
|
@ -100,14 +111,6 @@ public class GATKArgumentCollection {
|
||||||
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
|
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
|
||||||
public ArrayList<String> RODBindings = new ArrayList<String>();
|
public ArrayList<String> RODBindings = new ArrayList<String>();
|
||||||
|
|
||||||
@Element(required = false)
|
|
||||||
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
|
|
||||||
public String RODToInterval = null;
|
|
||||||
|
|
||||||
@Element(required = false)
|
|
||||||
@Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false)
|
|
||||||
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
|
|
||||||
|
|
||||||
@Element(required = false)
|
@Element(required = false)
|
||||||
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
|
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
|
||||||
public boolean nonDeterministicRandomSeed = false;
|
public boolean nonDeterministicRandomSeed = false;
|
||||||
|
|
@ -197,11 +200,6 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||||
public int numberOfThreads = 1;
|
public int numberOfThreads = 1;
|
||||||
|
|
||||||
/** What rule should we use when merging intervals */
|
|
||||||
@Element(required = false)
|
|
||||||
@Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false)
|
|
||||||
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
|
|
||||||
|
|
||||||
@ElementList(required = false)
|
@ElementList(required = false)
|
||||||
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
||||||
public List<String> readGroupBlackList = null;
|
public List<String> readGroupBlackList = null;
|
||||||
|
|
@ -442,19 +440,15 @@ public class GATKArgumentCollection {
|
||||||
if (other.intervalMerging != this.intervalMerging) {
|
if (other.intervalMerging != this.intervalMerging) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if ((other.RODToInterval == null && RODToInterval != null) ||
|
|
||||||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (other.phoneHomeType != this.phoneHomeType) {
|
if (other.phoneHomeType != this.phoneHomeType) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (BTIMergeRule != other.BTIMergeRule)
|
if (intervalSetRule != other.intervalSetRule)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if ( BAQMode != other.BAQMode) return false;
|
if ( BAQMode != other.BAQMode ) return false;
|
||||||
if ( BAQGOP != other.BAQGOP ) return false;
|
if ( BAQGOP != other.BAQGOP ) return false;
|
||||||
|
|
||||||
if ((other.performanceLog == null && this.performanceLog != null) ||
|
if ((other.performanceLog == null && this.performanceLog != null) ||
|
||||||
|
|
|
||||||
|
|
@ -97,7 +97,7 @@ public class FindLargeShards extends CommandLineProgram {
|
||||||
// intervals
|
// intervals
|
||||||
GenomeLocSortedSet intervalSortedSet = null;
|
GenomeLocSortedSet intervalSortedSet = null;
|
||||||
if(intervals != null)
|
if(intervals != null)
|
||||||
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, true), IntervalMergingRule.ALL);
|
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals), IntervalMergingRule.ALL);
|
||||||
else {
|
else {
|
||||||
intervalSortedSet = new GenomeLocSortedSet(genomeLocParser);
|
intervalSortedSet = new GenomeLocSortedSet(genomeLocParser);
|
||||||
for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences())
|
for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences())
|
||||||
|
|
|
||||||
|
|
@ -140,7 +140,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||||
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
|
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
|
||||||
|
|
||||||
if (descriptor == null)
|
if (descriptor == null)
|
||||||
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
|
throw new ReviewedStingException("Unable to find type name for codec class " + codecClass.getName());
|
||||||
|
|
||||||
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,57 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.utils;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @author aaron
|
|
||||||
*
|
|
||||||
* Class RMDIntervalGenerator
|
|
||||||
*
|
|
||||||
* Creates an interval list, given an RMDTrack
|
|
||||||
*/
|
|
||||||
public class RMDIntervalGenerator {
|
|
||||||
public ReferenceOrderedDataSource dataSource;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create an interval representation of a ROD track
|
|
||||||
* @param dataSource the track
|
|
||||||
*/
|
|
||||||
public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) {
|
|
||||||
if (dataSource == null) throw new IllegalArgumentException("Data source cannot be null");
|
|
||||||
this.dataSource = dataSource;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create a genome location list from the interval track
|
|
||||||
* @return a list of genome locations
|
|
||||||
*/
|
|
||||||
public List<GenomeLoc> toGenomeLocList() {
|
|
||||||
Iterator<RODRecordList> iter = dataSource.seek((GenomeLoc)null);
|
|
||||||
List<GenomeLoc> locations = new ArrayList<GenomeLoc>();
|
|
||||||
while (iter.hasNext()) {
|
|
||||||
RODRecordList feature = iter.next();
|
|
||||||
GenomeLoc loc = feature.getLocation();
|
|
||||||
if (loc != null) locations.add(loc);
|
|
||||||
}
|
|
||||||
return locations;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* return a map of reference meta data track names to RODS
|
|
||||||
* @param sources the reference ordered data sources to get the names from
|
|
||||||
* @return a map of reference meta data names to RODS
|
|
||||||
*/
|
|
||||||
public static Map<String,ReferenceOrderedDataSource> getRMDTrackNames(List<ReferenceOrderedDataSource> sources) {
|
|
||||||
// get a list of the current rod names we're working with
|
|
||||||
Map<String,ReferenceOrderedDataSource> rodNames = new HashMap<String,ReferenceOrderedDataSource>();
|
|
||||||
for (ReferenceOrderedDataSource rod : sources) {
|
|
||||||
rodNames.put(rod.getName(),rod);
|
|
||||||
}
|
|
||||||
return rodNames;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -32,7 +32,6 @@ import net.sf.samtools.util.SequenceUtil;
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
|
@ -386,10 +385,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
intervals = merger;
|
intervals = merger;
|
||||||
} else {
|
} else {
|
||||||
// read in the whole list of intervals for cleaning
|
// read in the whole list of intervals for cleaning
|
||||||
boolean allowEmptyIntervalList = (getToolkit().getArguments().unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST ||
|
|
||||||
getToolkit().getArguments().unsafe == ValidationExclusion.TYPE.ALL);
|
|
||||||
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
|
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
|
||||||
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile),allowEmptyIntervalList),
|
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile)),
|
||||||
IntervalMergingRule.OVERLAPPING_ONLY);
|
IntervalMergingRule.OVERLAPPING_ONLY);
|
||||||
intervals = locs.iterator();
|
intervals = locs.iterator();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -372,7 +372,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
} else {
|
} else {
|
||||||
// read in the whole list of intervals for cleaning
|
// read in the whole list of intervals for cleaning
|
||||||
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
|
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
|
||||||
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
|
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY);
|
||||||
genotypeIntervalIterator = locs.iterator();
|
genotypeIntervalIterator = locs.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,62 +35,60 @@ public class IntervalUtils {
|
||||||
*
|
*
|
||||||
* @param parser Genome loc parser.
|
* @param parser Genome loc parser.
|
||||||
* @param argList A list of strings containing interval data.
|
* @param argList A list of strings containing interval data.
|
||||||
* @param allowEmptyIntervalList If false instead of an empty interval list will return null.
|
|
||||||
* @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used.
|
* @return an unsorted, unmerged representation of the given intervals. The list is empty when argList is null or contains no arguments.
|
||||||
*/
|
*/
|
||||||
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, List<String> argList, boolean allowEmptyIntervalList) {
|
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, List<String> argList) {
|
||||||
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
|
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
|
||||||
|
|
||||||
if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to
|
if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to
|
||||||
// ensure that the arg list isn't null before looping.
|
// ensure that the arg list isn't null before looping.
|
||||||
for (String argument : argList) {
|
for (String argument : argList) {
|
||||||
|
rawIntervals.addAll(parseIntervalArguments(parser, argument));
|
||||||
// separate argument on semicolon first
|
|
||||||
for (String fileOrInterval : argument.split(";")) {
|
|
||||||
// if any interval argument is '-L all', consider all loci by returning no intervals
|
|
||||||
if (fileOrInterval.trim().toLowerCase().equals("all")) {
|
|
||||||
if (argList.size() != 1) {
|
|
||||||
// throw error if '-L all' is not only interval - potentially conflicting commands
|
|
||||||
throw new UserException.CommandLineException(String.format("Conflicting arguments: Intervals given along with \"-L all\""));
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
// if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'.
|
|
||||||
else if (isUnmapped(fileOrInterval))
|
|
||||||
rawIntervals.add(GenomeLoc.UNMAPPED);
|
|
||||||
// if it's a file, add items to raw interval list
|
|
||||||
else if (isIntervalFile(fileOrInterval)) {
|
|
||||||
try {
|
|
||||||
rawIntervals.addAll(intervalFileToList(parser, fileOrInterval, allowEmptyIntervalList));
|
|
||||||
}
|
|
||||||
catch ( UserException.MalformedGenomeLoc e ) {
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
catch ( Exception e ) {
|
|
||||||
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in any supported format.", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// otherwise treat as an interval -> parse and add to raw interval list
|
|
||||||
else {
|
|
||||||
rawIntervals.add(parser.parseGenomeLoc(fileOrInterval));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return rawIntervals;
|
return rawIntervals;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, String arg) {
|
||||||
|
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
|
||||||
|
|
||||||
|
// separate argument on semicolon first
|
||||||
|
for (String fileOrInterval : arg.split(";")) {
|
||||||
|
// if any argument is 'unmapped', "parse" it to the GenomeLoc.UNMAPPED entry, which in this case means 'all the intervals with no alignment data'.
|
||||||
|
if (isUnmapped(fileOrInterval))
|
||||||
|
rawIntervals.add(GenomeLoc.UNMAPPED);
|
||||||
|
// if it's a file, add items to raw interval list
|
||||||
|
else if (isIntervalFile(fileOrInterval)) {
|
||||||
|
try {
|
||||||
|
rawIntervals.addAll(intervalFileToList(parser, fileOrInterval));
|
||||||
|
}
|
||||||
|
catch ( UserException.MalformedGenomeLoc e ) {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
catch ( Exception e ) {
|
||||||
|
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in any supported format.", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise treat as an interval -> parse and add to raw interval list
|
||||||
|
else {
|
||||||
|
rawIntervals.add(parser.parseGenomeLoc(fileOrInterval));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rawIntervals;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
* Read a file of genome locations to process. The file may be in BED, Picard,
|
* Read a file of genome locations to process. The file may be in BED, Picard,
|
||||||
* or GATK interval format.
|
* or GATK interval format.
|
||||||
*
|
*
|
||||||
* @param file_name interval file
|
* @param glParser GenomeLocParser
|
||||||
* @param allowEmptyIntervalList if false an exception will be thrown for files that contain no intervals
|
* @param file_name interval file
|
||||||
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file
|
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file
|
||||||
*/
|
*/
|
||||||
public static List<GenomeLoc> intervalFileToList(final GenomeLocParser glParser, final String file_name, boolean allowEmptyIntervalList) {
|
public static List<GenomeLoc> intervalFileToList(final GenomeLocParser glParser, final String file_name) {
|
||||||
// try to open file
|
// try to open file
|
||||||
File inputFile = new File(file_name);
|
File inputFile = new File(file_name);
|
||||||
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
|
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
|
||||||
|
|
@ -145,12 +143,6 @@ public class IntervalUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ret.isEmpty() && ! allowEmptyIntervalList ) {
|
|
||||||
throw new UserException("The interval file " + inputFile.getAbsolutePath() + " contains no intervals " +
|
|
||||||
"that could be parsed, and the unsafe operation ALLOW_EMPTY_INTERVAL_LIST has " +
|
|
||||||
"not been enabled");
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue