Adding three minor new features:
+ -L all now walks over all intervals. + If a -L argument is passed with a .list extension and the file does not exist, a File Not Found error is returned instead of a "bad interval" error. We plan to revisit interval lists soon and generate a concrete list of filenames, so this is likely temporary. + An error is thrown if the start position of an interval is greater than the end position. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3021 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4340601c26
commit
5b34bb9ab0
|
|
@ -161,10 +161,10 @@ public class GenomeAnalysisEngine {
|
|||
initializeIntervals();
|
||||
|
||||
ShardStrategy shardStrategy = getShardStrategy(my_walker,
|
||||
microScheduler.getReference(),
|
||||
intervals,
|
||||
argCollection.maximumEngineIterations,
|
||||
readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
|
||||
microScheduler.getReference(),
|
||||
intervals,
|
||||
argCollection.maximumEngineIterations,
|
||||
readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
|
||||
|
||||
// execute the microscheduler, storing the results
|
||||
return microScheduler.execute(my_walker, shardStrategy, argCollection.maximumEngineIterations);
|
||||
|
|
@ -180,10 +180,15 @@ public class GenomeAnalysisEngine {
|
|||
excludeIntervals = GenomeLocSortedSet.createSetFromList(rawExcludeIntervals);
|
||||
}
|
||||
|
||||
if (argCollection.intervals != null && argCollection.intervalMerging.check()) {
|
||||
List <GenomeLoc> parsedIntervals = parseIntervalRegion(argCollection.intervals);
|
||||
intervals = (parsedIntervals == null) ? null: GenomeLocSortedSet.createSetFromList(parsedIntervals);
|
||||
}
|
||||
/*
|
||||
if (argCollection.intervals != null && argCollection.intervalMerging.check()) {
|
||||
intervals = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
|
||||
}
|
||||
|
||||
*/
|
||||
if ( excludeIntervals != null ) {
|
||||
GenomeLocSortedSet toPrune = intervals == null ? GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getSequenceDictionary()) : intervals;
|
||||
long toPruneSize = toPrune.coveredSize();
|
||||
|
|
@ -246,7 +251,7 @@ public class GenomeAnalysisEngine {
|
|||
* @return A collection of available filters.
|
||||
*/
|
||||
protected Collection<SamRecordFilter> createFiltersForWalker(GATKArgumentCollection args, Walker walker) {
|
||||
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
|
||||
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
|
||||
filters.addAll(WalkerManager.getReadFilters(walker,filterManager));
|
||||
if (args.filterZeroMappingQualityReads != null && args.filterZeroMappingQualityReads)
|
||||
filters.add(new ZeroMappingQualityReadFilter());
|
||||
|
|
@ -342,6 +347,15 @@ public class GenomeAnalysisEngine {
|
|||
public static List<GenomeLoc> parseIntervalRegion(final List<String> intervals, IntervalMergingRule mergingRule) {
|
||||
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
|
||||
for (String interval : intervals) {
|
||||
// if any interval argument is '-L all', consider all loci by returning no intervals
|
||||
if (interval.equals("all")) {
|
||||
if (intervals.size() != 1) {
|
||||
// throw error if '-L all' is not only interval - potentially conflicting commands
|
||||
throw new StingException(String.format("Conflicting arguments: Intervals given along with \"-L all\""));
|
||||
}
|
||||
return new ArrayList<GenomeLoc>();
|
||||
}
|
||||
|
||||
if (new File(interval).exists()) {
|
||||
// support for the bed style interval format
|
||||
if (interval.toUpperCase().endsWith(".BED")) {
|
||||
|
|
@ -446,7 +460,7 @@ public class GenomeAnalysisEngine {
|
|||
}
|
||||
|
||||
/**
|
||||
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
|
||||
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
|
||||
*
|
||||
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
|
||||
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
|
||||
|
|
|
|||
|
|
@ -190,7 +190,9 @@ public class GenomeLocParser {
|
|||
if (contig == null)
|
||||
throw new StingException("Invalid Genome Location contig == null : " + str);
|
||||
|
||||
|
||||
if (start > stop)
|
||||
throw new StingException("Invalid Genome Location string; start position comes after end position: " + str );
|
||||
|
||||
if (!isContigValid(contig))
|
||||
throw new StingException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
|
||||
|
||||
|
|
@ -226,8 +228,8 @@ public class GenomeLocParser {
|
|||
locs = mergeIntervalLocations(locs, rule);
|
||||
return locs;
|
||||
} catch (Exception e) { // TODO: fix this so that it passes the message from the exception, and doesn't print it out
|
||||
throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str), e);
|
||||
}
|
||||
throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where loc1 < loc2. Each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str), e);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Reference in New Issue