Adding three minor new features:

+ "-L all" now walks over all intervals

+ If a -L argument with a .list extension is passed and the file does not exist, a \
File Not Found error is returned instead of a "bad interval" error. We plan to revisit interval \
lists soon and generate a concrete list of filenames, so this is likely temporary.

+ An error is thrown if the start position of an interval is greater than the end position.




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3021 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
bthomas 2010-03-17 16:24:10 +00:00
parent 4340601c26
commit 5b34bb9ab0
2 changed files with 26 additions and 10 deletions

View File

@ -161,10 +161,10 @@ public class GenomeAnalysisEngine {
initializeIntervals();
ShardStrategy shardStrategy = getShardStrategy(my_walker,
microScheduler.getReference(),
intervals,
argCollection.maximumEngineIterations,
readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
microScheduler.getReference(),
intervals,
argCollection.maximumEngineIterations,
readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
// execute the microscheduler, storing the results
return microScheduler.execute(my_walker, shardStrategy, argCollection.maximumEngineIterations);
@ -180,10 +180,15 @@ public class GenomeAnalysisEngine {
excludeIntervals = GenomeLocSortedSet.createSetFromList(rawExcludeIntervals);
}
if (argCollection.intervals != null && argCollection.intervalMerging.check()) {
List <GenomeLoc> parsedIntervals = parseIntervalRegion(argCollection.intervals);
intervals = (parsedIntervals == null) ? null: GenomeLocSortedSet.createSetFromList(parsedIntervals);
}
/*
if (argCollection.intervals != null && argCollection.intervalMerging.check()) {
intervals = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
}
*/
if ( excludeIntervals != null ) {
GenomeLocSortedSet toPrune = intervals == null ? GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getSequenceDictionary()) : intervals;
long toPruneSize = toPrune.coveredSize();
@ -246,7 +251,7 @@ public class GenomeAnalysisEngine {
* @return A collection of available filters.
*/
protected Collection<SamRecordFilter> createFiltersForWalker(GATKArgumentCollection args, Walker walker) {
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
filters.addAll(WalkerManager.getReadFilters(walker,filterManager));
if (args.filterZeroMappingQualityReads != null && args.filterZeroMappingQualityReads)
filters.add(new ZeroMappingQualityReadFilter());
@ -342,6 +347,15 @@ public class GenomeAnalysisEngine {
public static List<GenomeLoc> parseIntervalRegion(final List<String> intervals, IntervalMergingRule mergingRule) {
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
for (String interval : intervals) {
// if any interval argument is '-L all', consider all loci by returning no intervals
if (interval.equals("all")) {
if (intervals.size() != 1) {
// throw error if '-L all' is not only interval - potentially conflicting commands
throw new StingException(String.format("Conflicting arguments: Intervals given along with \"-L all\""));
}
return new ArrayList<GenomeLoc>();
}
if (new File(interval).exists()) {
// support for the bed style interval format
if (interval.toUpperCase().endsWith(".BED")) {
@ -446,7 +460,7 @@ public class GenomeAnalysisEngine {
}
/**
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
*
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list

View File

@ -190,7 +190,9 @@ public class GenomeLocParser {
if (contig == null)
throw new StingException("Invalid Genome Location contig == null : " + str);
if (start > stop)
throw new StingException("Invalid Genome Location string; start position comes after end position: " + str );
if (!isContigValid(contig))
throw new StingException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
@ -226,8 +228,8 @@ public class GenomeLocParser {
locs = mergeIntervalLocations(locs, rule);
return locs;
} catch (Exception e) { // TODO: fix this so that it passes the message from the exception, and doesn't print it out
throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str), e);
}
throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where loc1 < loc2. Each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str), e);
}
}
// --------------------------------------------------------------------------------------------------------------