Overload parseIntervalRegion() to allow for the interval merging rule to be passed in (so one is not required to use the value from the GATK arg collection).

Now the IndelRealigner can use this functionality without being forced to merge abutting intervals (which was actually causing a problem with the cleaning).



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2862 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-02-22 04:13:54 +00:00
parent cc09f48cd8
commit 32d14d988e
2 changed files with 17 additions and 4 deletions

View File

@ -41,6 +41,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShardStrategy;
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
@ -299,6 +300,17 @@ public class GenomeAnalysisEngine {
* @return a list of genomeLoc representing the interval file
*/
public static List<GenomeLoc> parseIntervalRegion(final List<String> intervals) {
// Convenience overload: delegates to the two-argument parseIntervalRegion,
// supplying the interval merging rule read from the engine's argument collection.
return parseIntervalRegion(intervals, GenomeAnalysisEngine.instance.getArguments().intervalMerging);
}
/**
* set up the interval regions, from either the interval file or the genome region string
*
* @param intervals the list of intervals to parse
* @param mergingRule the rule for merging intervals
* @return a list of genomeLoc representing the interval file
*/
public static List<GenomeLoc> parseIntervalRegion(final List<String> intervals, IntervalMergingRule mergingRule) {
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
for (String interval : intervals) {
if (new File(interval).exists()) {
@ -307,12 +319,12 @@ public class GenomeAnalysisEngine {
Utils.warnUser("Bed files are 0 based half-open intervals, which are converted to 1-based closed intervals in the GATK. " +
"Be aware that all output information and intervals are 1-based closed intervals.");
BedParser parser = new BedParser(new File(interval));
locs.addAll(parser.getSortedAndMergedLocations(GenomeAnalysisEngine.instance.getArguments().intervalMerging));
locs.addAll(parser.getSortedAndMergedLocations(mergingRule));
} else {
locs.addAll(GenomeLocParser.intervalFileToList(interval,GenomeAnalysisEngine.instance.getArguments().intervalMerging));
locs.addAll(GenomeLocParser.intervalFileToList(interval,mergingRule));
}
} else {
locs.addAll(GenomeLocParser.parseGenomeLocs(interval,GenomeAnalysisEngine.instance.getArguments().intervalMerging));
locs.addAll(GenomeLocParser.parseGenomeLocs(interval,mergingRule));
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -115,7 +116,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
throw new RuntimeException("Entropy threshold must be a fraction between 0 and 1");
// read in the intervals for cleaning
List<GenomeLoc> locs = GenomeAnalysisEngine.parseIntervalRegion(Arrays.asList(intervalsFile));
List<GenomeLoc> locs = GenomeAnalysisEngine.parseIntervalRegion(Arrays.asList(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY);
intervals = GenomeLocSortedSet.createSetFromList(locs).iterator();
currentInterval = intervals.hasNext() ? intervals.next() : null;