*Massive* speed-up for interval-based by-read traversals.

[Further optimization is possible, but this simple fix is good enough for now.]


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@266 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-04-02 20:19:39 +00:00
parent c192a95998
commit 4faa680887
2 changed files with 20 additions and 2 deletions

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.FastaSequenceFile2;
import java.util.List;
import java.util.Arrays;
@ -52,6 +53,7 @@ public class TraverseByReads extends TraversalEngine {
*/
public <M, T> Object traverseByRead(ReadWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
samReadIter = initializeReads();
GenomeLoc.setupRefContigOrdering(new FastaSequenceFile2(refFileName));
if (refFileName == null && !walker.requiresOrderedReads() && verifyingSamReadIter != null) {
logger.warn(String.format("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing."));

View File

@ -41,6 +41,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
//public static Map<String, Integer> refContigOrdering = null;
private static SAMSequenceDictionary contigInfo = null;
private static HashMap<String, String> interns = null;
private static int lastGoodIntervalIndex = 0;
public static boolean hasKnownContigOrdering() {
return contigInfo != null;
@ -271,10 +272,24 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
if ( locs.size() == 0 ) {
return true;
} else {
for ( GenomeLoc loc : locs ) {
for ( int i = lastGoodIntervalIndex; i < locs.size(); i++ ) {
GenomeLoc loc = locs.get(i);
// since it's ordered, we can do some simple checks to save us tons of time
if ( hasKnownContigOrdering() ) {
int curIndex = getContigIndex(curr.contig);
int locIndex = getContigIndex(loc.contig);
// skip loci before intervals begin
if (curIndex < locIndex)
return false;
// skip loci between intervals
if (curIndex == locIndex && curr.stop < loc.start)
return false;
}
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
if (loc.overlapsP(curr))
if (loc.overlapsP(curr)) {
lastGoodIntervalIndex = i;
return true;
}
}
return false;
}
@ -403,6 +418,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
int thisIndex = getContigIndex(thisContig);
int thatIndex = getContigIndex(thatContig);
if ( thisIndex == -1 )
{
if ( thatIndex == -1 )