*Massive* speed-up for interval-based by-read traversals.

[Further optimization is possible, but this simple fix is good enough for now.]


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@266 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-04-02 20:19:39 +00:00
parent c192a95998
commit 4faa680887
2 changed files with 20 additions and 2 deletions

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator; import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.FastaSequenceFile2;
import java.util.List; import java.util.List;
import java.util.Arrays; import java.util.Arrays;
@ -52,6 +53,7 @@ public class TraverseByReads extends TraversalEngine {
*/ */
public <M, T> Object traverseByRead(ReadWalker<M, T> walker, ArrayList<GenomeLoc> locations) { public <M, T> Object traverseByRead(ReadWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
samReadIter = initializeReads(); samReadIter = initializeReads();
GenomeLoc.setupRefContigOrdering(new FastaSequenceFile2(refFileName));
if (refFileName == null && !walker.requiresOrderedReads() && verifyingSamReadIter != null) { if (refFileName == null && !walker.requiresOrderedReads() && verifyingSamReadIter != null) {
logger.warn(String.format("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing.")); logger.warn(String.format("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing."));

View File

@ -41,6 +41,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
//public static Map<String, Integer> refContigOrdering = null; //public static Map<String, Integer> refContigOrdering = null;
private static SAMSequenceDictionary contigInfo = null; private static SAMSequenceDictionary contigInfo = null;
private static HashMap<String, String> interns = null; private static HashMap<String, String> interns = null;
private static int lastGoodIntervalIndex = 0;
public static boolean hasKnownContigOrdering() { public static boolean hasKnownContigOrdering() {
return contigInfo != null; return contigInfo != null;
@ -271,10 +272,24 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
if ( locs.size() == 0 ) { if ( locs.size() == 0 ) {
return true; return true;
} else { } else {
for ( GenomeLoc loc : locs ) { for ( int i = lastGoodIntervalIndex; i < locs.size(); i++ ) {
GenomeLoc loc = locs.get(i);
// since it's ordered, we can do some simple checks to save us tons of time
if ( hasKnownContigOrdering() ) {
int curIndex = getContigIndex(curr.contig);
int locIndex = getContigIndex(loc.contig);
// skip loci before intervals begin
if (curIndex < locIndex)
return false;
// skip loci between intervals
if (curIndex == locIndex && curr.stop < loc.start)
return false;
}
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr)); //System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
if (loc.overlapsP(curr)) if (loc.overlapsP(curr)) {
lastGoodIntervalIndex = i;
return true; return true;
}
} }
return false; return false;
} }
@ -403,6 +418,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
int thisIndex = getContigIndex(thisContig); int thisIndex = getContigIndex(thisContig);
int thatIndex = getContigIndex(thatContig); int thatIndex = getContigIndex(thatContig);
if ( thisIndex == -1 ) if ( thisIndex == -1 )
{ {
if ( thatIndex == -1 ) if ( thatIndex == -1 )