*Massive* speed-up for interval-based by-read traversals.
[Could do more optimizing, but this simple fix was good enough for now.]

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@266 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c192a95998
commit
4faa680887
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
|||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.FastaSequenceFile2;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -52,6 +53,7 @@ public class TraverseByReads extends TraversalEngine {
|
|||
*/
|
||||
public <M, T> Object traverseByRead(ReadWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
|
||||
samReadIter = initializeReads();
|
||||
GenomeLoc.setupRefContigOrdering(new FastaSequenceFile2(refFileName));
|
||||
|
||||
if (refFileName == null && !walker.requiresOrderedReads() && verifyingSamReadIter != null) {
|
||||
logger.warn(String.format("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing."));
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
//public static Map<String, Integer> refContigOrdering = null;
|
||||
private static SAMSequenceDictionary contigInfo = null;
|
||||
private static HashMap<String, String> interns = null;
|
||||
private static int lastGoodIntervalIndex = 0;
|
||||
|
||||
public static boolean hasKnownContigOrdering() {
|
||||
return contigInfo != null;
|
||||
|
|
@ -271,10 +272,24 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
if ( locs.size() == 0 ) {
|
||||
return true;
|
||||
} else {
|
||||
for ( GenomeLoc loc : locs ) {
|
||||
for ( int i = lastGoodIntervalIndex; i < locs.size(); i++ ) {
|
||||
GenomeLoc loc = locs.get(i);
|
||||
// since it's ordered, we can do some simple checks to save us tons of time
|
||||
if ( hasKnownContigOrdering() ) {
|
||||
int curIndex = getContigIndex(curr.contig);
|
||||
int locIndex = getContigIndex(loc.contig);
|
||||
// skip loci before intervals begin
|
||||
if (curIndex < locIndex)
|
||||
return false;
|
||||
// skip loci between intervals
|
||||
if (curIndex == locIndex && curr.stop < loc.start)
|
||||
return false;
|
||||
}
|
||||
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
|
||||
if (loc.overlapsP(curr))
|
||||
if (loc.overlapsP(curr)) {
|
||||
lastGoodIntervalIndex = i;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
@ -403,6 +418,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
int thisIndex = getContigIndex(thisContig);
|
||||
int thatIndex = getContigIndex(thatContig);
|
||||
|
||||
|
||||
if ( thisIndex == -1 )
|
||||
{
|
||||
if ( thatIndex == -1 )
|
||||
|
|
|
|||
Loading…
Reference in New Issue