*Massive* speed-up for interval-based by-read traversals.
[Could do more optimizing, but this simple fix was good enough for now.]

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@266 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c192a95998
commit
4faa680887
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.FastaSequenceFile2;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
@ -52,6 +53,7 @@ public class TraverseByReads extends TraversalEngine {
|
||||||
*/
|
*/
|
||||||
public <M, T> Object traverseByRead(ReadWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
|
public <M, T> Object traverseByRead(ReadWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
|
||||||
samReadIter = initializeReads();
|
samReadIter = initializeReads();
|
||||||
|
GenomeLoc.setupRefContigOrdering(new FastaSequenceFile2(refFileName));
|
||||||
|
|
||||||
if (refFileName == null && !walker.requiresOrderedReads() && verifyingSamReadIter != null) {
|
if (refFileName == null && !walker.requiresOrderedReads() && verifyingSamReadIter != null) {
|
||||||
logger.warn(String.format("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing."));
|
logger.warn(String.format("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing."));
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
//public static Map<String, Integer> refContigOrdering = null;
|
//public static Map<String, Integer> refContigOrdering = null;
|
||||||
private static SAMSequenceDictionary contigInfo = null;
|
private static SAMSequenceDictionary contigInfo = null;
|
||||||
private static HashMap<String, String> interns = null;
|
private static HashMap<String, String> interns = null;
|
||||||
|
private static int lastGoodIntervalIndex = 0;
|
||||||
|
|
||||||
public static boolean hasKnownContigOrdering() {
|
public static boolean hasKnownContigOrdering() {
|
||||||
return contigInfo != null;
|
return contigInfo != null;
|
||||||
|
|
@ -271,10 +272,24 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
if ( locs.size() == 0 ) {
|
if ( locs.size() == 0 ) {
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
for ( GenomeLoc loc : locs ) {
|
for ( int i = lastGoodIntervalIndex; i < locs.size(); i++ ) {
|
||||||
|
GenomeLoc loc = locs.get(i);
|
||||||
|
// since it's ordered, we can do some simple checks to save us tons of time
|
||||||
|
if ( hasKnownContigOrdering() ) {
|
||||||
|
int curIndex = getContigIndex(curr.contig);
|
||||||
|
int locIndex = getContigIndex(loc.contig);
|
||||||
|
// skip loci before intervals begin
|
||||||
|
if (curIndex < locIndex)
|
||||||
|
return false;
|
||||||
|
// skip loci between intervals
|
||||||
|
if (curIndex == locIndex && curr.stop < loc.start)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
|
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
|
||||||
if (loc.overlapsP(curr))
|
if (loc.overlapsP(curr)) {
|
||||||
|
lastGoodIntervalIndex = i;
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -403,6 +418,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
int thisIndex = getContigIndex(thisContig);
|
int thisIndex = getContigIndex(thisContig);
|
||||||
int thatIndex = getContigIndex(thatContig);
|
int thatIndex = getContigIndex(thatContig);
|
||||||
|
|
||||||
|
|
||||||
if ( thisIndex == -1 )
|
if ( thisIndex == -1 )
|
||||||
{
|
{
|
||||||
if ( thatIndex == -1 )
|
if ( thatIndex == -1 )
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue