Remove startSearchAt state variable from ActivityProfile

-- The new algorithm will only try to create an active region if there are at least maxRegionSize + propagation distance states in the list.  When that's true, we are guaranteed to actually find a region.  So this algorithm is not only truly correct but also super fast, as we only ever do the search for the end of the region when we will certainly find one, and actually generate a region.
This commit is contained in:
Mark DePristo 2013-01-27 13:51:53 -05:00
parent c97a361b5d
commit 14d8afe413
2 changed files with 5 additions and 24 deletions

View File

@ -125,7 +125,6 @@ public class ActiveRegion implements HasGenomeLocation {
public int getExtension() { return extension; }
public int size() { return reads.size(); }
public void clearReads() { reads.clear(); }
public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
public boolean equalExceptReads(final ActiveRegion other) {

View File

@ -49,14 +49,6 @@ public class ActivityProfile {
protected GenomeLoc regionStartLoc = null;
protected GenomeLoc regionStopLoc = null;
/**
* Optimization variable. Keeps track of the right-most state we looked at in our
* last unsuccessful call to findEndOfRegion. This variable allows us to
* avoid an O(N^2) algorithm to find the end of the current region in the profile. It
* must be reset to 0 when a region is popped off the stack.
*/
int startSearchForEndOfRegionHere = 0;
/**
* A cached value of the regionStartLoc contig length, to make calls to
* getCurrentContigLength efficient
@ -352,9 +344,6 @@ public class ActivityProfile {
// couldn't find a valid ending offset, so we return null
return null;
// reset the start site of findEndOfRegion to the first element
startSearchForEndOfRegionHere = 0;
// we need to create the active region, and clip out the states we're extracting from this profile
final List<ActivityProfileState> sub = stateList.subList(0, offsetOfNextRegionEnd + 1);
final List<ActivityProfileState> supportingStates = new ArrayList<ActivityProfileState>(sub);
@ -401,9 +390,7 @@ public class ActivityProfile {
return -1;
}
// TODO -- don't need startSearchForEndOfRegionHere with the above check
int endOfActiveRegion = findFirstActivityBoundary(isActiveRegion, maxRegionSize, startSearchForEndOfRegionHere);
startSearchForEndOfRegionHere = Math.max(endOfActiveRegion - getMaxProbPropagationDistance(), 0);
int endOfActiveRegion = findFirstActivityBoundary(isActiveRegion, maxRegionSize);
if ( isActiveRegion && endOfActiveRegion == maxRegionSize )
// we've run to the end of the region, let's find a good place to cut
@ -446,25 +433,20 @@ public class ActivityProfile {
*
* Note that each state has a probability of being active, and this function thresholds that
* value on ACTIVE_PROB_THRESHOLD, coloring each state as active or inactive. Finds the
* largest contiguous stretch of states starting at startSearchAt with the same isActive
* largest contiguous stretch of states starting at the first state (index 0) with the same isActive
* state as isActiveRegion. If the entire state list has the same isActive value, then returns
* maxRegionSize
*
* @param isActiveRegion are we looking for a stretch of active states, or inactive ones?
* @param maxRegionSize don't look for a boundary that would yield a region of size > maxRegionSize
* @param startSearchAt start looking not at 0 but rather at this offset into the state list. This
* parameter allows us to remember where we looked before across calls, so that
* we don't keep searching from 0 -> 1, then 0 -> 2, then 0 -> 3 on each subsequent
* call to this function. Use with caution, as an incorrect value could result in
* skipping a true boundary
* @return the index of the first state in the state list with isActive value != isActiveRegion, or maxRegionSize
* if no such element exists
*/
@Requires({"maxRegionSize > 0", "startSearchAt >= 0", "startSearchAt <= maxRegionSize", "startSearchAt <= stateList.size()"})
@Requires({"maxRegionSize > 0"})
@Ensures({"result >= 0", "result <= stateList.size()"})
private int findFirstActivityBoundary(final boolean isActiveRegion, final int maxRegionSize, final int startSearchAt) {
private int findFirstActivityBoundary(final boolean isActiveRegion, final int maxRegionSize) {
final int nStates = stateList.size();
int endOfActiveRegion = startSearchAt;
int endOfActiveRegion = 0;
while ( endOfActiveRegion < nStates && endOfActiveRegion < maxRegionSize ) {
if ( getProb(endOfActiveRegion) > ACTIVE_PROB_THRESHOLD != isActiveRegion ) {