From 20a0078f23727079aa1d9bd5e6a1ea021a2f4531 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 1 May 2012 15:51:36 -0400 Subject: [PATCH] Merging active regions across shard boundries if they are contiguous, have the same active status and don't grow too big. --- .../gatk/traversals/TraverseActiveRegions.java | 17 +++++++++++++++-- .../gatk/walkers/annotator/HaplotypeScore.java | 2 +- .../utils/activeregion/ActivityProfile.java | 4 +++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 76c1ce8c5..6d0ec0e7c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -28,7 +28,7 @@ public class TraverseActiveRegions extends TraversalEngine workQueue = new LinkedList(); + private final LinkedList workQueue = new LinkedList(); private final LinkedHashSet myReads = new LinkedHashSet(); @Override @@ -112,7 +112,20 @@ public class TraverseActiveRegions extends TraversalEngine activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ); // add active regions to queue of regions to process - workQueue.addAll( activeRegions ); + // first check if can merge active regions over shard boundaries + if( !activeRegions.isEmpty() ) { + if( !workQueue.isEmpty() ) { + final ActiveRegion last = workQueue.getLast(); + final ActiveRegion first = activeRegions.get(0); + if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { + workQueue.removeLast(); + activeRegions.remove(first); + workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) ); + } + } + workQueue.addAll( activeRegions ); + } + logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); // now go and process all of the active regions diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 6abfdc7d2..f2e919b92 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -376,7 +376,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot } } - // indel likelihoods are stric log-probs, not phred scored + // indel likelihoods are strict log-probs, not phred scored double overallScore = 0.0; for (final double[] readHaplotypeScores : haplotypeScores) { overallScore += MathUtils.arrayMin(readHaplotypeScores); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 70593bbed..4333e471e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -158,7 +158,9 @@ public class ActivityProfile { // find the best place to break up the large active region Double minProb = Double.MAX_VALUE; int cutPoint = -1; - for( int iii = curStart + 50; iii < curEnd - 50; iii++ ) { // BUGBUG: assumes maxRegionSize >> 50 + + final int size = curEnd - curStart + 1; + for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) { if( isActiveList.get(iii) < minProb ) { minProb = isActiveList.get(iii); cutPoint = iii; } } final List leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList());