diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index f5e936a09..83daa4d80 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -10,14 +10,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.ArrayList; -import java.util.LinkedHashSet; -import java.util.LinkedList; -import java.util.Queue; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -54,7 +52,8 @@ public class TraverseActiveRegions extends TraversalEngine isActiveList = new ArrayList(); + final ArrayList isActiveList = new ArrayList(); + GenomeLoc firstIsActiveStart = null; //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider ); ReferenceOrderedView referenceOrderedDataView = null; @@ -91,11 +90,15 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine activeRegions = integrateActiveList( isActiveList ); + final ArrayList activeRegions = integrateActiveList( isActiveList, firstIsActiveStart, activeRegionExtension ); logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." ); if( walker.activeRegionOutStream == null ) { workQueue.addAll( activeRegions ); @@ -137,14 +134,11 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine integrateActiveList( final ArrayList activeList ) { + // band-pass filter the list of isActive probabilities and turn into active regions + private ArrayList integrateActiveList( final ArrayList activeList, final GenomeLoc firstIsActiveStart, final int activeRegionExtension ) { + + final double ACTIVE_PROB_THRESHOLD = 0.2; final ArrayList returnList = new ArrayList(); if( activeList.size() == 0 ) { return returnList; } else if( activeList.size() == 1 ) { - returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(activeList.get(0).getLocation().getContig(), activeList.get(0).getLocation().getStart(), activeList.get(0).getLocation().getStart()), - activeList.get(0).isActive, engine.getGenomeLocParser(), activeList.get(0).getExtension() ) ); + returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart(), firstIsActiveStart.getStart()), + activeList.get(0) > ACTIVE_PROB_THRESHOLD, engine.getGenomeLocParser(), activeRegionExtension ) ); return returnList; } else { - ActiveRegion prevLocus = activeList.get(0); - ActiveRegion startLocus = prevLocus; - for( final ActiveRegion thisLocus : activeList ) { - if( prevLocus.isActive != thisLocus.isActive || !prevLocus.getLocation().contiguousP( thisLocus.getLocation() ) ) { - returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()), - prevLocus.isActive, engine.getGenomeLocParser(), startLocus.getExtension() ) ); - startLocus = thisLocus; + final Double[] activeProbArray = activeList.toArray(new Double[activeList.size()]); + final double[] filteredProbArray = new double[activeProbArray.length]; + final int FILTER_SIZE = 10; + final int MAX_ACTIVE_REGION = 200; + for( int iii = 0; iii < activeProbArray.length; iii++ ) { + double maxVal = 0; + for( int jjj = Math.max( 0, iii-FILTER_SIZE); jjj < Math.min( activeList.size(), iii+FILTER_SIZE); jjj++ ) { + if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; } } - prevLocus = thisLocus; + filteredProbArray[iii] = maxVal; } - // output the last region if necessary - if( startLocus != prevLocus ) { - returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()), - prevLocus.isActive, engine.getGenomeLocParser(), startLocus.getExtension() ) ); + + boolean curStatus = filteredProbArray[0] > ACTIVE_PROB_THRESHOLD; + int curStart = 0; + for(int iii = 1; iii < filteredProbArray.length; iii++ ) { + final boolean thisStatus = filteredProbArray[iii] > ACTIVE_PROB_THRESHOLD; + if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) { + returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)), + curStatus, engine.getGenomeLocParser(), activeRegionExtension ) ); + curStatus = thisStatus; + curStart = iii; + } } + returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)), + curStatus, engine.getGenomeLocParser(), activeRegionExtension ) ); return returnList; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 98308ee11..244870c78 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -73,8 +73,8 @@ public abstract class ActiveRegionWalker extends Walker