diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CountIntervals.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CountIntervals.java
new file mode 100755
index 000000000..4d74261cd
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CountIntervals.java
@@ -0,0 +1,82 @@
+package org.broadinstitute.sting.oneoffprojects.walkers;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
+import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.gatk.walkers.RefWalker;
+import org.broadinstitute.sting.gatk.walkers.RodWalker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.collections.Pair;
+
+import java.util.List;
+
+/**
+ * Counts the number of contiguous regions the walker traverses over. Slower than it needs to be, but
+ * very useful since overlapping intervals get merged, so you can count the number of intervals the GATK
+ * merges down to. This was its very first use.
+ *
+ * <p>The reduce value is a pair of (last locus seen, number of contiguous regions counted so far).
+ */
+public class CountIntervals extends RefWalker<GenomeLoc, Pair<GenomeLoc, Long>> {
+
+    /**
+     * Creates the initial reduce value: no locus seen yet and a count of zero.
+     *
+     * @return a pair holding a null locus and a zero count
+     */
+    public Pair<GenomeLoc, Long> reduceInit() {
+        return new Pair<GenomeLoc, Long>(null, 0L);
+    }
+
+    /**
+     * Maps a reference position to its locus.
+     *
+     * @param tracker reference metadata tracker; when null the position is skipped
+     * @param ref     reference context supplying the current locus
+     * @param context alignment context (not used)
+     * @return the locus from ref, or null when tracker is null
+     */
+    public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( tracker == null ) {
+            return null;
+        }
+
+        return ref.getLocus();
+    }
+
+    /**
+     * Folds one mapped locus into the running count. A new region is counted for the first locus seen
+     * and whenever the previous locus reports a distance greater than 1 to the new one.
+     *
+     * @param loc  locus produced by map; may be null, in which case it is ignored
+     * @param prev running pair of (last locus seen, regions counted); updated in place
+     * @return prev, after the update
+     */
+    public Pair<GenomeLoc, Long> reduce(GenomeLoc loc, Pair<GenomeLoc, Long> prev) {
+        // map() returns null for skipped positions; they must neither be counted as a new
+        // region nor be handed to distance(), and they must not erase the last real locus.
+        if ( loc == null ) {
+            return prev;
+        }
+
+        if ( prev.first == null || prev.first.distance(loc) > 1 ) {
+            prev.second++;
+        }
+
+        prev.first = loc;
+
+        return prev;
+    }
+
+    /**
+     * Prints the final number of contiguous regions.
+     *
+     * @param finalReduce final reduce value; its second element is the count that is printed
+     */
+    public void onTraversalDone(Pair<GenomeLoc, Long> finalReduce) {
+        // %n terminates the line so the report does not run into whatever is printed next.
+        out.printf("Number of contiguous intervals: %d%n", finalReduce.second);
+    }
+}