diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GCCalculatorWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GCCalculatorWalker.java new file mode 100644 index 000000000..1982a07e5 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GCCalculatorWalker.java @@ -0,0 +1,69 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.IntervalRod; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.walkers.RefWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl + * + * @Author chartl + * @Date May 19, 2010 + */ +public class GCCalculatorWalker extends RefWalker,Boolean>, Map>> { + + public Map> reduceInit() { + return new HashMap>(); + } + + public Pair,Boolean> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null || tracker.getReferenceMetaData("interval_list") == null ) { + return null; + } else { + Set overlappingIntervals = new HashSet(); + for ( GATKFeature f : tracker.getGATKFeatureMetaData("interval_list",true) ) { + overlappingIntervals.add( f.getLocation() ); + } + + return new Pair,Boolean>(overlappingIntervals, ref.getBaseIndex() == BaseUtils.cIndex || ref.getBaseIndex() == BaseUtils.gIndex ); + } + } + + public Map> reduce(Pair,Boolean> map, Map> prevReduce) { + if ( map == null ) { + return prevReduce; + } + + for ( GenomeLoc loc : map.first ) { + if ( ! prevReduce.keySet().contains(loc) ) { + prevReduce.put(loc,new Pair(0l,0l)); + } + + prevReduce.get(loc).first ++; + if ( map.second ) { + prevReduce.get(loc).second ++; + } + } + + return prevReduce; + } + + public void onTraversalDone(Map> reduced ) { + for ( Map.Entry> gcCounts : reduced.entrySet() ) { + double gc_content = ( (double) gcCounts.getValue().second )/( (double) gcCounts.getValue().first ); + out.printf("%s\t%.2f%n",gcCounts.getKey().toString(),100*gc_content); + } + } +}