diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java
new file mode 100755
index 000000000..c0f469973
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers;
+
+import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.*;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.collections.Pair;
+
+import java.io.PrintStream;
+import java.util.*;
+
+/**
+ * Walks along reference and calculates the GC content for each interval.
+ *
+ * Each locus maps to 1 when its reference base is G or C and to 0 otherwise. The
+ * per-interval sum of those values is divided by the interval size, so a locus
+ * with any other base adds nothing to the count but is still part of the
+ * denominator.
+ */
+@Allows(value = {DataSource.REFERENCE})
+@Requires(value = {DataSource.REFERENCE})
+
+@By(DataSource.REFERENCE)
+
+public class GCContentByIntervalWalker extends LocusWalker<Long, Long> {
+    /** Stream that receives one "interval TAB gcContent" line per interval. */
+    @Output
+    protected PrintStream out;
+
+    /**
+     * @return true: the reduction is requested per interval, which is what
+     *         onTraversalDone below consumes.
+     */
+    public boolean isReduceByInterval() {
+        return true;
+    }
+
+    /** Nothing to set up. */
+    public void initialize() {
+    }
+
+    /**
+     * @return false: this walker does not request extended events.
+     */
+    public boolean generateExtendedEvents() {
+        return false;
+    }
+
+    /**
+     * @return the starting G/C count, zero.
+     */
+    public Long reduceInit() {
+        return 0L;
+    }
+
+    /**
+     * Scores one locus.
+     *
+     * @param tracker the meta-data tracker; only tested for null
+     * @param ref     the reference context of the locus
+     * @param context the alignment context of the locus; not used
+     * @return null when tracker is null, otherwise 1 if the reference base is G or C and 0 if not
+     */
+    public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if (tracker == null)
+            return null;
+        int baseIndex = ref.getBaseIndex();
+        return (baseIndex == BaseUtils.gIndex || baseIndex == BaseUtils.cIndex) ? 1L : 0L;
+    }
+
+    /**
+     * Adds one map() result to the running G/C count.
+     *
+     * @param toAdd        the value returned by map(); may be null
+     * @param runningCount the count accumulated so far
+     * @return the updated count
+     */
+    public Long reduce(Long toAdd, Long runningCount) {
+        // map() returns null when it is given no tracker; adding that would throw a
+        // NullPointerException on unboxing, so such a locus leaves the count unchanged.
+        if (toAdd == null)
+            return runningCount;
+        return runningCount + toAdd;
+    }
+
+    /**
+     * Prints one line per interval: the interval, a tab, and the G/C count divided by loc.size().
+     *
+     * @param results the (interval, G/C count) pairs produced by the traversal
+     */
+    public void onTraversalDone(List<Pair<GenomeLoc, Long>> results) {
+        for (Pair<GenomeLoc, Long> result : results ) {
+            GenomeLoc loc = result.getFirst();
+            Long gcCount = result.getSecond();
+
+            double gcContent = (double) gcCount / loc.size();
+            out.println(loc + "\t" + gcContent);
+        }
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java
deleted file mode 100755
index 0922df26d..000000000
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2010, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */ - -package org.broadinstitute.sting.oneoffprojects.walkers.CNV; - -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.PrintStream; -import java.util.*; - -/** - * Walks along reference and calculates the GC content for each interval. - */ -@Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}) - -@By(DataSource.REFERENCE) - -public class GCcontentIntervalWalker extends LocusWalker { - @Output - protected PrintStream out; - - public boolean isReduceByInterval() { - return true; - } - - public void initialize() { - } - - public boolean generateExtendedEvents() { - return false; - } - - public GCcounter reduceInit() { - return new GCcounter(); - } - - /** - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return statistics of and list of all phased VariantContexts and their base pileup that have gone out of cacheWindow range. - */ - public GCcounter map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (tracker == null) - return null; - - return new GCcounter().calculateGCandAddIn(ref); - } - - public GCcounter reduce(GCcounter add, GCcounter runningCount) { - if (add == null) - add = new GCcounter(); - - return runningCount.addIn(add); - } - - /** - * @param results the GC content observed for each interval. 
- */ - public void onTraversalDone(List> results) { - for (Pair result : results ) { - GenomeLoc loc = result.getFirst(); - GCcounter counter = result.getSecond(); - - double gcContent = (double) counter.GCcount / counter.totalCount; - out.println(loc + "\t" + gcContent + "\t" + loc.size()); - } - } -} - -class GCcounter { - public int totalCount; - public int GCcount; - - public GCcounter() { - this.totalCount = 0; - this.GCcount = 0; - } - - public GCcounter addIn(GCcounter other) { - this.totalCount += other.totalCount; - this.GCcount += other.GCcount; - - return this; - } - - public GCcounter calculateGCandAddIn(ReferenceContext ref) { - for (byte base : ref.getBases()) { - int baseIndex = BaseUtils.simpleBaseToBaseIndex(base); - - boolean baseIsGC = (baseIndex == BaseUtils.gIndex || baseIndex == BaseUtils.cIndex); - boolean baseIsAT = (baseIndex == BaseUtils.aIndex || baseIndex == BaseUtils.tIndex); - if (baseIsGC || baseIsAT) { - totalCount++; - if (baseIsGC) - GCcount++; - } - } - - return this; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GCCalculatorWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GCCalculatorWalker.java deleted file mode 100644 index ab78c7dac..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GCCalculatorWalker.java +++ /dev/null @@ -1,71 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Output; - -import 
java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.io.PrintStream; - -/** - * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl - * - * @Author chartl - * @Date May 19, 2010 - */ -public class GCCalculatorWalker extends RefWalker,Boolean>, Map>> { - @Output - PrintStream out; - - public Map> reduceInit() { - return new HashMap>(); - } - - public Pair,Boolean> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null || tracker.getReferenceMetaData("interval_list") == null ) { - return null; - } else { - Set overlappingIntervals = new HashSet(); - for ( GATKFeature f : tracker.getGATKFeatureMetaData("interval_list",true) ) { - overlappingIntervals.add( f.getLocation() ); - } - - return new Pair,Boolean>(overlappingIntervals, ref.getBaseIndex() == BaseUtils.cIndex || ref.getBaseIndex() == BaseUtils.gIndex ); - } - } - - public Map> reduce(Pair,Boolean> map, Map> prevReduce) { - if ( map == null ) { - return prevReduce; - } - - for ( GenomeLoc loc : map.first ) { - if ( ! prevReduce.keySet().contains(loc) ) { - prevReduce.put(loc,new Pair(0l,0l)); - } - - prevReduce.get(loc).first ++; - if ( map.second ) { - prevReduce.get(loc).second ++; - } - } - - return prevReduce; - } - - public void onTraversalDone(Map> reduced ) { - for ( Map.Entry> gcCounts : reduced.entrySet() ) { - double gc_content = ( (double) gcCounts.getValue().second )/( (double) gcCounts.getValue().first ); - out.printf("%s\t%.2f%n",gcCounts.getKey().toString(),100*gc_content); - } - } -}