diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java index e1c5255b3..4e6f7c81c 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java @@ -24,13 +24,9 @@ package org.broadinstitute.sting.oneoffprojects.walkers.CNV; -import org.broad.tribble.util.variantcontext.Genotype; -import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; @@ -43,15 +39,17 @@ import java.io.PrintStream; import java.util.*; /** - * Walks along all variant ROD loci and verifies the phasing from the reads for user-defined pairs of sites. + * Walks along reference and calculates the GC content for each interval defined in "intervals" ROD. */ @Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}) +@Requires(value = {DataSource.REFERENCE}, referenceMetaData = {@RMD(name = GCcontentIntervalWalker.INTERVALS_ROD_NAME, type = ReferenceOrderedDatum.class)}) public class GCcontentIntervalWalker extends RodWalker { @Output protected PrintStream out; + public final static String INTERVALS_ROD_NAME = "intervals"; + public boolean isReduceByInterval() { return true; } @@ -77,9 +75,9 @@ public class GCcontentIntervalWalker extends RodWalker { if (tracker == null) return null; - List interval = tracker.getGATKFeatureMetaData("intervals", true); + List interval = tracker.getGATKFeatureMetaData(INTERVALS_ROD_NAME, true); if (interval.size() != 1) { - String error = "At " + ref.getLocus() + " : Must provide a track named 'intervals' with exactly ONE interval per locus in -L argument!"; + String error = "At " + ref.getLocus() + " : Must provide a track named '"+ INTERVALS_ROD_NAME +"' with exactly ONE interval per locus in -L argument!"; if (interval.size() < 1) throw new UserException(error); else // interval.size() > 1 @@ -102,7 +100,7 @@ public class GCcontentIntervalWalker extends RodWalker { } /** - * @param result the number of reads and VariantContexts seen. + * @param result the GC content observed. */ public void onTraversalDone(GCcounter result) { if (result.loc == null) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GeneNamesIntervalWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GeneNamesIntervalWalker.java new file mode 100755 index 000000000..1388c825f --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GeneNamesIntervalWalker.java @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.oneoffprojects.walkers.CNV; + +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.PrintStream; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Walks along reference and calculates the genes (from "" ROD) for each interval defined in "intervals" ROD. + */ +@Allows(value = {DataSource.REFERENCE}) +@Requires(value = {DataSource.REFERENCE}, referenceMetaData = {@RMD(name = GeneNamesIntervalWalker.REFSEQ_ROD_NAME, type = AnnotatorInputTableFeature.class), @RMD(name = GeneNamesIntervalWalker.INTERVALS_ROD_NAME, type = ReferenceOrderedDatum.class)}) + +public class GeneNamesIntervalWalker extends RodWalker { + @Output + protected PrintStream out; + + public final static String REFSEQ_ROD_NAME = "refseq"; + public final static String INTERVALS_ROD_NAME = "intervals"; + + public final static String REFSEQ_NAME2 = "name2"; + + + public boolean isReduceByInterval() { + return true; + } + + public void initialize() { + } + + public boolean generateExtendedEvents() { + return false; + } + + public GeneNames reduceInit() { + return new GeneNames(); + } + + /** + * @param tracker the meta-data tracker + * @param ref the reference base + * @param context the context for the given locus + * @return statistics of and list of all phased VariantContexts and their base pileup that have gone out of cacheWindow range. + */ + public GeneNames map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker == null) + return null; + + List interval = tracker.getGATKFeatureMetaData(INTERVALS_ROD_NAME, true); + if (interval.size() != 1) { + String error = "At " + ref.getLocus() + " : Must provide a track named '"+ INTERVALS_ROD_NAME +"' with exactly ONE interval per locus in -L argument!"; + if (interval.size() < 1) + throw new UserException(error); + else // interval.size() > 1 + logger.warn(error); + } + GenomeLoc curInterval = interval.get(0).getLocation(); + + GeneNames names = new GeneNames(); + names.addGenes(tracker.getReferenceMetaData(REFSEQ_ROD_NAME)); + names.loc = curInterval; + + return names; + } + + public GeneNames reduce(GeneNames add, GeneNames runningCount) { + if (add == null) + add = new GeneNames(); + + return runningCount.addIn(add); + } + + /** + * @param result the genes in the interval. + */ + public void onTraversalDone(GeneNames result) { + if (result.loc == null) + return; + + out.println(result.loc + "\t" + result); + } +} + +class GeneNames { + public Set geneNames; + public GenomeLoc loc; + + public GeneNames() { + this.geneNames = new HashSet(); + this.loc = null; + } + + public GeneNames addIn(GeneNames other) { + this.geneNames.addAll(other.geneNames); + + if (other.loc != null && this.loc == null) + this.loc = other.loc; + + return this; + } + + public void addGenes(List refSeqRODs) { + for (Object refSeqObject : refSeqRODs) { + AnnotatorInputTableFeature refSeqAnnotation = (AnnotatorInputTableFeature) refSeqObject; + if (refSeqAnnotation.containsColumnName(GeneNamesIntervalWalker.REFSEQ_NAME2)) + geneNames.add(refSeqAnnotation.getColumnValue(GeneNamesIntervalWalker.REFSEQ_NAME2)); + } + } + + public String toString() { + if (geneNames.isEmpty()) + return "."; + + StringBuilder sb = new StringBuilder(); + + for (String gene : geneNames) + sb.append(gene).append(";"); + + return sb.toString(); + } +} + diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CountHetPhasingInIntervalWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CountHetPhasingInIntervalWalker.java index e164e3d38..00f34c03e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CountHetPhasingInIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CountHetPhasingInIntervalWalker.java @@ -45,7 +45,7 @@ import java.util.*; * Walks along all variant ROD loci and verifies the phasing from the reads for user-defined pairs of sites. */ @Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) +@Requires(value = {DataSource.REFERENCE}, referenceMetaData = {@RMD(name = "variant", type = ReferenceOrderedDatum.class), @RMD(name = CountHetPhasingInIntervalWalker.INTERVALS_ROD_NAME, type = ReferenceOrderedDatum.class)}) @ReadFilters({ZeroMappingQualityReadFilter.class}) // Filter out all reads with zero mapping quality @@ -63,6 +63,8 @@ public class CountHetPhasingInIntervalWalker extends RodWalker @Argument(fullName = "perIntervalOut", shortName = "perIntervalOut", doc = "File to which to write per-sample, per-interval phased het statistics", required = false) protected PrintStream perIntervalOut = null; + public final static String INTERVALS_ROD_NAME = "intervals"; + public void initialize() { rodNames = new LinkedList(); rodNames.add("variant"); @@ -90,9 +92,9 @@ public class CountHetPhasingInIntervalWalker extends RodWalker int processed = 1; - List interval = tracker.getGATKFeatureMetaData("intervals", true); + List interval = tracker.getGATKFeatureMetaData(INTERVALS_ROD_NAME, true); if (interval.size() != 1) { - String error = "At " + ref.getLocus() + " : Must provide a track named 'intervals' with exactly ONE interval per locus in -L argument!"; + String error = "At " + ref.getLocus() + " : Must provide a track named '"+ INTERVALS_ROD_NAME +"' with exactly ONE interval per locus in -L argument!"; if (interval.size() < 1) throw new UserException(error); else // interval.size() > 1