From 0b45de14ed205d516895f0b2c998aa0a84d07d8e Mon Sep 17 00:00:00 2001 From: fromer Date: Wed, 9 Mar 2011 20:38:08 +0000 Subject: [PATCH] Some minor updates to fully utilize the functionality of reduceByInterval git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5411 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/CNV/GCcontentIntervalWalker.java | 47 ++++++------------- .../walkers/CNV/GeneNamesIntervalWalker.java | 43 ++++++----------- .../oneoffs/fromer/ReadDepthCNVanalysis.scala | 2 +- 3 files changed, 30 insertions(+), 62 deletions(-) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java index 4e6f7c81c..af5d9d076 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GCcontentIntervalWalker.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.PrintStream; @@ -42,14 +43,12 @@ import java.util.*; * Walks along reference and calculates the GC content for each interval defined in "intervals" ROD. */ @Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}, referenceMetaData = {@RMD(name = GCcontentIntervalWalker.INTERVALS_ROD_NAME, type = ReferenceOrderedDatum.class)}) +@Requires(value = {DataSource.REFERENCE}) public class GCcontentIntervalWalker extends RodWalker { @Output protected PrintStream out; - public final static String INTERVALS_ROD_NAME = "intervals"; - public boolean isReduceByInterval() { return true; } @@ -75,21 +74,7 @@ public class GCcontentIntervalWalker extends RodWalker { if (tracker == null) return null; - List interval = tracker.getGATKFeatureMetaData(INTERVALS_ROD_NAME, true); - if (interval.size() != 1) { - String error = "At " + ref.getLocus() + " : Must provide a track named '"+ INTERVALS_ROD_NAME +"' with exactly ONE interval per locus in -L argument!"; - if (interval.size() < 1) - throw new UserException(error); - else // interval.size() > 1 - logger.warn(error); - } - GenomeLoc curInterval = interval.get(0).getLocation(); - - GCcounter counter = new GCcounter(); - counter.calculateGCandAddIn(ref); - counter.loc = curInterval; - - return counter; + return new GCcounter().calculateGCandAddIn(ref); } public GCcounter reduce(GCcounter add, GCcounter runningCount) { @@ -100,39 +85,36 @@ public class GCcontentIntervalWalker extends RodWalker { } /** - * @param result the GC content observed. + * @param results the GC content observed for each interval. */ - public void onTraversalDone(GCcounter result) { - if (result.loc == null) - return; + public void onTraversalDone(List> results) { + for (Pair result : results ) { + GenomeLoc loc = result.getFirst(); + GCcounter counter = result.getSecond(); - double gcContent = (double) result.GCcount / result.totalCount; - out.println(result.loc + "\t" + gcContent + "\t" + result.loc.size()); + double gcContent = (double) counter.GCcount / counter.totalCount; + out.println(loc + "\t" + gcContent + "\t" + loc.size()); + } } } class GCcounter { public int totalCount; public int GCcount; - public GenomeLoc loc; public GCcounter() { this.totalCount = 0; this.GCcount = 0; - this.loc = null; } public GCcounter addIn(GCcounter other) { this.totalCount += other.totalCount; this.GCcount += other.GCcount; - if (other.loc != null && this.loc == null) - this.loc = other.loc; - return this; } - public void calculateGCandAddIn(ReferenceContext ref) { + public GCcounter calculateGCandAddIn(ReferenceContext ref) { for (byte base : ref.getBases()) { int baseIndex = BaseUtils.simpleBaseToBaseIndex(base); @@ -144,6 +126,7 @@ class GCcounter { GCcount++; } } - } -} + return this; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GeneNamesIntervalWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GeneNamesIntervalWalker.java index 1388c825f..e85cc6819 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GeneNamesIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CNV/GeneNamesIntervalWalker.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTa import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.PrintStream; @@ -41,17 +42,16 @@ import java.util.List; import java.util.Set; /** - * Walks along reference and calculates the genes (from "" ROD) for each interval defined in "intervals" ROD. + * Walks along reference and calculates the genes (from "refseq" ROD) for each interval defined in "intervals" ROD. */ @Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}, referenceMetaData = {@RMD(name = GeneNamesIntervalWalker.REFSEQ_ROD_NAME, type = AnnotatorInputTableFeature.class), @RMD(name = GeneNamesIntervalWalker.INTERVALS_ROD_NAME, type = ReferenceOrderedDatum.class)}) +@Requires(value = {DataSource.REFERENCE}, referenceMetaData = {@RMD(name = GeneNamesIntervalWalker.REFSEQ_ROD_NAME, type = AnnotatorInputTableFeature.class)}) public class GeneNamesIntervalWalker extends RodWalker { @Output protected PrintStream out; public final static String REFSEQ_ROD_NAME = "refseq"; - public final static String INTERVALS_ROD_NAME = "intervals"; public final static String REFSEQ_NAME2 = "name2"; @@ -81,21 +81,7 @@ public class GeneNamesIntervalWalker extends RodWalker { if (tracker == null) return null; - List interval = tracker.getGATKFeatureMetaData(INTERVALS_ROD_NAME, true); - if (interval.size() != 1) { - String error = "At " + ref.getLocus() + " : Must provide a track named '"+ INTERVALS_ROD_NAME +"' with exactly ONE interval per locus in -L argument!"; - if (interval.size() < 1) - throw new UserException(error); - else // interval.size() > 1 - logger.warn(error); - } - GenomeLoc curInterval = interval.get(0).getLocation(); - - GeneNames names = new GeneNames(); - names.addGenes(tracker.getReferenceMetaData(REFSEQ_ROD_NAME)); - names.loc = curInterval; - - return names; + return new GeneNames().addGenes(tracker.getReferenceMetaData(REFSEQ_ROD_NAME)); } public GeneNames reduce(GeneNames add, GeneNames runningCount) { @@ -106,40 +92,39 @@ public class GeneNamesIntervalWalker extends RodWalker { } /** - * @param result the genes in the interval. + * @param results the genes found in each interval. */ - public void onTraversalDone(GeneNames result) { - if (result.loc == null) - return; + public void onTraversalDone(List> results) { + for (Pair result : results ) { + GenomeLoc loc = result.getFirst(); + GeneNames names = result.getSecond(); - out.println(result.loc + "\t" + result); + out.println(loc + "\t" + names); + } } } class GeneNames { public Set geneNames; - public GenomeLoc loc; public GeneNames() { this.geneNames = new HashSet(); - this.loc = null; } public GeneNames addIn(GeneNames other) { this.geneNames.addAll(other.geneNames); - if (other.loc != null && this.loc == null) - this.loc = other.loc; - return this; } - public void addGenes(List refSeqRODs) { + public GeneNames addGenes(List refSeqRODs) { for (Object refSeqObject : refSeqRODs) { AnnotatorInputTableFeature refSeqAnnotation = (AnnotatorInputTableFeature) refSeqObject; if (refSeqAnnotation.containsColumnName(GeneNamesIntervalWalker.REFSEQ_NAME2)) geneNames.add(refSeqAnnotation.getColumnValue(GeneNamesIntervalWalker.REFSEQ_NAME2)); } + + return this; } public String toString() { diff --git a/scala/qscript/oneoffs/fromer/ReadDepthCNVanalysis.scala b/scala/qscript/oneoffs/fromer/ReadDepthCNVanalysis.scala index 3b8d64cb3..6efd483d8 100644 --- a/scala/qscript/oneoffs/fromer/ReadDepthCNVanalysis.scala +++ b/scala/qscript/oneoffs/fromer/ReadDepthCNVanalysis.scala @@ -125,7 +125,7 @@ class ReadDepthCNVanalysis extends QScript { @Output val outputDoCaverageCoverage: File = new File(outputDoC.getPath + DOC_MEAN_COVERAGE_OUTPUT) - var command: String = "~/CNV/wave1+2/scripts/mergeDoC.pl -gatk " + qscript.gatkJarFile.getPath.replaceFirst("dist/GenomeAnalysisTK.jar", "") + " -ref " + qscript.referenceFile + " -out " + outputDoCaverageCoverage + var command: String = "~fromer/CNV/wave1+2/scripts/mergeDoC.pl -gatk " + qscript.gatkJarFile.getPath.replaceFirst("dist/GenomeAnalysisTK.jar", "") + " -ref " + qscript.referenceFile + " -out " + outputDoCaverageCoverage for (input <- inputDoCfiles) { command += " " + input }