From c3c001e02ee9ab467c05ecd3b2b855acd55e5215 Mon Sep 17 00:00:00 2001 From: aaron Date: Thu, 12 Nov 2009 06:18:10 +0000 Subject: [PATCH] cleanup of the traversal output code git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2026 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GenomeAnalysisEngine.java | 3 ++- .../gatk/traversals/TraversalEngine.java | 22 +++++-------------- .../gatk/traversals/TraverseDuplicates.java | 7 ++++-- .../sting/gatk/traversals/TraverseLoci.java | 9 ++++---- .../gatk/traversals/TraverseLocusWindows.java | 8 ++++--- .../sting/gatk/traversals/TraverseReads.java | 7 ++++-- .../utils/sam/ArtificialReadsTraversal.java | 4 ++-- .../gatk/traversals/TraverseReadsTest.java | 7 +++--- 8 files changed, 32 insertions(+), 35 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index aef85d491..e658c2a58 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -309,7 +309,8 @@ public class GenomeAnalysisEngine { * Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying * individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list * returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names - * found in the corresponding bam file. + * found in the corresponding bam file. The sample names returned will be in order of the files passed in the input + * parameter list of the GATK. * * @return */ diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index dd98a59db..61b73bc7b 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -21,9 +21,6 @@ public abstract class TraversalEngine { /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); - /** what kind of traversal we're undertaking. This allows us to format output correctly */ - public enum TRAVERSAL_TYPE { READ, LOCUS, LOCUS_WINDOW, DUPLICATE }; - /** * set the max number of iterations * @param maximumIterations the number of iterations @@ -47,7 +44,7 @@ public abstract class TraversalEngine { * @param type the TRAVERSAL_TYPE of the traversal * @param loc the location */ - public void printProgress(final TRAVERSAL_TYPE type, GenomeLoc loc) { + public void printProgress(final String type, GenomeLoc loc) { printProgress(false, type, loc); } @@ -59,7 +56,7 @@ public abstract class TraversalEngine { * @param type String to print out describing our atomic traversal type ("read", "locus", etc) * @param loc Current location */ - private void printProgress(boolean mustPrint, final TRAVERSAL_TYPE type, GenomeLoc loc) { + private void printProgress(boolean mustPrint, final String type, GenomeLoc loc) { final long nRecords = TraversalStatistics.nRecords; final long curTime = System.currentTimeMillis(); final double elapsed = (curTime - startTime) / 1000.0; @@ -68,17 +65,10 @@ public abstract class TraversalEngine { if (mustPrint || nRecords == 1 || nRecords % N_RECORDS_TO_PRINT == 0 || maxElapsedIntervalForPrinting(curTime)) { this.lastProgressPrintTime = curTime; final double secsPer1MReads = (elapsed * 1000000.0) / nRecords; - String typeString = "loci"; - switch (type) { - case LOCUS: typeString = "loci"; break; - case READ: typeString = "reads"; break; - case DUPLICATE: typeString = "dups"; break; - case LOCUS_WINDOW: typeString = "interval"; break; - } if (loc != null) - logger.info(String.format("[PROGRESS] Traversed to %s, processing %,d %s in %.2f secs (%.2f secs per 1M %s)", loc, nRecords, typeString, elapsed, secsPer1MReads, typeString)); + logger.info(String.format("[PROGRESS] Traversed to %s, processing %,d %s in %.2f secs (%.2f secs per 1M %s)", loc, nRecords, type, elapsed, secsPer1MReads, type)); else - logger.info(String.format("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)", nRecords, typeString, elapsed, secsPer1MReads, typeString)); + logger.info(String.format("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)", nRecords, type, elapsed, secsPer1MReads, type)); } } @@ -93,11 +83,11 @@ public abstract class TraversalEngine { /** * Called after a traversal to print out information about the traversal process * - * @param type TRAVERSAL_TYPE describing this type of traversal + * @param type describing this type of traversal * @param sum The reduce result of the traversal * @param ReduceType of the traversal */ - protected void printOnTraversalDone(final TRAVERSAL_TYPE type, T sum) { + protected void printOnTraversalDone(final String type, T sum) { printProgress(true, type, null); logger.info("Traversal reduce result is " + sum); final long curTime = System.currentTimeMillis(); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index bd4089ded..ae37469e9 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -66,6 +66,9 @@ public class TraverseDuplicates extends TraversalEngine { /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraverseDuplicates.class); + /** descriptor of the type */ + private static final String DUPS_STRING = "dups"; + private final boolean DEBUG = false; private List readsAtLoc(final SAMRecord read, PushbackIterator iter) { @@ -203,7 +206,7 @@ public class TraverseDuplicates extends TraversalEngine { if (duplicateReads.size() > 0) sum = mapOne(dupWalker, uniqueReads, duplicateReads, site, refBases, locus, sum); - printProgress(TRAVERSAL_TYPE.DUPLICATE, site); + printProgress(DUPS_STRING, site); if (this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) { logger.warn(String.format(("Maximum number of duplicate sets encountered, terminating traversal " + TraversalStatistics.nRecords))); @@ -309,6 +312,6 @@ public class TraverseDuplicates extends TraversalEngine { * @param Type of the result. */ public void printOnTraversalDone(T sum) { - printOnTraversalDone(TRAVERSAL_TYPE.DUPLICATE, sum); + printOnTraversalDone(DUPS_STRING, sum); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index dd40e85e6..6f5d65a0c 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.traversals; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.providers.*; import org.broadinstitute.sting.gatk.datasources.shards.Shard; @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; @@ -20,7 +19,7 @@ import java.util.ArrayList; * A simple solution to iterating over all reference positions over a series of genomic locations. */ public class TraverseLoci extends TraversalEngine { - final private static String UNIT_STRING = "sites"; + final private static String LOCI_STRING = "sites"; final private static boolean ENABLE_ROD_TRAVERSAL = false; @@ -86,7 +85,7 @@ public class TraverseLoci extends TraversalEngine { break; } - printProgress(TRAVERSAL_TYPE.LOCUS, locus.getLocation()); + printProgress(LOCI_STRING, locus.getLocation()); } } @@ -113,7 +112,7 @@ public class TraverseLoci extends TraversalEngine { * @param Type of the result. */ public void printOnTraversalDone( T sum ) { - printOnTraversalDone( TRAVERSAL_TYPE.LOCUS, sum ); + printOnTraversalDone(LOCI_STRING, sum ); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java index 0af23805c..9b345018d 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java @@ -15,12 +15,14 @@ import java.util.ArrayList; /** * Created by IntelliJ IDEA. - * User: ebanks + * User: amckenna * Date: Apr 23, 2009 * Time: 10:26:03 AM * To change this template use File | Settings | File Templates. */ public class TraverseLocusWindows extends TraversalEngine { + /** descriptor of the type */ + private static final String LOCUS_WINDOW_STRING = "intervals"; public T traverse( Walker walker, Shard shard, @@ -58,7 +60,7 @@ public class TraverseLocusWindows extends TraversalEngine { sum = locusWindowWalker.reduce(x, sum); } - printProgress(TRAVERSAL_TYPE.LOCUS_WINDOW, locus.getLocation()); + printProgress(LOCUS_WINDOW_STRING, locus.getLocation()); return sum; } @@ -98,7 +100,7 @@ public class TraverseLocusWindows extends TraversalEngine { * @param Type of the result. */ public void printOnTraversalDone( T sum ) { - printOnTraversalDone( TRAVERSAL_TYPE.LOCUS_WINDOW, sum ); + printOnTraversalDone(LOCUS_WINDOW_STRING, sum ); } } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 5b43032a1..fa82766f0 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -56,6 +56,9 @@ public class TraverseReads extends TraversalEngine { /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraverseReads.class); + /** descriptor of the type */ + private static final String READS_STRING = "reads"; + /** * Traverse by reads, given the data and the walker * @@ -121,7 +124,7 @@ public class TraverseReads extends TraversalEngine { sum = readWalker.reduce(x, sum); } - printProgress(TRAVERSAL_TYPE.READ, + printProgress(READS_STRING, (read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) ? null : GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart())); @@ -136,6 +139,6 @@ public class TraverseReads extends TraversalEngine { * @param Type of the result. */ public void printOnTraversalDone( T sum ) { - printOnTraversalDone(TRAVERSAL_TYPE.READ, sum ); + printOnTraversalDone(READS_STRING, sum ); } } diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java index b9b598e02..fcf44459d 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -118,7 +118,7 @@ public class ArtificialReadsTraversal extends TraversalEngine { sum = readWalker.reduce(x, sum); } - if (alignment != null) { printProgress(TRAVERSAL_TYPE.READ, alignment.getLocation()); } + if (alignment != null) { printProgress("reads", alignment.getLocation()); } } return sum; } @@ -131,7 +131,7 @@ public class ArtificialReadsTraversal extends TraversalEngine { * @param Type of the result. */ public void printOnTraversalDone( T sum ) { - printOnTraversalDone(TRAVERSAL_TYPE.READ, sum); + printOnTraversalDone("reads", sum); } diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java index dfab639a0..efc15b4a9 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.traversals; +import net.sf.picard.reference.ReferenceSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; @@ -23,8 +24,6 @@ import java.lang.reflect.Field; import java.util.ArrayList; import java.util.List; -import net.sf.picard.reference.ReferenceSequenceFile; - /** * * User: aaron @@ -139,7 +138,7 @@ public class TraverseReadsTest extends BaseTest { } - traversalEngine.printOnTraversalDone(TraversalEngine.TRAVERSAL_TYPE.READ, accumulator); + traversalEngine.printOnTraversalDone("reads", accumulator); countReadWalker.onTraversalDone(accumulator); if (!(accumulator instanceof Integer)) { @@ -185,7 +184,7 @@ public class TraverseReadsTest extends BaseTest { dataProvider.close(); } - traversalEngine.printOnTraversalDone(TraversalEngine.TRAVERSAL_TYPE.READ, accumulator); + traversalEngine.printOnTraversalDone("reads", accumulator); countReadWalker.onTraversalDone(accumulator); if (!(accumulator instanceof Integer)) {