From 862b8a6787ef681b28e60fea9ed41270ada86e89 Mon Sep 17 00:00:00 2001 From: hanna Date: Tue, 12 May 2009 01:04:18 +0000 Subject: [PATCH] intervals_file + genome_loc => intervals. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@659 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GATKArgumentCollection.java | 13 ++----- .../sting/gatk/GenomeAnalysisEngine.java | 26 +++++++------- .../gatk/traversals/TraversalEngine.java | 34 ++++--------------- .../sting/gatk/walkers/Walker.java | 15 ++++---- .../gatk/GATKArgumentCollectionTest.java | 3 +- 5 files changed, 31 insertions(+), 60 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java index c61724003..d9cfa1c4e 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java @@ -67,10 +67,6 @@ public class GATKArgumentCollection { @Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) public File referenceFile = null; - @Element(required=false) - @Argument(fullName = "genome_region", shortName = "L", doc = "Genome region to operation on: from chr:start-end", required = false) - public String genomeRegion = null; - @Element(required=false) @Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run") public String analysisName = null; @@ -112,8 +108,8 @@ public class GATKArgumentCollection { public String downsampleCoverage = null; @Element(required=false) - @Argument(fullName = "intervals_file", shortName = "V", doc = "File containing list of genomic intervals to operate on. line := ", required = false) - public String intervalsFile = null; + @Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false) + public String intervals = null; @Element(required=false) @Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false) @@ -222,7 +218,7 @@ public class GATKArgumentCollection { if (!other.referenceFile.equals(this.referenceFile)) { return false; } - if (!other.genomeRegion.equals(this.genomeRegion)) { + if (!other.intervals.equals(this.intervals)) { return false; } if (!other.analysisName.equals(this.analysisName)) { @@ -252,9 +248,6 @@ public class GATKArgumentCollection { if (!other.downsampleCoverage.equals(this.downsampleCoverage)) { return false; } - if (!other.intervalsFile.equals(this.intervalsFile)) { - return false; - } if (!other.walkAllLoci.equals(this.walkAllLoci)) { return false; } diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 007917405..bffd73974 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -16,6 +16,7 @@ import org.broadinstitute.sting.utils.Utils; import java.util.ArrayList; import java.util.List; +import java.io.File; public class GenomeAnalysisEngine { @@ -76,7 +77,7 @@ public class GenomeAnalysisEngine { ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods); // create the output streams - initializeOutputStreams(); + initializeOutputStreams( my_walker ); // our microscheduler, which is in charge of running everything MicroScheduler microScheduler = null; @@ -205,12 +206,9 @@ public class GenomeAnalysisEngine { engine.setMaxReads(Integer.parseInt(argCollection.maximumReads)); - if (argCollection.genomeRegion != null) { - engine.setLocation(argCollection.genomeRegion); - } // we default interval files over the genome region strin - if (argCollection.intervalsFile != null) { - engine.setLocationFromFile(argCollection.intervalsFile); + if (argCollection.intervals != null) { + engine.setLocation(setupIntervalRegion()); } // hmm... if (argCollection.maximumReadSorts != null) { @@ -239,11 +237,14 @@ public class GenomeAnalysisEngine { */ private List setupIntervalRegion() { List locs; - if (argCollection.intervalsFile != null) - locs = GenomeLoc.IntervalFileToList(argCollection.intervalsFile); - else - locs = GenomeLoc.parseGenomeLocs(argCollection.genomeRegion); - return locs; + if( new File(argCollection.intervals).exists() ) { + logger.info("Intervals argument specifies a file. Loading intervals from file."); + return GenomeLoc.IntervalFileToList(argCollection.intervals); + } + else { + logger.info("Intervals argument does not specify a file. Trying to parse it as a simple string."); + return GenomeLoc.parseGenomeLocs(argCollection.intervals); + } } /** @@ -276,9 +277,10 @@ public class GenomeAnalysisEngine { /** Initialize the output streams as specified by the user. */ - private void initializeOutputStreams() { + private void initializeOutputStreams( Walker walker ) { outputTracker = (argCollection.outErrFileName != null) ? new OutputTracker(argCollection.outErrFileName, argCollection.outErrFileName) : new OutputTracker(argCollection.outFileName, argCollection.errFileName); + walker.initializeOutputStreams(outputTracker); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index a34b6b3b2..5fe8ed7a1 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -87,7 +87,7 @@ public abstract class TraversalEngine { // Locations we are going to process during the traversal - private ArrayList locs = null; + private List locs = null; // -------------------------------------------------------------------------------------------------------------- // @@ -194,33 +194,13 @@ public abstract class TraversalEngine { } /** - * Parses the location string locStr and sets the traversal engine to only process - * regions specified by the location string. The string is of the form: - * Of the form: loc1;loc2;... - * Where each locN can be: - * 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' - * - * @param locStr + * Sets the intervals over which the traversal(s) should happen. + * @param locs */ - public void setLocation(final String locStr) { - this.locs = GenomeLoc.parseGenomeLocs(locStr); + public void setLocation(final List locs) { + this.locs = locs; } - /** - * Read a file of genome locations to process. - * regions specified by the location string. The string is of the form: - * Of the form: loc1;loc2;... - * Where each locN can be: - * 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' - * - * @param file_name - */ - public void setLocationFromFile(final String file_name) { - - this.locs = GenomeLoc.IntervalFileToList(file_name); - } - - public boolean hasLocations() { return this.locs != null; @@ -533,7 +513,7 @@ public abstract class TraversalEngine { } walker.onTraversalDone(map); } else { - ArrayList l = new ArrayList(); + List l = new ArrayList(); if ( hasLocations() ) l = locs; sum = traverse(walker, l); @@ -543,7 +523,7 @@ public abstract class TraversalEngine { return sum; } - public T traverse(Walker walker, ArrayList locations) { + public T traverse(Walker walker, List locations) { return null; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 999b35764..cdc935ea6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -6,6 +6,7 @@ import java.util.Map; import java.util.List; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.OutputTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Pair; import org.apache.log4j.Logger; @@ -33,15 +34,11 @@ public abstract class Walker { protected PrintStream err = null; protected Walker() { - if( GenomeAnalysisEngine.instance != null ) { - GenomeAnalysisEngine gatk = GenomeAnalysisEngine.instance; - out = new PrintStream( gatk.getOutputTracker().getOutStream() ); - err = new PrintStream( gatk.getOutputTracker().getErrStream() ); - } - else { - out = System.out; - err = System.err; - } + } + + public void initializeOutputStreams( OutputTracker outputTracker ) { + out = new PrintStream( outputTracker.getOutStream() ); + err = new PrintStream( outputTracker.getErrStream() ); } /** diff --git a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java index 5d10b9cc0..3787d9d72 100755 --- a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java +++ b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java @@ -76,7 +76,6 @@ public class GATKArgumentCollectionTest extends BaseTest { collect.maximumReads = "-1"; collect.strictnessLevel = "strict"; collect.referenceFile = new File("referenceFile".toLowerCase()); - collect.genomeRegion = "genomeRegion".toLowerCase(); collect.analysisName = "analysisName".toLowerCase(); collect.DBSNPFile = "DBSNPFile".toLowerCase(); collect.HAPMAPFile = "HAPMAPFile".toLowerCase(); @@ -86,7 +85,7 @@ public class GATKArgumentCollectionTest extends BaseTest { collect.maximumReadSorts = "maximumReadSorts".toLowerCase(); collect.downsampleFraction = "downsampleFraction".toLowerCase(); collect.downsampleCoverage = "downsampleCoverage".toLowerCase(); - collect.intervalsFile = "intervalsFile".toLowerCase(); + collect.intervals = "intervals".toLowerCase(); collect.walkAllLoci = true; collect.disableThreading = false; collect.outFileName = "outFileName".toLowerCase();