From ad3a3aa35005aa6bfb7fb2450230860d2d8dc0c7 Mon Sep 17 00:00:00 2001 From: hanna Date: Thu, 25 Jun 2009 20:44:23 +0000 Subject: [PATCH] First pass at passing lists of files / lists of interval arguments work. Note that the interval ROD system will throw up its hands and not deal with intervals at all if multiple interval files are passed in (see JIRA GSA-95). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1105 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GATKArgumentCollection.java | 4 +- .../sting/gatk/GenomeAnalysisEngine.java | 26 +++++------ .../gatk/refdata/IntervalRodIterator.java | 2 +- .../walkers/indels/CleanedReadInjector.java | 6 +-- .../walkers/indels/IntervalMergerWalker.java | 43 ++++++++++++------- .../gatk/GATKArgumentCollectionTest.java | 8 ++-- .../indels/CleanedReadInjectorTest.java | 3 +- 7 files changed, 51 insertions(+), 41 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java index 9bb5720e3..65fdfd620 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java @@ -57,9 +57,9 @@ public class GATKArgumentCollection { @Argument(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) public List samFiles = new ArrayList(); - @Element(required = false) + @ElementList(required = false) @Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false) - public String intervals = null; + public List intervals = null; @Element(required = false) @Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 4d57e9afe..325de5540 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -40,7 +40,7 @@ import org.broadinstitute.sting.utils.cmdLine.ArgumentException; import java.util.ArrayList; import java.util.List; -import java.io.File; +import java.io.*; public class GenomeAnalysisEngine { @@ -99,8 +99,10 @@ public class GenomeAnalysisEngine { bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", argCollection.HAPMAPFile); if (argCollection.HAPMAPChipFile != null) bindConvenienceRods("hapmap-chip", "GFF", argCollection.HAPMAPChipFile); - if ( argCollection.intervals != null ) - bindConvenienceRods("interval", "Intervals", argCollection.intervals.replaceAll(",", "")); + // TODO: The ROD iterator currently does not understand multiple intervals file. Fix this by cleaning the ROD system. + if ( argCollection.intervals != null && argCollection.intervals.size() == 1) { + bindConvenienceRods("interval", "Intervals", argCollection.intervals.get(0).replaceAll(",", "")); + } // parse out the rod bindings ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods); @@ -148,7 +150,6 @@ public class GenomeAnalysisEngine { walkerReturn = microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations); } - /** * this is to accomdate the older style traversals, that haven't been converted over to the new system. Putting them * into their own function allows us to deviate in the two behaviors so the new style traversals aren't limited to what @@ -226,7 +227,7 @@ public class GenomeAnalysisEngine { // we default interval files over the genome region string if (argCollection.intervals != null) { - engine.setLocation(parseIntervalRegion(argCollection.intervals, false)); + engine.setLocation(parseIntervalRegion(argCollection.intervals)); } engine.setReadFilters(sourceInfo); @@ -241,16 +242,15 @@ public class GenomeAnalysisEngine { * * @return a list of genomeLoc representing the interval file */ - public static List parseIntervalRegion(final String intervalsString, boolean quiet ) { - List locs = null; - if ( intervalsString != null) { - if (new File(intervalsString).exists()) { - if (! quiet) logger.info("Intervals argument specifies a file. Loading intervals from file."); - locs = GenomeLocParser.intervalFileToList(intervalsString); + public static List parseIntervalRegion(final List intervals ) { + List locs = new ArrayList(); + for( String interval: intervals ) { + if (new File(interval).exists()) { + locs.addAll(GenomeLocParser.intervalFileToList(interval)); } else { - if (! quiet) logger.info("Intervals argument does not specify a file. Trying to parse it as a simple string."); - locs = GenomeLocParser.parseGenomeLocs(intervalsString); + locs.addAll(GenomeLocParser.parseGenomeLocs(interval)); } + } return locs; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/IntervalRodIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/IntervalRodIterator.java index fbc33a4be..2f1612ea4 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/IntervalRodIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/IntervalRodIterator.java @@ -22,7 +22,7 @@ public class IntervalRodIterator implements Iterator { public static IntervalRodIterator IntervalRodIteratorFromLocsFile(final String trackName, final File file) { //System.out.printf("Parsing %s for intervals %s%n", file, trackName); - List locs = GenomeAnalysisEngine.parseIntervalRegion(file.getPath(), true); + List locs = GenomeAnalysisEngine.parseIntervalRegion(Collections.singletonList(file.getPath())); //System.out.printf(" => got %d entries %n", locs.size()); return new IntervalRodIterator(trackName, locs); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java index 86139a62d..c598d571f 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java @@ -36,7 +36,7 @@ public class CleanedReadInjector extends ReadWalker { * The source of all cleaned intervals. */ @Argument(fullName="cleaned_intervals",shortName="ci",doc="Intervals which have been cleaned.",required=true) - String intervalsSource = null; + List intervalsSource = null; /** * The source of all cleaned reads. @@ -148,8 +148,8 @@ public class CleanedReadInjector extends ReadWalker { * @param intervalsSource Source of intervals. * @return a queue of sorted, merged intervals. */ - private Queue parseIntervals( String intervalsSource ) { - List parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false); + private Queue parseIntervals( List intervalsSource ) { + List parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource); GenomeLocSortedSet intervalSortedSet = new GenomeLocSortedSet(); for( GenomeLoc parsedInterval: parsedIntervals ) intervalSortedSet.addRegion(parsedInterval); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java index 001d4a19e..92dc9113d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java @@ -1,3 +1,27 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.playground.gatk.walkers.indels; import net.sf.samtools.SAMRecord; @@ -10,19 +34,6 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.Argument; import java.util.*; -/** - * User: ebanks - * Date: Jun 10, 2009 - * Time: 2:40:19 PM - * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT - * Software and documentation are copyright 2005 by the Broad Institute. - * All rights are reserved. - * - * Users acknowledge that this software is supplied without any warranty or support. - * The Broad Institute is not responsible for its use, misuse, or - * functionality. - */ - /** * Merges intervals based on reads which overlap them. */ @@ -31,7 +42,7 @@ import java.util.*; public class IntervalMergerWalker extends ReadWalker { @Argument(fullName="intervalsToMerge", shortName="intervals", doc="Intervals to merge", required=true) - String intervalsSource = null; + List intervalsSource = null; @Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false) public boolean allow454 = false; @Argument(fullName="maxIntervalSize", shortName="maxInterval", doc="max interval size", required=false) @@ -99,8 +110,8 @@ public class IntervalMergerWalker extends ReadWalker { * @param intervalsSource Source of intervals. * @return a linked list of sorted, merged intervals. */ - private LinkedList parseIntervals(String intervalsSource) { - List parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false); + private LinkedList parseIntervals(List intervalsSource) { + List parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource); GenomeLocSortedSet intervalSortedSet = new GenomeLocSortedSet(); for ( GenomeLoc parsedInterval : parsedIntervals ) intervalSortedSet.addRegion(parsedInterval); diff --git a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java index b21d27dcb..2981af995 100755 --- a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java +++ b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java @@ -8,10 +8,7 @@ import org.junit.Before; import org.junit.Test; import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /* * Copyright (c) 2009 The Broad Institute @@ -93,7 +90,8 @@ public class GATKArgumentCollectionTest extends BaseTest { collect.maximumReadSorts = null; collect.downsampleFraction = null; collect.downsampleCoverage = null; - collect.intervals = "intervals".toLowerCase(); + collect.intervals = new ArrayList(); + collect.intervals.add("intervals".toLowerCase()); collect.walkAllLoci = true; collect.disableThreading = false; collect.outFileName = "outFileName".toLowerCase(); diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java index 4ec88c906..a04cdb117 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java @@ -14,6 +14,7 @@ import org.junit.Test; import java.io.FileNotFoundException; import java.io.File; import java.util.Arrays; +import java.util.Collections; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; @@ -223,7 +224,7 @@ public class CleanedReadInjectorTest extends BaseTest { private CleanedReadInjector createWalker( String intervals, ArtificialSAMFileReader cleanedReads, ArtificialSAMFileWriter output ) { CleanedReadInjector walker = new CleanedReadInjector(); - walker.intervalsSource = intervals; + walker.intervalsSource = Collections.singletonList(intervals); walker.cleanedReadsSource = cleanedReads; walker.outputBAM = output;