First pass at making lists of files / lists of interval arguments work. Note that the interval

ROD system will throw up its hands and not deal with intervals at all if multiple interval files 
are passed in (see JIRA GSA-95). 


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1105 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-06-25 20:44:23 +00:00
parent 23680a9a16
commit ad3a3aa350
7 changed files with 51 additions and 41 deletions

View File

@ -57,9 +57,9 @@ public class GATKArgumentCollection {
@Argument(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<File> samFiles = new ArrayList<File>();
@Element(required = false)
@ElementList(required = false)
@Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
public String intervals = null;
public List<String> intervals = null;
@Element(required = false)
@Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)

View File

@ -40,7 +40,7 @@ import org.broadinstitute.sting.utils.cmdLine.ArgumentException;
import java.util.ArrayList;
import java.util.List;
import java.io.File;
import java.io.*;
public class GenomeAnalysisEngine {
@ -99,8 +99,10 @@ public class GenomeAnalysisEngine {
bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", argCollection.HAPMAPFile);
if (argCollection.HAPMAPChipFile != null)
bindConvenienceRods("hapmap-chip", "GFF", argCollection.HAPMAPChipFile);
if ( argCollection.intervals != null )
bindConvenienceRods("interval", "Intervals", argCollection.intervals.replaceAll(",", ""));
// TODO: The ROD iterator currently does not understand multiple interval files. Fix this by cleaning up the ROD system.
if ( argCollection.intervals != null && argCollection.intervals.size() == 1) {
bindConvenienceRods("interval", "Intervals", argCollection.intervals.get(0).replaceAll(",", ""));
}
// parse out the rod bindings
ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods);
@ -148,7 +150,6 @@ public class GenomeAnalysisEngine {
walkerReturn = microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations);
}
/**
* this is to accommodate the older-style traversals that haven't been converted over to the new system. Putting them
* into their own function allows us to deviate in the two behaviors so the new style traversals aren't limited to what
@ -226,7 +227,7 @@ public class GenomeAnalysisEngine {
// we default interval files over the genome region string
if (argCollection.intervals != null) {
engine.setLocation(parseIntervalRegion(argCollection.intervals, false));
engine.setLocation(parseIntervalRegion(argCollection.intervals));
}
engine.setReadFilters(sourceInfo);
@ -241,16 +242,15 @@ public class GenomeAnalysisEngine {
*
* @return a list of genomeLoc representing the interval file
*/
public static List<GenomeLoc> parseIntervalRegion(final String intervalsString, boolean quiet ) {
List<GenomeLoc> locs = null;
if ( intervalsString != null) {
if (new File(intervalsString).exists()) {
if (! quiet) logger.info("Intervals argument specifies a file. Loading intervals from file.");
locs = GenomeLocParser.intervalFileToList(intervalsString);
public static List<GenomeLoc> parseIntervalRegion(final List<String> intervals ) {
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
for( String interval: intervals ) {
if (new File(interval).exists()) {
locs.addAll(GenomeLocParser.intervalFileToList(interval));
} else {
if (! quiet) logger.info("Intervals argument does not specify a file. Trying to parse it as a simple string.");
locs = GenomeLocParser.parseGenomeLocs(intervalsString);
locs.addAll(GenomeLocParser.parseGenomeLocs(interval));
}
}
return locs;
}

View File

@ -22,7 +22,7 @@ public class IntervalRodIterator implements Iterator<IntervalRod> {
public static IntervalRodIterator IntervalRodIteratorFromLocsFile(final String trackName, final File file) {
//System.out.printf("Parsing %s for intervals %s%n", file, trackName);
List<GenomeLoc> locs = GenomeAnalysisEngine.parseIntervalRegion(file.getPath(), true);
List<GenomeLoc> locs = GenomeAnalysisEngine.parseIntervalRegion(Collections.singletonList(file.getPath()));
//System.out.printf(" => got %d entries %n", locs.size());
return new IntervalRodIterator(trackName, locs);
}

View File

@ -36,7 +36,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
* The source of all cleaned intervals.
*/
@Argument(fullName="cleaned_intervals",shortName="ci",doc="Intervals which have been cleaned.",required=true)
String intervalsSource = null;
List<String> intervalsSource = null;
/**
* The source of all cleaned reads.
@ -148,8 +148,8 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
* @param intervalsSource Source of intervals.
* @return a queue of sorted, merged intervals.
*/
private Queue parseIntervals( String intervalsSource ) {
List<GenomeLoc> parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false);
private Queue parseIntervals( List<String> intervalsSource ) {
List<GenomeLoc> parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource);
GenomeLocSortedSet intervalSortedSet = new GenomeLocSortedSet();
for( GenomeLoc parsedInterval: parsedIntervals )
intervalSortedSet.addRegion(parsedInterval);

View File

@ -1,3 +1,27 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.gatk.walkers.indels;
import net.sf.samtools.SAMRecord;
@ -10,19 +34,6 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.*;
/**
* User: ebanks
* Date: Jun 10, 2009
* Time: 2:40:19 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
* Merges intervals based on reads which overlap them.
*/
@ -31,7 +42,7 @@ import java.util.*;
public class IntervalMergerWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="intervalsToMerge", shortName="intervals", doc="Intervals to merge", required=true)
String intervalsSource = null;
List<String> intervalsSource = null;
@Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false)
public boolean allow454 = false;
@Argument(fullName="maxIntervalSize", shortName="maxInterval", doc="max interval size", required=false)
@ -99,8 +110,8 @@ public class IntervalMergerWalker extends ReadWalker<Integer,Integer> {
* @param intervalsSource Source of intervals.
* @return a linked list of sorted, merged intervals.
*/
private LinkedList<GenomeLoc> parseIntervals(String intervalsSource) {
List<GenomeLoc> parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false);
private LinkedList<GenomeLoc> parseIntervals(List<String> intervalsSource) {
List<GenomeLoc> parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource);
GenomeLocSortedSet intervalSortedSet = new GenomeLocSortedSet();
for ( GenomeLoc parsedInterval : parsedIntervals )
intervalSortedSet.addRegion(parsedInterval);

View File

@ -8,10 +8,7 @@ import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/*
* Copyright (c) 2009 The Broad Institute
@ -93,7 +90,8 @@ public class GATKArgumentCollectionTest extends BaseTest {
collect.maximumReadSorts = null;
collect.downsampleFraction = null;
collect.downsampleCoverage = null;
collect.intervals = "intervals".toLowerCase();
collect.intervals = new ArrayList<String>();
collect.intervals.add("intervals".toLowerCase());
collect.walkAllLoci = true;
collect.disableThreading = false;
collect.outFileName = "outFileName".toLowerCase();

View File

@ -14,6 +14,7 @@ import org.junit.Test;
import java.io.FileNotFoundException;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
@ -223,7 +224,7 @@ public class CleanedReadInjectorTest extends BaseTest {
private CleanedReadInjector createWalker( String intervals, ArtificialSAMFileReader cleanedReads, ArtificialSAMFileWriter output ) {
CleanedReadInjector walker = new CleanedReadInjector();
walker.intervalsSource = intervals;
walker.intervalsSource = Collections.singletonList(intervals);
walker.cleanedReadsSource = cleanedReads;
walker.outputBAM = output;