intervals_file + genome_loc => intervals.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@659 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-05-12 01:04:18 +00:00
parent 0bca588629
commit 862b8a6787
5 changed files with 31 additions and 60 deletions

View File

@ -67,10 +67,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null;
@Element(required=false)
@Argument(fullName = "genome_region", shortName = "L", doc = "Genome region to operation on: from chr:start-end", required = false)
public String genomeRegion = null;
@Element(required=false)
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
public String analysisName = null;
@ -112,8 +108,8 @@ public class GATKArgumentCollection {
public String downsampleCoverage = null;
@Element(required=false)
@Argument(fullName = "intervals_file", shortName = "V", doc = "File containing list of genomic intervals to operate on. line := <contig> <start> <end>", required = false)
public String intervalsFile = null;
@Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
public String intervals = null;
@Element(required=false)
@Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false)
@ -222,7 +218,7 @@ public class GATKArgumentCollection {
if (!other.referenceFile.equals(this.referenceFile)) {
return false;
}
if (!other.genomeRegion.equals(this.genomeRegion)) {
if (!other.intervals.equals(this.intervals)) {
return false;
}
if (!other.analysisName.equals(this.analysisName)) {
@ -252,9 +248,6 @@ public class GATKArgumentCollection {
if (!other.downsampleCoverage.equals(this.downsampleCoverage)) {
return false;
}
if (!other.intervalsFile.equals(this.intervalsFile)) {
return false;
}
if (!other.walkAllLoci.equals(this.walkAllLoci)) {
return false;
}

View File

@ -16,6 +16,7 @@ import org.broadinstitute.sting.utils.Utils;
import java.util.ArrayList;
import java.util.List;
import java.io.File;
public class GenomeAnalysisEngine {
@ -76,7 +77,7 @@ public class GenomeAnalysisEngine {
ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods);
// create the output streams
initializeOutputStreams();
initializeOutputStreams( my_walker );
// our microscheduler, which is in charge of running everything
MicroScheduler microScheduler = null;
@ -205,12 +206,9 @@ public class GenomeAnalysisEngine {
engine.setMaxReads(Integer.parseInt(argCollection.maximumReads));
if (argCollection.genomeRegion != null) {
engine.setLocation(argCollection.genomeRegion);
}
// we default interval files over the genome region string
if (argCollection.intervalsFile != null) {
engine.setLocationFromFile(argCollection.intervalsFile);
if (argCollection.intervals != null) {
engine.setLocation(setupIntervalRegion());
}
// hmm...
if (argCollection.maximumReadSorts != null) {
@ -239,11 +237,14 @@ public class GenomeAnalysisEngine {
*/
private List<GenomeLoc> setupIntervalRegion() {
List<GenomeLoc> locs;
if (argCollection.intervalsFile != null)
locs = GenomeLoc.IntervalFileToList(argCollection.intervalsFile);
else
locs = GenomeLoc.parseGenomeLocs(argCollection.genomeRegion);
return locs;
if( new File(argCollection.intervals).exists() ) {
logger.info("Intervals argument specifies a file. Loading intervals from file.");
return GenomeLoc.IntervalFileToList(argCollection.intervals);
}
else {
logger.info("Intervals argument does not specify a file. Trying to parse it as a simple string.");
return GenomeLoc.parseGenomeLocs(argCollection.intervals);
}
}
/**
@ -276,9 +277,10 @@ public class GenomeAnalysisEngine {
/** Initialize the output streams as specified by the user. */
private void initializeOutputStreams() {
private void initializeOutputStreams( Walker walker ) {
outputTracker = (argCollection.outErrFileName != null) ? new OutputTracker(argCollection.outErrFileName, argCollection.outErrFileName)
: new OutputTracker(argCollection.outFileName, argCollection.errFileName);
walker.initializeOutputStreams(outputTracker);
}
/**

View File

@ -87,7 +87,7 @@ public abstract class TraversalEngine {
// Locations we are going to process during the traversal
private ArrayList<GenomeLoc> locs = null;
private List<GenomeLoc> locs = null;
// --------------------------------------------------------------------------------------------------------------
//
@ -194,33 +194,13 @@ public abstract class TraversalEngine {
}
/**
* Parses the location string locStr and sets the traversal engine to only process
* regions specified by the location string. The string is of the form:
* Of the form: loc1;loc2;...
* Where each locN can be:
* 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
*
* @param locStr
* Sets the intervals over which the traversal(s) should happen.
* @param locs
*/
public void setLocation(final String locStr) {
this.locs = GenomeLoc.parseGenomeLocs(locStr);
public void setLocation(final List<GenomeLoc> locs) {
this.locs = locs;
}
/**
* Read a file of genome locations to process.
* regions specified by the location string. The string is of the form:
* Of the form: loc1;loc2;...
* Where each locN can be:
* 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
*
* @param file_name
*/
public void setLocationFromFile(final String file_name) {
this.locs = GenomeLoc.IntervalFileToList(file_name);
}
public boolean hasLocations() {
return this.locs != null;
@ -533,7 +513,7 @@ public abstract class TraversalEngine {
}
walker.onTraversalDone(map);
} else {
ArrayList<GenomeLoc> l = new ArrayList<GenomeLoc>();
List<GenomeLoc> l = new ArrayList<GenomeLoc>();
if ( hasLocations() )
l = locs;
sum = traverse(walker, l);
@ -543,7 +523,7 @@ public abstract class TraversalEngine {
return sum;
}
public <M, T> T traverse(Walker<M, T> walker, ArrayList<GenomeLoc> locations) {
public <M, T> T traverse(Walker<M, T> walker, List<GenomeLoc> locations) {
return null;
}

View File

@ -6,6 +6,7 @@ import java.util.Map;
import java.util.List;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.OutputTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.apache.log4j.Logger;
@ -33,15 +34,11 @@ public abstract class Walker<MapType, ReduceType> {
protected PrintStream err = null;
/**
 * Creates a walker and selects the streams it writes to.
 *
 * When {@code GenomeAnalysisEngine.instance} is set, {@code out} and {@code err}
 * wrap the streams exposed by that engine's output tracker; otherwise they fall
 * back to {@code System.out} and {@code System.err}.
 */
protected Walker() {
// An engine instance is available: route output through its output tracker.
if( GenomeAnalysisEngine.instance != null ) {
GenomeAnalysisEngine gatk = GenomeAnalysisEngine.instance;
out = new PrintStream( gatk.getOutputTracker().getOutStream() );
err = new PrintStream( gatk.getOutputTracker().getErrStream() );
}
// No engine instance: use the process-wide standard streams.
else {
out = System.out;
err = System.err;
}
}
/**
 * Points this walker's {@code out} and {@code err} streams at the streams
 * supplied by the given output tracker, wrapping each in a new {@link PrintStream}.
 *
 * @param outputTracker source of the out and err streams; must not be null,
 *                      since it is dereferenced without a check.
 */
public void initializeOutputStreams( OutputTracker outputTracker ) {
out = new PrintStream( outputTracker.getOutStream() );
err = new PrintStream( outputTracker.getErrStream() );
}
/**

View File

@ -76,7 +76,6 @@ public class GATKArgumentCollectionTest extends BaseTest {
collect.maximumReads = "-1";
collect.strictnessLevel = "strict";
collect.referenceFile = new File("referenceFile".toLowerCase());
collect.genomeRegion = "genomeRegion".toLowerCase();
collect.analysisName = "analysisName".toLowerCase();
collect.DBSNPFile = "DBSNPFile".toLowerCase();
collect.HAPMAPFile = "HAPMAPFile".toLowerCase();
@ -86,7 +85,7 @@ public class GATKArgumentCollectionTest extends BaseTest {
collect.maximumReadSorts = "maximumReadSorts".toLowerCase();
collect.downsampleFraction = "downsampleFraction".toLowerCase();
collect.downsampleCoverage = "downsampleCoverage".toLowerCase();
collect.intervalsFile = "intervalsFile".toLowerCase();
collect.intervals = "intervals".toLowerCase();
collect.walkAllLoci = true;
collect.disableThreading = false;
collect.outFileName = "outFileName".toLowerCase();