First draft of support for a file containing a list of intervals.
Appears to work, but is inefficient: at each reference location, the entire list of intervals is searched linearly. Instead, we need to keep the intervals sorted and simply seek forward from interval to interval. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@124 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1fcf4c0cbf
commit
0ea44a5805
|
|
@ -22,6 +22,9 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
@Option(shortName="R", doc="Reference sequence file", optional=true) public File REF_FILE_ARG = null;
|
||||
@Option(shortName="B", doc="Debugging output", optional=true) public String DEBUGGING_STR = null;
|
||||
@Option(shortName="L", doc="Genome region to operation on: from chr:start-end", optional=true) public String REGION_STR = null;
|
||||
|
||||
@Option(shortName="INT", doc="File containing list of genomic intervals to operate on. line := <contig> <start> <end>\n", optional=true) public String INTERVALS_FILE = null;
|
||||
|
||||
@Option(shortName="T", doc="Type of analysis to run") public String Analysis_Name = null;
|
||||
@Option(shortName="DBSNP", doc="DBSNP file", optional=true) public String DBSNP_FILE = null;
|
||||
@Option(shortName="THREADED_IO", doc="If true, enables threaded I/O operations", optional=true) public String ENABLED_THREADED_IO = "false";
|
||||
|
|
@ -90,6 +93,11 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
engine.setLocation(REGION_STR);
|
||||
}
|
||||
|
||||
if (INTERVALS_FILE != null)
|
||||
{
|
||||
engine.setLocationFromFile(INTERVALS_FILE);
|
||||
}
|
||||
|
||||
engine.setSafetyChecking(! UNSAFE.toLowerCase().equals("true"));
|
||||
engine.setSortOnFly(ENABLED_SORT_ON_FLY.toLowerCase().equals("true"));
|
||||
|
||||
|
|
|
|||
|
|
@ -144,6 +144,47 @@ public class TraversalEngine {
|
|||
this.locs = parseGenomeLocs(locStr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a file of genome locations to process.
|
||||
* regions specified by the location string. The string is of the form:
|
||||
* Of the form: loc1;loc2;...
|
||||
* Where each locN can be:
|
||||
* Ôchr2Õ, Ôchr2:1000000Õ or Ôchr2:1,000,000-2,000,000Õ
|
||||
*
|
||||
* @param file_name
|
||||
*/
|
||||
public void setLocationFromFile( final String file_name )
|
||||
{
|
||||
String locStr = "";
|
||||
|
||||
Scanner scanner = null;
|
||||
try
|
||||
{
|
||||
scanner = new Scanner(new File(file_name));
|
||||
while ( scanner.hasNextLine() )
|
||||
{
|
||||
String line = scanner.nextLine();
|
||||
line.replaceAll("\n", "");
|
||||
locStr += line;
|
||||
if (scanner.hasNextLine()) { locStr += ";"; }
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
System.exit(-1);
|
||||
}
|
||||
finally
|
||||
{
|
||||
//ensure the underlying stream is always closed
|
||||
scanner.close();
|
||||
}
|
||||
|
||||
System.out.format("DEBUG: locStr: %s\n", locStr);
|
||||
|
||||
this.locs = parseGenomeLocs(locStr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Useful utility function that parses a location string into a coordinate-order sorted
|
||||
* array of GenomeLoc objects
|
||||
|
|
@ -503,6 +544,7 @@ public class TraversalEngine {
|
|||
final LocusContext locus = iter.next();
|
||||
|
||||
// Poor man's version of index LOL
|
||||
// HALP! I HAZ 10K INTERVALS 2 INDX
|
||||
GenomeLoc curLoc = locus.getLocation();
|
||||
if ( inLocations(curLoc) ) {
|
||||
if ( prevLoc != null && curLoc.compareContigs(prevLoc) != 0 )
|
||||
|
|
|
|||
Loading…
Reference in New Issue