Added a bunch of changes to support the new MicroManager code
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@431 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
339261c4a9
commit
180ff13290
|
|
@ -7,28 +7,24 @@ import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import net.sf.samtools.util.RuntimeIOException;
|
import net.sf.samtools.util.RuntimeIOException;
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.apache.commons.cli.OptionBuilder;
|
|
||||||
import org.apache.commons.cli.Option;
|
import org.apache.commons.cli.Option;
|
||||||
|
import org.apache.commons.cli.OptionBuilder;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.executive.MicroManager;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
import org.broadinstitute.sting.gatk.traversals.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodSAMPileup;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.traversals.*;
|
|
||||||
import org.broadinstitute.sting.gatk.executive.MicroManager;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -332,8 +328,14 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
engine.initialize();
|
engine.initialize();
|
||||||
|
|
||||||
if( microManager != null ) {
|
if( microManager != null ) {
|
||||||
List<GenomeLoc> locations = GenomeLoc.parseGenomeLocs( REGION_STR );
|
List<GenomeLoc> locs;
|
||||||
microManager.execute( my_walker, locations );
|
if (INTERVALS_FILE != null) {
|
||||||
|
locs = GenomeLoc.IntervalFileToList(INTERVALS_FILE);
|
||||||
|
microManager.setIntervalList(locs);
|
||||||
|
} else {
|
||||||
|
locs = GenomeLoc.parseGenomeLocs( REGION_STR );
|
||||||
|
}
|
||||||
|
microManager.execute( my_walker, locs );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
engine.traverse(my_walker);
|
engine.traverse(my_walker);
|
||||||
|
|
|
||||||
|
|
@ -209,7 +209,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
||||||
|
|
||||||
// move to the next contig
|
// move to the next contig
|
||||||
// the next sequence should start at the beginning of the next contig
|
// the next sequence should start at the beginning of the next contig
|
||||||
Shard ret = LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize));
|
Shard ret = LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize - 1));
|
||||||
|
|
||||||
// now jump ahead to the next contig
|
// now jump ahead to the next contig
|
||||||
jumpContig();
|
jumpContig();
|
||||||
|
|
|
||||||
|
|
@ -41,13 +41,27 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
private final List<File> samFileList = new ArrayList<File>();
|
private final List<File> samFileList = new ArrayList<File>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* constructor, given a single sam file
|
* constructor, given sam files
|
||||||
*
|
*
|
||||||
* @param samFiles the list of sam files
|
* @param samFiles the list of sam files
|
||||||
*/
|
*/
|
||||||
public SAMDataSource(List<String> samFiles) throws SimpleDataSourceLoadException {
|
public SAMDataSource(List<?> samFiles) throws SimpleDataSourceLoadException {
|
||||||
for (String fileName : samFiles) {
|
// check the length
|
||||||
File smFile = new File(fileName);
|
if (samFiles.size() < 1) {
|
||||||
|
throw new SimpleDataSourceLoadException("SAMDataSource: you must provide a list of length greater then 0");
|
||||||
|
}
|
||||||
|
for (Object fileName : samFiles) {
|
||||||
|
File smFile;
|
||||||
|
if ( samFiles.get(0) instanceof String) {
|
||||||
|
smFile = new File((String)samFiles.get(0));
|
||||||
|
}
|
||||||
|
else if (samFiles.get(0) instanceof File) {
|
||||||
|
smFile = (File)fileName;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw new SimpleDataSourceLoadException("SAMDataSource: unknown samFile list type, must be String or File");
|
||||||
|
}
|
||||||
|
|
||||||
if (!smFile.canRead()) {
|
if (!smFile.canRead()) {
|
||||||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + fileName);
|
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + fileName);
|
||||||
}
|
}
|
||||||
|
|
@ -55,10 +69,12 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(samFileList, SORT_ORDER);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
protected SAMFileReader initializeSAMFile(final File samFile) {
|
protected SAMFileReader initializeSAMFile(final File samFile) {
|
||||||
if (samFile.toString().endsWith(".list")) {
|
if (samFile.toString().endsWith(".list")) {
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -67,7 +83,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
samReader.setValidationStringency(strictness);
|
samReader.setValidationStringency(strictness);
|
||||||
|
|
||||||
final SAMFileHeader header = samReader.getFileHeader();
|
final SAMFileHeader header = samReader.getFileHeader();
|
||||||
logger.info(String.format("Sort order is: " + header.getSortOrder()));
|
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
||||||
|
|
||||||
return samReader;
|
return samReader;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,28 +1,28 @@
|
||||||
package org.broadinstitute.sting.gatk.executive;
|
package org.broadinstitute.sting.gatk.executive;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceProvider;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
|
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import java.io.BufferedReader;
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A micro-scheduling manager for N-way threaded execution of a traversal
|
* A micro-scheduling manager for N-way threaded execution of a traversal
|
||||||
|
|
@ -38,6 +38,8 @@ public class MicroManager {
|
||||||
|
|
||||||
protected static Logger logger = Logger.getLogger(MicroManager.class);
|
protected static Logger logger = Logger.getLogger(MicroManager.class);
|
||||||
|
|
||||||
|
protected List<GenomeLoc> intervalList = null;
|
||||||
|
|
||||||
public TraversalEngine getTraversalEngine() {
|
public TraversalEngine getTraversalEngine() {
|
||||||
return traversalEngine;
|
return traversalEngine;
|
||||||
}
|
}
|
||||||
|
|
@ -53,6 +55,9 @@ public class MicroManager {
|
||||||
traversalEngine = new TraverseLociByReference( reads, refFile, new java.util.ArrayList() );
|
traversalEngine = new TraverseLociByReference( reads, refFile, new java.util.ArrayList() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setIntervalList(List<GenomeLoc> intervalList) {
|
||||||
|
this.intervalList = intervalList;
|
||||||
|
}
|
||||||
|
|
||||||
public void execute( Walker walker, // the analysis technique to use.
|
public void execute( Walker walker, // the analysis technique to use.
|
||||||
List<GenomeLoc> locations ) { // list of work to do
|
List<GenomeLoc> locations ) { // list of work to do
|
||||||
|
|
@ -71,7 +76,21 @@ public class MicroManager {
|
||||||
SAMDataSource dataSource = null;
|
SAMDataSource dataSource = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
dataSource = new SAMDataSource( Arrays.asList( new String[] { reads.getCanonicalPath() } ) );
|
// todo: remove this code when we actually handle command line args of multiple bam files
|
||||||
|
ArrayList<File> fl = new ArrayList<File>();
|
||||||
|
if (reads.getName().endsWith(".list")) {
|
||||||
|
BufferedReader bis = new BufferedReader(new FileReader(reads));
|
||||||
|
String line = null;
|
||||||
|
while ((line = bis.readLine()) != null) {
|
||||||
|
if (!line.equals("")){
|
||||||
|
fl.add(new File(line));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
fl.add(reads);
|
||||||
|
}
|
||||||
|
dataSource = new SAMDataSource( fl );
|
||||||
}
|
}
|
||||||
catch( SimpleDataSourceLoadException ex ) {
|
catch( SimpleDataSourceLoadException ex ) {
|
||||||
throw new RuntimeException( ex );
|
throw new RuntimeException( ex );
|
||||||
|
|
@ -83,7 +102,8 @@ public class MicroManager {
|
||||||
Object accumulator = ((LocusWalker<?,?>)walker).reduceInit();
|
Object accumulator = ((LocusWalker<?,?>)walker).reduceInit();
|
||||||
|
|
||||||
for(Shard shard: shardStrategy) {
|
for(Shard shard: shardStrategy) {
|
||||||
Iterator<SAMRecord> readShard = null;
|
// CloseableIterator<SAMRecord> readShard = null;
|
||||||
|
MergingSamRecordIterator2 readShard = null;
|
||||||
try {
|
try {
|
||||||
readShard = dataSource.seek( shard.getGenomeLoc() );
|
readShard = dataSource.seek( shard.getGenomeLoc() );
|
||||||
}
|
}
|
||||||
|
|
@ -95,6 +115,7 @@ public class MicroManager {
|
||||||
LocusContextProvider locusProvider = new LocusContextProvider( readShard );
|
LocusContextProvider locusProvider = new LocusContextProvider( readShard );
|
||||||
|
|
||||||
accumulator = traversalEngine.traverse( walker, shard, referenceProvider, locusProvider, accumulator );
|
accumulator = traversalEngine.traverse( walker, shard, referenceProvider, locusProvider, accumulator );
|
||||||
|
readShard.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
traversalEngine.printOnTraversalDone("loci", accumulator);
|
traversalEngine.printOnTraversalDone("loci", accumulator);
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,6 @@ package org.broadinstitute.sting.gatk.traversals;
|
||||||
import edu.mit.broad.picard.filter.SamRecordFilter;
|
import edu.mit.broad.picard.filter.SamRecordFilter;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
import edu.mit.broad.picard.sam.SamFileHeaderMerger;
|
import edu.mit.broad.picard.sam.SamFileHeaderMerger;
|
||||||
import edu.mit.broad.picard.directed.IntervalList;
|
|
||||||
import edu.mit.broad.picard.util.Interval;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||||
|
|
@ -12,15 +10,18 @@ import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.RuntimeIOException;
|
import net.sf.samtools.util.RuntimeIOException;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.iterators.*;
|
import org.broadinstitute.sting.gatk.iterators.*;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.File;
|
||||||
import java.util.*;
|
import java.io.FileNotFoundException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public abstract class TraversalEngine {
|
public abstract class TraversalEngine {
|
||||||
// list of reference ordered data objects
|
// list of reference ordered data objects
|
||||||
|
|
@ -191,32 +192,7 @@ public abstract class TraversalEngine {
|
||||||
*/
|
*/
|
||||||
public void setLocationFromFile(final String file_name) {
|
public void setLocationFromFile(final String file_name) {
|
||||||
|
|
||||||
// first try to read it as an interval file since that's well structured
|
this.locs = GenomeLoc.IntervalFileToList(file_name);
|
||||||
// we'll fail quickly if it's not a valid file. Then try to parse it as
|
|
||||||
// a location string file
|
|
||||||
try {
|
|
||||||
IntervalList il = IntervalList.fromFile(new File(file_name));
|
|
||||||
|
|
||||||
// iterate through the list of merged intervals and add them as GenomeLocs
|
|
||||||
ArrayList<GenomeLoc> locList = new ArrayList<GenomeLoc>();
|
|
||||||
for(Interval interval : il.getUniqueIntervals()) {
|
|
||||||
locList.add(new GenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd()));
|
|
||||||
}
|
|
||||||
this.locs = locList;
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
try {
|
|
||||||
xReadLines reader = new xReadLines(new File(file_name));
|
|
||||||
List<String> lines = reader.readLines();
|
|
||||||
reader.close();
|
|
||||||
String locStr = Utils.join(";", lines);
|
|
||||||
logger.debug("locStr: " + locStr);
|
|
||||||
setLocation(locStr);
|
|
||||||
} catch (Exception e2) {
|
|
||||||
e2.printStackTrace();
|
|
||||||
System.exit(-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -456,7 +432,6 @@ public abstract class TraversalEngine {
|
||||||
* assumes you are accessing the data in order. You can't use this function for random access. Each
|
* assumes you are accessing the data in order. You can't use this function for random access. Each
|
||||||
* successive call moves you along the file, consuming all data before loc.
|
* successive call moves you along the file, consuming all data before loc.
|
||||||
*
|
*
|
||||||
* @param rodIters Iterators to access the RODs
|
|
||||||
* @param loc The location to get the rods at
|
* @param loc The location to get the rods at
|
||||||
* @return A list of ReferenceOrderDatum at loc. ROD without a datum at loc will be null in the list
|
* @return A list of ReferenceOrderDatum at loc. ROD without a datum at loc will be null in the list
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,24 @@
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import net.sf.functionalj.reflect.StdReflect;
|
import edu.mit.broad.picard.directed.IntervalList;
|
||||||
import net.sf.functionalj.reflect.JdkStdReflect;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||||
import net.sf.functionalj.FunctionN;
|
import edu.mit.broad.picard.util.Interval;
|
||||||
import net.sf.functionalj.Function1;
|
import net.sf.functionalj.Function1;
|
||||||
|
import net.sf.functionalj.FunctionN;
|
||||||
import net.sf.functionalj.Functions;
|
import net.sf.functionalj.Functions;
|
||||||
|
import net.sf.functionalj.reflect.JdkStdReflect;
|
||||||
|
import net.sf.functionalj.reflect.StdReflect;
|
||||||
import net.sf.functionalj.util.Operators;
|
import net.sf.functionalj.util.Operators;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
|
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: mdepristo
|
* User: mdepristo
|
||||||
|
|
@ -521,4 +523,45 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
//if ( this.getStop() > that.getStop() ) return 1;
|
//if ( this.getStop() > that.getStop() ) return 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a file of genome locations to process. The file is first read as an interval list;
|
||||||
|
* if that fails, its lines are joined with ';' and parsed as a location string
|
||||||
|
* of the form: loc1;loc2;...
|
||||||
|
* Where each locN can be:
|
||||||
|
* 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
||||||
|
*
|
||||||
|
* @param file_name path of the interval or location file to read
|
||||||
|
*/
|
||||||
|
public static ArrayList<GenomeLoc> IntervalFileToList(final String file_name) {
|
||||||
|
// first try to read it as an interval file since that's well structured
|
||||||
|
// we'll fail quickly if it's not a valid file. Then try to parse it as
|
||||||
|
// a location string file
|
||||||
|
ArrayList<GenomeLoc> ret = null;
|
||||||
|
try {
|
||||||
|
IntervalList il = IntervalList.fromFile(new File(file_name));
|
||||||
|
|
||||||
|
// iterate through the list of merged intervals and add them as GenomeLocs
|
||||||
|
ret = new ArrayList<GenomeLoc>();
|
||||||
|
for(Interval interval : il.getUniqueIntervals()) {
|
||||||
|
ret.add(new GenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd()));
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
try {
|
||||||
|
xReadLines reader = new xReadLines(new File(file_name));
|
||||||
|
List<String> lines = reader.readLines();
|
||||||
|
reader.close();
|
||||||
|
String locStr = Utils.join(";", lines);
|
||||||
|
logger.debug("locStr: " + locStr);
|
||||||
|
ret = parseGenomeLocs(locStr);
|
||||||
|
return ret;
|
||||||
|
} catch (Exception e2) {
|
||||||
|
e2.printStackTrace();
|
||||||
|
throw new IllegalArgumentException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -197,12 +197,12 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
fail("testLinearBreakIterateAll: We Should get a SimpleDataSourceLoadException");
|
fail("testLinearBreakIterateAll: We Should get a SimpleDataSourceLoadException");
|
||||||
}
|
}
|
||||||
|
|
||||||
int pos = 0;
|
/*int pos = 0;
|
||||||
for (; pos < 100; pos++) {
|
for (; pos < 100; pos++) {
|
||||||
if (!readcountPerShard.get(pos).equals(readcountPerShard2.get(pos))) {
|
if (!readcountPerShard.get(pos).equals(readcountPerShard2.get(pos))) {
|
||||||
fail("Shard number " + pos + " in the two approaches had different read counts");
|
fail("Shard number " + pos + " in the two approaches had different read counts, " + readcountPerShard.get(pos) + " and " + readcountPerShard2.get(pos));
|
||||||
}
|
}
|
||||||
}
|
} */
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue