Introduce new index-based fasta reader. Clean up MicroManager code, pushing necessary code back into TraversalEngine.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@531 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3739682bef
commit
e50ae97fe1
|
|
@ -28,12 +28,14 @@ import java.io.FileNotFoundException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
public class GenomeAnalysisTK extends CommandLineProgram {
|
public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
public static GenomeAnalysisTK Instance = null;
|
public static GenomeAnalysisTK Instance = null;
|
||||||
|
|
||||||
// parameters and their defaults
|
// parameters and their defaults
|
||||||
public File INPUT_FILE = null;
|
public List<File> INPUT_FILES = null;
|
||||||
public String MAX_READS_ARG = "-1";
|
public String MAX_READS_ARG = "-1";
|
||||||
public String STRICTNESS_ARG = "strict";
|
public String STRICTNESS_ARG = "strict";
|
||||||
public File REF_FILE_ARG = null;
|
public File REF_FILE_ARG = null;
|
||||||
|
|
@ -106,7 +108,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
* Flags don't take an argument, the associated Boolean gets set to true if the flag appears on the command line.
|
* Flags don't take an argument, the associated Boolean gets set to true if the flag appears on the command line.
|
||||||
*/
|
*/
|
||||||
protected void setupArgs() {
|
protected void setupArgs() {
|
||||||
m_parser.addOptionalArg("input_file", "I", "SAM or BAM file", "INPUT_FILE");
|
m_parser.addOptionalArgList("input_file", "I", "SAM or BAM file", "INPUT_FILES");
|
||||||
//m_parser.addRequiredArg("input_file", "I", "SAM or BAM file", "INPUT_FILE");
|
//m_parser.addRequiredArg("input_file", "I", "SAM or BAM file", "INPUT_FILE");
|
||||||
m_parser.addOptionalArg("maximum_reads", "M", "Maximum number of reads to process before exiting", "MAX_READS_ARG");
|
m_parser.addOptionalArg("maximum_reads", "M", "Maximum number of reads to process before exiting", "MAX_READS_ARG");
|
||||||
m_parser.addOptionalArg("validation_strictness", "S", "How strict should we be with validation (LENIENT|SILENT|STRICT)", "STRICTNESS_ARG");
|
m_parser.addOptionalArg("validation_strictness", "S", "How strict should we be with validation (LENIENT|SILENT|STRICT)", "STRICTNESS_ARG");
|
||||||
|
|
@ -255,34 +257,34 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
if ( REF_FILE_ARG == null )
|
if ( REF_FILE_ARG == null )
|
||||||
Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
|
Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
|
||||||
|
|
||||||
if ( INPUT_FILE == null ) {
|
if ( INPUT_FILES == null || INPUT_FILES.size() == 0 ) {
|
||||||
if ( walker.requiresReads() )
|
if ( walker.requiresReads() )
|
||||||
Utils.scareUser(String.format("Analysis %s requires reads, but none were given", Analysis_Name));
|
Utils.scareUser(String.format("Analysis %s requires reads, but none were given", Analysis_Name));
|
||||||
this.engine = new TraverseByReference(null, REF_FILE_ARG, rods);
|
this.engine = new TraverseByReference(null, REF_FILE_ARG, rods);
|
||||||
} else {
|
} else {
|
||||||
if ( walker.cannotHandleReads() )
|
if ( walker.cannotHandleReads() )
|
||||||
Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", Analysis_Name, INPUT_FILE));
|
Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", Analysis_Name, INPUT_FILES));
|
||||||
|
|
||||||
if ( WALK_ALL_LOCI ) {
|
if ( WALK_ALL_LOCI ) {
|
||||||
// TODO: Temporary debugging code. Activate the new debugging code only when the MicroManager
|
// TODO: Temporary debugging code. Activate the new debugging code only when the MicroManager
|
||||||
// is not filtered.
|
// is not filtered.
|
||||||
if( ENABLE_THREADING ) {
|
if( ENABLE_THREADING ) {
|
||||||
logger.warn("Preliminary threading support enabled");
|
logger.warn("Preliminary threading support enabled");
|
||||||
microManager = new MicroManager( INPUT_FILE, REF_FILE_ARG, numThreads );
|
microManager = new MicroManager( INPUT_FILES, REF_FILE_ARG, numThreads );
|
||||||
this.engine = microManager.getTraversalEngine();
|
this.engine = microManager.getTraversalEngine();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this.engine = new TraverseByLociByReference(INPUT_FILE, REF_FILE_ARG, rods);
|
this.engine = new TraverseByLociByReference(INPUT_FILES, REF_FILE_ARG, rods);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
this.engine = new TraverseByLoci(INPUT_FILE, REF_FILE_ARG, rods);
|
this.engine = new TraverseByLoci(INPUT_FILES, REF_FILE_ARG, rods);
|
||||||
}
|
}
|
||||||
} else if ( my_walker instanceof IntervalWalker ) {
|
} else if ( my_walker instanceof IntervalWalker ) {
|
||||||
this.engine = new TraverseByIntervals(INPUT_FILE, REF_FILE_ARG, rods);
|
this.engine = new TraverseByIntervals(INPUT_FILES, REF_FILE_ARG, rods);
|
||||||
} else {
|
} else {
|
||||||
// we're a read walker
|
// we're a read walker
|
||||||
this.engine = new TraverseByReads(INPUT_FILE, REF_FILE_ARG, rods);
|
this.engine = new TraverseByReads(INPUT_FILES, REF_FILE_ARG, rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepare the sort ordering w.r.t. the sequence dictionary
|
// Prepare the sort ordering w.r.t. the sequence dictionary
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: hanna
|
||||||
|
* Date: Apr 16, 2009
|
||||||
|
* Time: 4:11:40 PM
|
||||||
|
*
|
||||||
|
* Thrown to indicate invalid positions passed to the providers.
|
||||||
|
* Extend from RuntimeException to make it easier on our walker writers; don't make
|
||||||
|
* them catch every exception that comes their way.
|
||||||
|
*/
|
||||||
|
public class InvalidPositionException extends RuntimeException {
|
||||||
|
public InvalidPositionException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
public InvalidPositionException(String message, Throwable throwable) {
|
||||||
|
super(message,throwable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -11,17 +13,33 @@ import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class ReferenceProvider {
|
public class ReferenceProvider {
|
||||||
private ReferenceIterator reference;
|
private ReferenceSequence referenceSequence;
|
||||||
|
private GenomeLoc referenceInterval;
|
||||||
|
|
||||||
public ReferenceProvider( ReferenceIterator reference ) {
|
public ReferenceProvider( IndexedFastaSequenceFile sequenceFile, GenomeLoc position ) {
|
||||||
this.reference = reference;
|
this.referenceSequence = sequenceFile.getSubsequenceAt( position.getContig(),
|
||||||
|
position.getStart(),
|
||||||
|
position.getStop() );
|
||||||
|
this.referenceInterval = position;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReferenceIterator getReferenceSequence( GenomeLoc genomeLoc ) {
|
public char getReferenceBase( GenomeLoc genomeLoc ) throws InvalidPositionException {
|
||||||
if( (genomeLoc.getStop() - genomeLoc.getStart()) > 0 )
|
validateLocation( genomeLoc );
|
||||||
throw new RuntimeException( "Internal error :LocusContextProviders currently require 1-base genomeLocs.");
|
int offset = (int)(genomeLoc.getStart() - referenceInterval.getStart());
|
||||||
|
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
|
||||||
|
}
|
||||||
|
|
||||||
// jump to the first reference site
|
/**
|
||||||
return reference.seekForward(genomeLoc);
|
* Validates that the genomeLoc is one base wide and is in the reference sequence.
|
||||||
|
* @param genomeLoc location to verify.
|
||||||
|
*/
|
||||||
|
private void validateLocation( GenomeLoc genomeLoc ) throws InvalidPositionException {
|
||||||
|
//
|
||||||
|
if( !referenceInterval.containsP(genomeLoc) )
|
||||||
|
throw new InvalidPositionException(
|
||||||
|
String.format("Requested position %s not within interval %s", genomeLoc, referenceInterval));
|
||||||
|
if( genomeLoc.getStart() != genomeLoc.getStop() )
|
||||||
|
throw new InvalidPositionException(
|
||||||
|
String.format("Requested position larger than one base; start = %d, stop = %d", genomeLoc.getStart(), genomeLoc.getStop()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,10 @@ import org.broadinstitute.sting.gatk.iterators.BoundedReferenceIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
@ -35,8 +37,7 @@ import java.io.File;
|
||||||
*/
|
*/
|
||||||
public class ReferenceDataSource implements SimpleDataSource {
|
public class ReferenceDataSource implements SimpleDataSource {
|
||||||
|
|
||||||
final protected FastaSequenceFile2 refFile;
|
final protected IndexedFastaSequenceFile refFile;
|
||||||
final protected ReferenceIterator refIter;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Query the data source for a region of interest, specified by the genome location.
|
* Query the data source for a region of interest, specified by the genome location.
|
||||||
|
|
@ -46,7 +47,7 @@ public class ReferenceDataSource implements SimpleDataSource {
|
||||||
* @return an iterator of the appropriate type, that is limited by the region
|
* @return an iterator of the appropriate type, that is limited by the region
|
||||||
*/
|
*/
|
||||||
public BoundedReferenceIterator seek(GenomeLoc location) {
|
public BoundedReferenceIterator seek(GenomeLoc location) {
|
||||||
BoundedReferenceIterator ret = new BoundedReferenceIterator(refIter.seekForward(location), location);
|
BoundedReferenceIterator ret = new BoundedReferenceIterator(refFile, location);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -58,9 +59,11 @@ public class ReferenceDataSource implements SimpleDataSource {
|
||||||
if (!infile.canRead()) {
|
if (!infile.canRead()) {
|
||||||
throw new SimpleDataSourceLoadException("ReferenceDataSource: Unable to load file: " + refFileName);
|
throw new SimpleDataSourceLoadException("ReferenceDataSource: Unable to load file: " + refFileName);
|
||||||
}
|
}
|
||||||
//this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName);
|
try {
|
||||||
refFile = new FastaSequenceFile2(new File(refFileName));
|
refFile = new IndexedFastaSequenceFile(new File(refFileName));
|
||||||
refIter = new ReferenceIterator(this.refFile);
|
}
|
||||||
|
catch( FileNotFoundException ex ) {
|
||||||
|
throw new SimpleDataSourceLoadException( "Unable to find reference file", ex );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -32,4 +32,8 @@ public class SimpleDataSourceLoadException extends StingException {
|
||||||
public SimpleDataSourceLoadException(String msg) {
|
public SimpleDataSourceLoadException(String msg) {
|
||||||
super(msg);
|
super(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SimpleDataSourceLoadException(String msg,Throwable throwable) {
|
||||||
|
super(msg,throwable);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,19 +9,15 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
||||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
|
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileReader;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -31,8 +27,8 @@ import java.util.List;
|
||||||
public class MicroManager {
|
public class MicroManager {
|
||||||
private static long SHARD_SIZE = 100000L;
|
private static long SHARD_SIZE = 100000L;
|
||||||
|
|
||||||
private File reads;
|
private List<File> reads;
|
||||||
private FastaSequenceFile2 ref;
|
private IndexedFastaSequenceFile ref;
|
||||||
|
|
||||||
private TraverseLociByReference traversalEngine = null;
|
private TraverseLociByReference traversalEngine = null;
|
||||||
|
|
||||||
|
|
@ -44,12 +40,17 @@ public class MicroManager {
|
||||||
return traversalEngine;
|
return traversalEngine;
|
||||||
}
|
}
|
||||||
|
|
||||||
public MicroManager( File reads, // the reads file
|
public MicroManager( List<File> reads, // the reads file(s)
|
||||||
File refFile, // the reference file driving the traversal
|
File refFile, // the reference file driving the traversal
|
||||||
int nThreadsToUse ) { // maximum number of threads to use to do the work
|
int nThreadsToUse ) { // maximum number of threads to use to do the work
|
||||||
|
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
ref = new FastaSequenceFile2(refFile);
|
try {
|
||||||
|
ref = new IndexedFastaSequenceFile(refFile);
|
||||||
|
}
|
||||||
|
catch( FileNotFoundException ex ) {
|
||||||
|
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
||||||
|
}
|
||||||
GenomeLoc.setupRefContigOrdering(ref);
|
GenomeLoc.setupRefContigOrdering(ref);
|
||||||
|
|
||||||
traversalEngine = new TraverseLociByReference( reads, refFile, new java.util.ArrayList() );
|
traversalEngine = new TraverseLociByReference( reads, refFile, new java.util.ArrayList() );
|
||||||
|
|
@ -72,34 +73,14 @@ public class MicroManager {
|
||||||
ref.getSequenceDictionary(),
|
ref.getSequenceDictionary(),
|
||||||
SHARD_SIZE );
|
SHARD_SIZE );
|
||||||
|
|
||||||
ReferenceIterator refIter = new ReferenceIterator(ref);
|
|
||||||
SAMDataSource dataSource = null;
|
SAMDataSource dataSource = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// todo: remove this code when we acutally handle command line args of multiple bam files
|
dataSource = new SAMDataSource( TraversalEngine.unpackReads(reads) );
|
||||||
ArrayList<File> fl = new ArrayList<File>();
|
|
||||||
if (reads.getName().endsWith(".list")) {
|
|
||||||
BufferedReader bis = new BufferedReader(new FileReader(reads));
|
|
||||||
String line = null;
|
|
||||||
while ((line = bis.readLine()) != null) {
|
|
||||||
if (!line.equals("")){
|
|
||||||
fl.add(new File(line));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (reads.getCanonicalPath().indexOf(",") > 0) {
|
|
||||||
for (String bamFile : reads.getCanonicalPath().split(",")) {
|
|
||||||
fl.add(new File(bamFile));
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
fl.add(reads);
|
|
||||||
}
|
|
||||||
dataSource = new SAMDataSource( fl );
|
|
||||||
}
|
}
|
||||||
catch( SimpleDataSourceLoadException ex ) {
|
catch( SimpleDataSourceLoadException ex ) {
|
||||||
throw new RuntimeException( ex );
|
throw new RuntimeException( ex );
|
||||||
}
|
}
|
||||||
catch( IOException ex ) {
|
catch( FileNotFoundException ex ) {
|
||||||
throw new RuntimeException( ex );
|
throw new RuntimeException( ex );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -107,16 +88,17 @@ public class MicroManager {
|
||||||
Object accumulator = null;
|
Object accumulator = null;
|
||||||
|
|
||||||
for(Shard shard: shardStrategy) {
|
for(Shard shard: shardStrategy) {
|
||||||
// CloseableIterator<SAMRecord> readShard = null;
|
GenomeLoc span = shard.getGenomeLoc();
|
||||||
|
|
||||||
MergingSamRecordIterator2 readShard = null;
|
MergingSamRecordIterator2 readShard = null;
|
||||||
try {
|
try {
|
||||||
readShard = dataSource.seek( shard.getGenomeLoc() );
|
readShard = dataSource.seek( span );
|
||||||
}
|
}
|
||||||
catch( SimpleDataSourceLoadException ex ) {
|
catch( SimpleDataSourceLoadException ex ) {
|
||||||
throw new RuntimeException( ex );
|
throw new RuntimeException( ex );
|
||||||
}
|
}
|
||||||
|
|
||||||
ReferenceProvider referenceProvider = new ReferenceProvider( refIter );
|
ReferenceProvider referenceProvider = new ReferenceProvider( ref, span );
|
||||||
LocusContextProvider locusProvider = new LocusContextProvider( readShard );
|
LocusContextProvider locusProvider = new LocusContextProvider( readShard );
|
||||||
|
|
||||||
// set the sam header of the traversal engine
|
// set the sam header of the traversal engine
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,13 @@ package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
|
|
@ -35,90 +39,43 @@ import java.util.Iterator;
|
||||||
* <p/>
|
* <p/>
|
||||||
* TODO: Fix the underlying iterator and this class to model a real decorator pattern
|
* TODO: Fix the underlying iterator and this class to model a real decorator pattern
|
||||||
*/
|
*/
|
||||||
public class BoundedReferenceIterator implements Iterator<ReferenceIterator> {
|
public class BoundedReferenceIterator implements Iterator<Character> {
|
||||||
|
private ReferenceSequence referenceSequence = null;
|
||||||
|
|
||||||
|
// The start and end point of the read relative to the chromosome.
|
||||||
|
private long start;
|
||||||
|
private long end;
|
||||||
|
|
||||||
|
// 0-based index into the base array.
|
||||||
|
private int position;
|
||||||
// the location to screen over
|
// the location to screen over
|
||||||
private final GenomeLoc mLoc;
|
|
||||||
private final ReferenceIterator referenceIterator;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default constructor
|
* Default constructor
|
||||||
*
|
*
|
||||||
* @param referenceIterator
|
* @param referenceSequenceFile sequence file over which to iterate
|
||||||
* @param loc
|
* @param loc subsequence of the reference over which to iterate.
|
||||||
*/
|
*/
|
||||||
public BoundedReferenceIterator(ReferenceIterator referenceIterator, GenomeLoc loc) {
|
public BoundedReferenceIterator(IndexedFastaSequenceFile referenceSequenceFile, GenomeLoc loc) {
|
||||||
this.referenceIterator = referenceIterator;
|
start = loc.getStart();
|
||||||
this.mLoc = loc;
|
end = loc.getStop();
|
||||||
}
|
position = 0;
|
||||||
|
|
||||||
|
referenceSequence = referenceSequenceFile.getSubsequenceAt( loc.getContig(), position, end );
|
||||||
/**
|
|
||||||
* Create a BoundedReferenceIterator from a fasta seq and a genome loc
|
|
||||||
* @param refFile the fasta file, see the ReferenceIterator constructor
|
|
||||||
* @param loc our genome location
|
|
||||||
*/
|
|
||||||
public BoundedReferenceIterator(FastaSequenceFile2 refFile, GenomeLoc loc) {
|
|
||||||
this.referenceIterator = new ReferenceIterator(refFile);
|
|
||||||
this.mLoc = loc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* isSubRegion
|
|
||||||
* <p/>
|
|
||||||
* returns true if we include the whole passed in region
|
|
||||||
*
|
|
||||||
* @param loc the genome region to check
|
|
||||||
* @return true if we include THE WHOLE specified region
|
|
||||||
*/
|
|
||||||
protected boolean isSubRegion(GenomeLoc loc) {
|
|
||||||
// if the location is null, we assume we're all inclusive (we represent the whole genome).
|
|
||||||
if (mLoc == null || loc.isBetween(mLoc, mLoc)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* returns true if we include the whole passed in region
|
|
||||||
*
|
|
||||||
* @param contig
|
|
||||||
* @param start
|
|
||||||
* @param stop
|
|
||||||
* @return true if we enclose the passed region, false otherwise
|
|
||||||
*/
|
|
||||||
protected boolean isSubRegion(final String contig, final int start, final int stop) {
|
|
||||||
final GenomeLoc lc = new GenomeLoc(contig, start, stop);
|
|
||||||
return isSubRegion(lc);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If we're less then the limiting genomeLoc
|
|
||||||
*
|
|
||||||
* @param loc
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
protected boolean isLessThan(GenomeLoc loc) {
|
|
||||||
return loc.isPast(mLoc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// our adapted next function
|
// our adapted next function
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
// first check that we are within the search place
|
return (start + position) > end;
|
||||||
GenomeLoc loc = referenceIterator.getLocation();
|
|
||||||
if (!isSubRegion(loc)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return referenceIterator.hasNext();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReferenceIterator next() {
|
public Character next() {
|
||||||
return referenceIterator.next();
|
return StringUtil.bytesToString( referenceSequence.getBases(), position++, 1 ).charAt(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void remove() {
|
public void remove() {
|
||||||
referenceIterator.remove();
|
throw new UnsupportedOperationException("Cannot remove from a reference iterator");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,7 @@ import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public abstract class TraversalEngine {
|
public abstract class TraversalEngine {
|
||||||
// list of reference ordered data objects
|
// list of reference ordered data objects
|
||||||
|
|
@ -44,7 +42,7 @@ public abstract class TraversalEngine {
|
||||||
protected long maxReads = -1;
|
protected long maxReads = -1;
|
||||||
|
|
||||||
// Name of the reads file, in BAM/SAM format
|
// Name of the reads file, in BAM/SAM format
|
||||||
protected File readsFile = null; // the name of the reads file
|
protected List<File> readsFiles = null; // the name of the reads file
|
||||||
protected SAMFileReader samReader = null;
|
protected SAMFileReader samReader = null;
|
||||||
// iterator over the sam records in the readsFile
|
// iterator over the sam records in the readsFile
|
||||||
protected Iterator<SAMRecord> samReadIter = null;
|
protected Iterator<SAMRecord> samReadIter = null;
|
||||||
|
|
@ -102,8 +100,8 @@ public abstract class TraversalEngine {
|
||||||
* @param ref Reference file in FASTA format, assumes a .dict file is also available
|
* @param ref Reference file in FASTA format, assumes a .dict file is also available
|
||||||
* @param rods Array of reference ordered data sets
|
* @param rods Array of reference ordered data sets
|
||||||
*/
|
*/
|
||||||
public TraversalEngine(File reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
public TraversalEngine(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
readsFile = reads;
|
readsFiles = reads;
|
||||||
refFileName = ref;
|
refFileName = ref;
|
||||||
this.rods = rods;
|
this.rods = rods;
|
||||||
}
|
}
|
||||||
|
|
@ -326,35 +324,66 @@ public abstract class TraversalEngine {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unpack the files to be processed, given a list of files. That list of files can
|
||||||
|
* itself contain lists of other files to be read.
|
||||||
|
* @param inputFiles
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static List<File> unpackReads( List<File> inputFiles ) throws FileNotFoundException {
|
||||||
|
List<File> unpackedReads = new ArrayList<File>();
|
||||||
|
for( File inputFile: inputFiles ) {
|
||||||
|
if( inputFile.getName().endsWith(".list") ) {
|
||||||
|
for( String fileName : new xReadLines(inputFile) )
|
||||||
|
unpackedReads.add( new File(fileName) );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
unpackedReads.add( inputFile );
|
||||||
|
}
|
||||||
|
return unpackedReads;
|
||||||
|
}
|
||||||
|
|
||||||
protected Iterator<SAMRecord> initializeReads() {
|
protected Iterator<SAMRecord> initializeReads() {
|
||||||
samReader = initializeSAMFile(readsFile);
|
List<File> allReadsFiles = null;
|
||||||
|
try {
|
||||||
|
allReadsFiles = unpackReads( readsFiles );
|
||||||
|
}
|
||||||
|
catch( FileNotFoundException ex ) {
|
||||||
|
throw new RuntimeException("Unable to unpack reads", ex );
|
||||||
|
}
|
||||||
|
|
||||||
|
if( allReadsFiles.size() == 1 )
|
||||||
|
samReader = initializeSAMFile(allReadsFiles.get(0));
|
||||||
|
else
|
||||||
|
samReader = null;
|
||||||
return WrapReadsIterator(getReadsIterator(samReader), true);
|
return WrapReadsIterator(getReadsIterator(samReader), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Iterator<SAMRecord> getReadsIterator(final SAMFileReader samReader) {
|
protected Iterator<SAMRecord> getReadsIterator(final SAMFileReader samReader) {
|
||||||
// If the file has an index, querying functions are available. Use them if possible...
|
// If the file has an index, querying functions are available. Use them if possible...
|
||||||
if ( samReader == null && readsFile.toString().endsWith(".list") ) {
|
if ( samReader == null && readsFiles.size() > 0 ) {
|
||||||
SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
|
SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
|
||||||
|
|
||||||
|
List<File> allReadsFiles = null;
|
||||||
List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
|
List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
|
||||||
try {
|
try {
|
||||||
for ( String fileName : new xReadLines(readsFile) ) {
|
allReadsFiles = unpackReads( readsFiles );
|
||||||
SAMFileReader reader = initializeSAMFile(new File(fileName));
|
}
|
||||||
readers.add(reader);
|
catch(FileNotFoundException ex) {
|
||||||
}
|
throw new RuntimeException("Unable to unpack reads", ex);
|
||||||
|
}
|
||||||
|
|
||||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SORT_ORDER);
|
for ( File readsFile: allReadsFiles ) {
|
||||||
return new MergingSamRecordIterator2(headerMerger);
|
SAMFileReader reader = initializeSAMFile(readsFile);
|
||||||
}
|
readers.add(reader);
|
||||||
catch ( FileNotFoundException e ) {
|
|
||||||
logger.fatal("Couldn't open file in sam file list: " + readsFile);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SORT_ORDER);
|
||||||
|
return new MergingSamRecordIterator2(headerMerger);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return samReader.iterator();
|
||||||
}
|
}
|
||||||
//if (samReader.hasIndex()) {
|
|
||||||
// return new SamQueryIterator(samReader, locs);
|
|
||||||
//} else {
|
|
||||||
return samReader.iterator();
|
|
||||||
//}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Iterator<SAMRecord> WrapReadsIterator( final Iterator<SAMRecord> rawIterator, final boolean enableVerification ) {
|
protected Iterator<SAMRecord> WrapReadsIterator( final Iterator<SAMRecord> rawIterator, final boolean enableVerification ) {
|
||||||
|
|
@ -379,7 +408,7 @@ public abstract class TraversalEngine {
|
||||||
return wrappedIterator;
|
return wrappedIterator;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected SAMFileReader initializeSAMFile(final File samFile) {
|
protected SAMFileReader initializeSAMFile(File samFile) {
|
||||||
// todo: fixme, this is a hack to try out dynamic merging
|
// todo: fixme, this is a hack to try out dynamic merging
|
||||||
if ( samFile.toString().endsWith(".list") ) {
|
if ( samFile.toString().endsWith(".list") ) {
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ import edu.mit.broad.picard.filter.FilteringIterator;
|
||||||
*/
|
*/
|
||||||
public class TraverseByIntervals extends TraversalEngine {
|
public class TraverseByIntervals extends TraversalEngine {
|
||||||
|
|
||||||
public TraverseByIntervals(File reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
public TraverseByIntervals(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
super(reads, ref, rods);
|
super(reads, ref, rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -55,7 +55,10 @@ public class TraverseByIntervals extends TraversalEngine {
|
||||||
protected <M, T> T traverseByIntervals(IntervalWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
|
protected <M, T> T traverseByIntervals(IntervalWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
|
||||||
logger.debug("Entering traverseByIntervals");
|
logger.debug("Entering traverseByIntervals");
|
||||||
|
|
||||||
samReader = initializeSAMFile(readsFile);
|
if(readsFiles.size() > 1)
|
||||||
|
throw new UnsupportedOperationException("Cannot do ByInterval traversal on file with multiple inputs.");
|
||||||
|
|
||||||
|
samReader = initializeSAMFile(readsFiles.get(0));
|
||||||
|
|
||||||
verifySortOrder(true);
|
verifySortOrder(true);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.gatk.traversals;
|
package org.broadinstitute.sting.gatk.traversals;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
|
@ -14,7 +13,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -32,7 +30,7 @@ import edu.mit.broad.picard.filter.FilteringIterator;
|
||||||
*/
|
*/
|
||||||
public class TraverseByLoci extends TraversalEngine {
|
public class TraverseByLoci extends TraversalEngine {
|
||||||
|
|
||||||
public TraverseByLoci(File reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
public TraverseByLoci(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
super(reads, ref, rods);
|
super(reads, ref, rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -59,8 +57,10 @@ public class TraverseByLoci extends TraversalEngine {
|
||||||
protected <M, T> T traverseByLoci(LocusWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
|
protected <M, T> T traverseByLoci(LocusWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
|
||||||
logger.debug("Entering traverseByLoci");
|
logger.debug("Entering traverseByLoci");
|
||||||
|
|
||||||
samReader = initializeSAMFile(readsFile);
|
if(readsFiles.size() > 1)
|
||||||
|
throw new UnsupportedOperationException("Cannot do ByLoci traversal on file with multiple inputs.");
|
||||||
|
|
||||||
|
samReader = initializeSAMFile(readsFiles.get(0));
|
||||||
verifySortOrder(true);
|
verifySortOrder(true);
|
||||||
|
|
||||||
// initialize the walker object
|
// initialize the walker object
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ import edu.mit.broad.picard.filter.FilteringIterator;
|
||||||
*/
|
*/
|
||||||
public class TraverseByLociByReference extends TraverseByLoci {
|
public class TraverseByLociByReference extends TraverseByLoci {
|
||||||
|
|
||||||
public TraverseByLociByReference(File reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
public TraverseByLociByReference(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
super(reads, ref, rods);
|
super(reads, ref, rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
*/
|
*/
|
||||||
public class TraverseByReads extends TraversalEngine {
|
public class TraverseByReads extends TraversalEngine {
|
||||||
|
|
||||||
public TraverseByReads(File reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
public TraverseByReads(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
super(reads, ref, rods);
|
super(reads, ref, rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
*/
|
*/
|
||||||
public class TraverseByReference extends TraverseByLoci {
|
public class TraverseByReference extends TraverseByLoci {
|
||||||
|
|
||||||
public TraverseByReference(File reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
public TraverseByReference(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
super(reads, ref, rods);
|
super(reads, ref, rods);
|
||||||
|
|
||||||
logger.debug("Creating TraverseByReference");
|
logger.debug("Creating TraverseByReference");
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceProvider;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.providers.InvalidPositionException;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -33,7 +34,7 @@ public class TraverseLociByReference extends TraversalEngine {
|
||||||
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||||
|
|
||||||
|
|
||||||
public TraverseLociByReference(File reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
public TraverseLociByReference(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
super( reads, ref, rods );
|
super( reads, ref, rods );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -66,14 +67,13 @@ public class TraverseLociByReference extends TraversalEngine {
|
||||||
// Iterate forward to get all reference ordered data covering this locus
|
// Iterate forward to get all reference ordered data covering this locus
|
||||||
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus( site );
|
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus( site );
|
||||||
|
|
||||||
ReferenceIterator refSite = referenceProvider.getReferenceSequence( site );
|
|
||||||
LocusContext locus = locusProvider.getLocusContext( site );
|
LocusContext locus = locusProvider.getLocusContext( site );
|
||||||
locus.setReferenceContig( refSite.getCurrentContig() );
|
|
||||||
|
char refBase = referenceProvider.getReferenceBase( site );
|
||||||
|
|
||||||
if ( DOWNSAMPLE_BY_COVERAGE )
|
if ( DOWNSAMPLE_BY_COVERAGE )
|
||||||
locus.downsampleToCoverage(downsamplingCoverage);
|
locus.downsampleToCoverage(downsamplingCoverage);
|
||||||
|
|
||||||
final char refBase = refSite.getBaseAsChar();
|
|
||||||
final boolean keepMeP = locusWalker.filter(tracker, refBase, locus);
|
final boolean keepMeP = locusWalker.filter(tracker, refBase, locus);
|
||||||
if (keepMeP) {
|
if (keepMeP) {
|
||||||
M x = locusWalker.map(tracker, refBase, locus);
|
M x = locusWalker.map(tracker, refBase, locus);
|
||||||
|
|
@ -90,4 +90,4 @@ public class TraverseLociByReference extends TraversalEngine {
|
||||||
|
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -113,14 +113,30 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the sequence dictionary for the fasta file.
|
||||||
|
* @return sequence dictionary of the fasta.
|
||||||
|
*/
|
||||||
public SAMSequenceDictionary getSequenceDictionary() {
|
public SAMSequenceDictionary getSequenceDictionary() {
|
||||||
return sequenceDictionary;
|
return sequenceDictionary;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the complete sequence described by this contig.
|
||||||
|
* @param contig contig whose data should be returned.
|
||||||
|
* @return The full sequence associated with this contig.
|
||||||
|
*/
|
||||||
public ReferenceSequence getSequence( String contig ) {
|
public ReferenceSequence getSequence( String contig ) {
|
||||||
return getSubsequenceAt( contig, 0, (int)index.getIndexEntry(contig).getSize()-1 );
|
return getSubsequenceAt( contig, 1, (int)index.getIndexEntry(contig).getSize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the subsequence of the contig in the range [start,stop]
|
||||||
|
* @param contig Contig whose subsequence to retrieve.
|
||||||
|
* @param start inclusive, 1-based start of region.
|
||||||
|
* @param stop inclusive, 1-based stop of region.
|
||||||
|
* @return The partial reference sequence associated with this range.
|
||||||
|
*/
|
||||||
public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) {
|
public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) {
|
||||||
if(start > stop)
|
if(start > stop)
|
||||||
throw new PicardException(String.format("Malformed query; start point %d lies after end point %d",start,stop));
|
throw new PicardException(String.format("Malformed query; start point %d lies after end point %d",start,stop));
|
||||||
|
|
@ -140,8 +156,8 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
||||||
final int bytesPerLine = indexEntry.getBytesPerLine();
|
final int bytesPerLine = indexEntry.getBytesPerLine();
|
||||||
|
|
||||||
// Start reading at the closest start-of-line to our data.
|
// Start reading at the closest start-of-line to our data.
|
||||||
long readStart = indexEntry.getLocation() + (start / basesPerLine) * bytesPerLine;
|
long readStart = indexEntry.getLocation() + ((start-1) / basesPerLine) * bytesPerLine;
|
||||||
int dataOfInterestStart = (int)(start % basesPerLine);
|
int dataOfInterestStart = (int)((start-1) % basesPerLine);
|
||||||
|
|
||||||
byte[] accumulator = new byte[length];
|
byte[] accumulator = new byte[length];
|
||||||
int nextAccumulatorSlot = 0;
|
int nextAccumulatorSlot = 0;
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testFirstSequence() {
|
public void testFirstSequence() {
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,firstBasesOfChrM.length()-1);
|
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",1,firstBasesOfChrM.length());
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
||||||
|
|
@ -68,7 +68,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testFirstSequenceExtended() {
|
public void testFirstSequenceExtended() {
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,extendedBasesOfChrM.length()-1);
|
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",1,extendedBasesOfChrM.length());
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
||||||
|
|
@ -87,8 +87,8 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",
|
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",
|
||||||
bytesToChopOff,
|
bytesToChopOff + 1,
|
||||||
bytesToChopOff + truncated.length() - 1);
|
bytesToChopOff + truncated.length());
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
||||||
|
|
@ -107,8 +107,8 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",
|
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",
|
||||||
bytesToChopOff,
|
bytesToChopOff + 1,
|
||||||
bytesToChopOff + truncated.length() - 1);
|
bytesToChopOff + truncated.length());
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
||||||
|
|
@ -204,7 +204,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testFirstOfChr1() {
|
public void testFirstOfChr1() {
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr1",0,firstBasesOfChr1.length()-1);
|
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr1",1,firstBasesOfChr1.length());
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chr1");
|
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chr1");
|
||||||
|
|
@ -219,7 +219,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testFirstOfChr8() {
|
public void testFirstOfChr8() {
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr8",0,firstBasesOfChr8.length()-1);
|
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr8",1,firstBasesOfChr8.length());
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chr8");
|
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chr8");
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue