Rough draft of patch to use bam indices when available.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@81 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
151c37591e
commit
58aa2aab43
|
|
@ -230,6 +230,12 @@ public class TraversalEngine {
|
|||
* @param loc Current location
|
||||
*/
|
||||
public void printProgress( boolean mustPrint, final String type, GenomeLoc loc ) {
|
||||
// If an index is enabled, file read progress is meaningless because a linear
|
||||
// traversal is not being performed. For now, don't bother printing progress.
|
||||
// TODO: Create a sam indexed read tracker that tracks based on percentage through the query.
|
||||
if( samReadingTracker == null )
|
||||
return;
|
||||
|
||||
final long nRecords = this.nRecords;
|
||||
final long curTime = System.currentTimeMillis();
|
||||
final double elapsed = (curTime - startTime) / 1000.0;
|
||||
|
|
@ -290,33 +296,53 @@ public class TraversalEngine {
|
|||
}
|
||||
|
||||
private void initializeReads(final boolean THREADED_IO) {
|
||||
|
||||
Iterator<SAMRecord> samIterator;
|
||||
try {
|
||||
samReadIter = loadSAMFile( readsFile, THREADED_IO );
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
// TODO: IOException should be a checked exception in this case.
|
||||
throw new RuntimeIOException(ex);
|
||||
}
|
||||
|
||||
if ( beSafeP )
|
||||
samReadIter = new VerifyingSamIterator(samReadIter);
|
||||
|
||||
if ( THREADED_IO ) {
|
||||
System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE);
|
||||
samReadIter = new ThreadedIterator<SAMRecord>(samReadIter, THREADED_IO_BUFFER_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
protected Iterator<SAMRecord> loadSAMFile( final File samFile, final boolean threadedIO )
|
||||
throws IOException {
|
||||
Iterator<SAMRecord> iterator = null;
|
||||
|
||||
samReader = new SAMFileReader(readsFile, true);
|
||||
samReader.setValidationStringency(strictness);
|
||||
|
||||
final SAMFileHeader header = samReader.getFileHeader();
|
||||
System.err.println("Sort order is: " + header.getSortOrder());
|
||||
|
||||
// If the file has an index, querying functions are available. Use them if possible...
|
||||
if(samReader.hasIndex()) {
|
||||
iterator = new SamQueryIterator( samReader, locs );
|
||||
}
|
||||
else {
|
||||
// Ugh. Close and reopen the file so that the file progress decorator can be assigned to the input stream.
|
||||
samReader.close();
|
||||
|
||||
final FileInputStream samFileStream = new FileInputStream(readsFile);
|
||||
final InputStream bufferedStream= new BufferedInputStream(samFileStream);
|
||||
//final InputStream bufferedStream= new BufferedInputStream(samInputStream, 10000000);
|
||||
samReader = new SAMFileReader(bufferedStream, true);
|
||||
samReader = new SAMFileReader(readsFile, true);
|
||||
samReader.setValidationStringency(strictness);
|
||||
|
||||
final SAMFileHeader header = samReader.getFileHeader();
|
||||
System.err.println("Sort order is: " + header.getSortOrder());
|
||||
|
||||
samReadingTracker = new FileProgressTracker<SAMRecord>( readsFile, samReader.iterator(), samFileStream.getChannel(), 1000 );
|
||||
if ( beSafeP ) {
|
||||
verifyingSamReadIter = new VerifyingSamIterator(samReadingTracker);
|
||||
samReadIter = verifyingSamReadIter;
|
||||
} else {
|
||||
samReadIter = samReadingTracker;
|
||||
}
|
||||
|
||||
if ( THREADED_IO ) {
|
||||
System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE);
|
||||
samReadIter = new ThreadedIterator<SAMRecord>(samReadIter, THREADED_IO_BUFFER_SIZE);
|
||||
}
|
||||
iterator = samReadingTracker;
|
||||
}
|
||||
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
return iterator;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -524,7 +550,7 @@ public class TraversalEngine {
|
|||
* @return 0 on success
|
||||
*/
|
||||
protected <M,R> int traverseByRead(ReadWalker<M,R> walker) {
|
||||
if ( refFileName == null && ! walker.requiresOrderedReads() ) {
|
||||
if ( refFileName == null && ! walker.requiresOrderedReads() && verifyingSamReadIter != null ) {
|
||||
System.out.println("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing.");
|
||||
if ( verifyingSamReadIter != null )
|
||||
verifyingSamReadIter.setCheckOrderP(false);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,90 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: hanna
|
||||
* Date: Mar 16, 2009
|
||||
* Time: 6:08:08 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SamQueryIterator implements Iterator<SAMRecord> {
|
||||
|
||||
SAMFileReader reader = null;
|
||||
|
||||
Iterator<GenomeLoc> locIter = null;
|
||||
CloseableIterator<SAMRecord> recordIter = null;
|
||||
|
||||
public SamQueryIterator( SAMFileReader reader, GenomeLoc[] locs ) {
|
||||
this.reader = reader;
|
||||
|
||||
// Our internal contract for the class guarantees that locIter and recordIter are never null.
|
||||
// Initialize them and seed them with empty data as necessary.
|
||||
if(locs != null) {
|
||||
// The user requested a specific set of locations, set up the iterators accordly.
|
||||
locIter = Arrays.asList(locs).iterator();
|
||||
recordIter = new NullCloseableIterator<SAMRecord>();
|
||||
}
|
||||
else {
|
||||
// The user requested traversal of the entire SAM file. Handle that here.
|
||||
// TODO: This would be better handled as a completely separate iterator.
|
||||
locIter = new ArrayList<GenomeLoc>().iterator();
|
||||
recordIter = reader.iterator();
|
||||
}
|
||||
|
||||
bumpToNextSAMRecord();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
bumpToNextSAMRecord();
|
||||
return recordIter.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
bumpToNextSAMRecord();
|
||||
return recordIter.next();
|
||||
}
|
||||
|
||||
/**
|
||||
* Bump the loc iterator to the next spot with a read.
|
||||
*
|
||||
* For simplicity's sake, bumpToNextSAMRecord() expects locIter and recordIter to be non-null, and
|
||||
* guarantees that locIter and recordIter will be non-null after the bump.
|
||||
*/
|
||||
private void bumpToNextSAMRecord() {
|
||||
// If there's a record still waiting in the current iterator, do nothing.
|
||||
if( recordIter.hasNext() )
|
||||
return;
|
||||
|
||||
// Otherwise, find the next record.
|
||||
recordIter.close();
|
||||
while( locIter.hasNext() ) {
|
||||
GenomeLoc currentLoc = locIter.next();
|
||||
recordIter = reader.queryOverlapping( currentLoc.getContig(),
|
||||
(int)currentLoc.getStart(),
|
||||
(int)currentLoc.getStop() );
|
||||
if( recordIter.hasNext() )
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
private class NullCloseableIterator<T> implements CloseableIterator<T> {
|
||||
public boolean hasNext() { return false; }
|
||||
public T next() { throw new java.util.NoSuchElementException(); }
|
||||
public void close() {}
|
||||
public void remove() {}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue