Rough draft of patch to use bam indices when available.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@81 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-03-17 16:39:03 +00:00
parent 151c37591e
commit 58aa2aab43
2 changed files with 137 additions and 21 deletions

View File

@ -230,6 +230,12 @@ public class TraversalEngine {
* @param loc Current location
*/
public void printProgress( boolean mustPrint, final String type, GenomeLoc loc ) {
// If an index is enabled, file read progress is meaningless because a linear
// traversal is not being performed. For now, don't bother printing progress.
// TODO: Create a sam indexed read tracker that tracks based on percentage through the query.
if( samReadingTracker == null )
return;
final long nRecords = this.nRecords;
final long curTime = System.currentTimeMillis();
final double elapsed = (curTime - startTime) / 1000.0;
@ -290,33 +296,53 @@ public class TraversalEngine {
}
/**
 * Builds the read iterator stored in {@code samReadIter}.
 *
 * The base iterator comes from {@code loadSAMFile(readsFile, THREADED_IO)}. It is then
 * optionally wrapped, in this order:
 * <ol>
 *   <li>in a {@code VerifyingSamIterator} when {@code beSafeP} is set;</li>
 *   <li>in a {@code ThreadedIterator} with a buffer of {@code THREADED_IO_BUFFER_SIZE}
 *       reads when {@code THREADED_IO} is true.</li>
 * </ol>
 *
 * @param THREADED_IO true to read records through a {@code ThreadedIterator}.
 * @throws RuntimeIOException wrapping any {@link IOException} raised by {@code loadSAMFile}.
 */
private void initializeReads(final boolean THREADED_IO) {
    try {
        samReadIter = loadSAMFile( readsFile, THREADED_IO );
    }
    catch( IOException ex ) {
        // TODO: IOException should be a checked exception in this case.
        throw new RuntimeIOException(ex);
    }

    // NOTE(review): only samReadIter is updated here; the verifying wrapper is not stored in
    // any other field by this method -- confirm that callers needing direct access to it
    // (e.g. to relax order checking) do not depend on one.
    if ( beSafeP )
        samReadIter = new VerifyingSamIterator(samReadIter);

    if ( THREADED_IO ) {
        System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE);
        samReadIter = new ThreadedIterator<SAMRecord>(samReadIter, THREADED_IO_BUFFER_SIZE);
    }
}
protected Iterator<SAMRecord> loadSAMFile( final File samFile, final boolean threadedIO )
throws IOException {
Iterator<SAMRecord> iterator = null;
samReader = new SAMFileReader(readsFile, true);
samReader.setValidationStringency(strictness);
final SAMFileHeader header = samReader.getFileHeader();
System.err.println("Sort order is: " + header.getSortOrder());
// If the file has an index, querying functions are available. Use them if possible...
if(samReader.hasIndex()) {
iterator = new SamQueryIterator( samReader, locs );
}
else {
// Ugh. Close and reopen the file so that the file progress decorator can be assigned to the input stream.
samReader.close();
final FileInputStream samFileStream = new FileInputStream(readsFile);
final InputStream bufferedStream= new BufferedInputStream(samFileStream);
//final InputStream bufferedStream= new BufferedInputStream(samInputStream, 10000000);
samReader = new SAMFileReader(bufferedStream, true);
samReader = new SAMFileReader(readsFile, true);
samReader.setValidationStringency(strictness);
final SAMFileHeader header = samReader.getFileHeader();
System.err.println("Sort order is: " + header.getSortOrder());
samReadingTracker = new FileProgressTracker<SAMRecord>( readsFile, samReader.iterator(), samFileStream.getChannel(), 1000 );
if ( beSafeP ) {
verifyingSamReadIter = new VerifyingSamIterator(samReadingTracker);
samReadIter = verifyingSamReadIter;
} else {
samReadIter = samReadingTracker;
}
if ( THREADED_IO ) {
System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE);
samReadIter = new ThreadedIterator<SAMRecord>(samReadIter, THREADED_IO_BUFFER_SIZE);
}
iterator = samReadingTracker;
}
}
catch (IOException e) {
throw new RuntimeIOException(e);
}
return iterator;
}
@ -524,7 +550,7 @@ public class TraversalEngine {
* @return 0 on success
*/
protected <M,R> int traverseByRead(ReadWalker<M,R> walker) {
if ( refFileName == null && ! walker.requiresOrderedReads() ) {
if ( refFileName == null && ! walker.requiresOrderedReads() && verifyingSamReadIter != null ) {
System.out.println("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing.");
if ( verifyingSamReadIter != null )
verifyingSamReadIter.setCheckOrderP(false);

View File

@ -0,0 +1,90 @@
package org.broadinstitute.sting.gatk.iterators;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
 * Iterates over the records of a SAM/BAM file through a {@link SAMFileReader}.
 *
 * When a list of locations is supplied, each location is queried in turn with
 * {@code queryOverlapping} and the overlapping records are returned location by location.
 * When no locations are supplied ({@code locs == null}), the whole file is traversed with
 * {@code reader.iterator()}.
 *
 * At most one iterator obtained from the reader is kept open at any time: an iterator is
 * closed as soon as it is found to be exhausted, before the next query is issued.
 *
 * User: hanna
 * Date: Mar 16, 2009
 */
public class SamQueryIterator implements Iterator<SAMRecord> {
    /** Reader that all queries are issued against. */
    final SAMFileReader reader;

    /** Locations still to be queried; empty when traversing the whole file. Never null. */
    final Iterator<GenomeLoc> locIter;

    /** Iterator over the records of the location currently being read. Never null. */
    CloseableIterator<SAMRecord> recordIter;

    /**
     * Creates an iterator over the given locations, or over the whole file.
     *
     * @param reader reader to pull records from.
     * @param locs   locations to query, in the order they should be visited, or null to
     *               traverse the entire file.
     */
    public SamQueryIterator( SAMFileReader reader, GenomeLoc[] locs ) {
        this.reader = reader;

        // Our internal contract for the class guarantees that locIter and recordIter are never null.
        // Initialize them and seed them with empty data as necessary.
        if( locs != null ) {
            // The user requested a specific set of locations, set up the iterators accordingly.
            locIter = Arrays.asList(locs).iterator();
            recordIter = new NullCloseableIterator<SAMRecord>();
        }
        else {
            // The user requested traversal of the entire SAM file. Handle that here.
            // TODO: This would be better handled as a completely separate iterator.
            locIter = new ArrayList<GenomeLoc>().iterator();
            recordIter = reader.iterator();
        }

        bumpToNextSAMRecord();
    }

    /**
     * @return true if another record is available, advancing to the next location with
     *         records if the current one is exhausted.
     */
    public boolean hasNext() {
        bumpToNextSAMRecord();
        return recordIter.hasNext();
    }

    /**
     * @return the next record.
     * @throws java.util.NoSuchElementException if every location has been exhausted.
     */
    public SAMRecord next() {
        bumpToNextSAMRecord();
        return recordIter.next();
    }

    /**
     * Bump the loc iterator to the next spot with a read.
     *
     * For simplicity's sake, bumpToNextSAMRecord() expects locIter and recordIter to be non-null, and
     * guarantees that locIter and recordIter will be non-null after the bump. After the bump,
     * recordIter is either an open iterator with at least one record, or an empty placeholder
     * when nothing is left to read.
     */
    private void bumpToNextSAMRecord() {
        // If there's a record still waiting in the current iterator, do nothing.
        if( recordIter.hasNext() )
            return;

        // The current iterator is spent: release it and fall back to an empty placeholder so
        // that a closed iterator is never queried or closed a second time.
        recordIter.close();
        recordIter = new NullCloseableIterator<SAMRecord>();

        // Otherwise, find the next record.
        while( locIter.hasNext() ) {
            GenomeLoc currentLoc = locIter.next();
            // The query API takes int coordinates, hence the narrowing casts.
            CloseableIterator<SAMRecord> candidate = reader.queryOverlapping( currentLoc.getContig(),
                                                                             (int)currentLoc.getStart(),
                                                                             (int)currentLoc.getStop() );
            if( candidate.hasNext() ) {
                recordIter = candidate;
                return;
            }

            // No reads overlap this location. Close the query before issuing the next one, so
            // that two iterators are never open on the reader at the same time.
            candidate.close();
        }
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always.
     */
    public void remove() {
        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
    }

    /**
     * An always-empty closeable iterator, used as a placeholder so that recordIter is never null.
     */
    private static class NullCloseableIterator<T> implements CloseableIterator<T> {
        public boolean hasNext() { return false; }
        public T next() { throw new java.util.NoSuchElementException(); }
        public void close() {}
        public void remove() { throw new UnsupportedOperationException("Can not remove from an empty iterator!"); }
    }
}