Rough draft of a patch to use BAM indices when available.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@81 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
151c37591e
commit
58aa2aab43
|
|
@ -230,6 +230,12 @@ public class TraversalEngine {
|
||||||
* @param loc Current location
|
* @param loc Current location
|
||||||
*/
|
*/
|
||||||
public void printProgress( boolean mustPrint, final String type, GenomeLoc loc ) {
|
public void printProgress( boolean mustPrint, final String type, GenomeLoc loc ) {
|
||||||
|
// If an index is enabled, file read progress is meaningless because a linear
|
||||||
|
// traversal is not being performed. For now, don't bother printing progress.
|
||||||
|
// TODO: Create a sam indexed read tracker that tracks based on percentage through the query.
|
||||||
|
if( samReadingTracker == null )
|
||||||
|
return;
|
||||||
|
|
||||||
final long nRecords = this.nRecords;
|
final long nRecords = this.nRecords;
|
||||||
final long curTime = System.currentTimeMillis();
|
final long curTime = System.currentTimeMillis();
|
||||||
final double elapsed = (curTime - startTime) / 1000.0;
|
final double elapsed = (curTime - startTime) / 1000.0;
|
||||||
|
|
@ -290,33 +296,53 @@ public class TraversalEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void initializeReads(final boolean THREADED_IO) {
|
private void initializeReads(final boolean THREADED_IO) {
|
||||||
|
|
||||||
|
Iterator<SAMRecord> samIterator;
|
||||||
try {
|
try {
|
||||||
|
samReadIter = loadSAMFile( readsFile, THREADED_IO );
|
||||||
|
}
|
||||||
|
catch( IOException ex ) {
|
||||||
|
// TODO: IOException should be a checked exception in this case.
|
||||||
|
throw new RuntimeIOException(ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( beSafeP )
|
||||||
|
samReadIter = new VerifyingSamIterator(samReadIter);
|
||||||
|
|
||||||
|
if ( THREADED_IO ) {
|
||||||
|
System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE);
|
||||||
|
samReadIter = new ThreadedIterator<SAMRecord>(samReadIter, THREADED_IO_BUFFER_SIZE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Iterator<SAMRecord> loadSAMFile( final File samFile, final boolean threadedIO )
|
||||||
|
throws IOException {
|
||||||
|
Iterator<SAMRecord> iterator = null;
|
||||||
|
|
||||||
|
samReader = new SAMFileReader(readsFile, true);
|
||||||
|
samReader.setValidationStringency(strictness);
|
||||||
|
|
||||||
|
final SAMFileHeader header = samReader.getFileHeader();
|
||||||
|
System.err.println("Sort order is: " + header.getSortOrder());
|
||||||
|
|
||||||
|
// If the file has an index, querying functions are available. Use them if possible...
|
||||||
|
if(samReader.hasIndex()) {
|
||||||
|
iterator = new SamQueryIterator( samReader, locs );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Ugh. Close and reopen the file so that the file progress decorator can be assigned to the input stream.
|
||||||
|
samReader.close();
|
||||||
|
|
||||||
final FileInputStream samFileStream = new FileInputStream(readsFile);
|
final FileInputStream samFileStream = new FileInputStream(readsFile);
|
||||||
final InputStream bufferedStream= new BufferedInputStream(samFileStream);
|
final InputStream bufferedStream= new BufferedInputStream(samFileStream);
|
||||||
//final InputStream bufferedStream= new BufferedInputStream(samInputStream, 10000000);
|
samReader = new SAMFileReader(readsFile, true);
|
||||||
samReader = new SAMFileReader(bufferedStream, true);
|
|
||||||
samReader.setValidationStringency(strictness);
|
samReader.setValidationStringency(strictness);
|
||||||
|
|
||||||
final SAMFileHeader header = samReader.getFileHeader();
|
|
||||||
System.err.println("Sort order is: " + header.getSortOrder());
|
|
||||||
|
|
||||||
samReadingTracker = new FileProgressTracker<SAMRecord>( readsFile, samReader.iterator(), samFileStream.getChannel(), 1000 );
|
samReadingTracker = new FileProgressTracker<SAMRecord>( readsFile, samReader.iterator(), samFileStream.getChannel(), 1000 );
|
||||||
if ( beSafeP ) {
|
iterator = samReadingTracker;
|
||||||
verifyingSamReadIter = new VerifyingSamIterator(samReadingTracker);
|
}
|
||||||
samReadIter = verifyingSamReadIter;
|
|
||||||
} else {
|
|
||||||
samReadIter = samReadingTracker;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( THREADED_IO ) {
|
|
||||||
System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE);
|
|
||||||
samReadIter = new ThreadedIterator<SAMRecord>(samReadIter, THREADED_IO_BUFFER_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
return iterator;
|
||||||
catch (IOException e) {
|
|
||||||
throw new RuntimeIOException(e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -524,7 +550,7 @@ public class TraversalEngine {
|
||||||
* @return 0 on success
|
* @return 0 on success
|
||||||
*/
|
*/
|
||||||
protected <M,R> int traverseByRead(ReadWalker<M,R> walker) {
|
protected <M,R> int traverseByRead(ReadWalker<M,R> walker) {
|
||||||
if ( refFileName == null && ! walker.requiresOrderedReads() ) {
|
if ( refFileName == null && ! walker.requiresOrderedReads() && verifyingSamReadIter != null ) {
|
||||||
System.out.println("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing.");
|
System.out.println("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing.");
|
||||||
if ( verifyingSamReadIter != null )
|
if ( verifyingSamReadIter != null )
|
||||||
verifyingSamReadIter.setCheckOrderP(false);
|
verifyingSamReadIter.setCheckOrderP(false);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,90 @@
|
||||||
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: hanna
|
||||||
|
* Date: Mar 16, 2009
|
||||||
|
* Time: 6:08:08 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class SamQueryIterator implements Iterator<SAMRecord> {
|
||||||
|
|
||||||
|
SAMFileReader reader = null;
|
||||||
|
|
||||||
|
Iterator<GenomeLoc> locIter = null;
|
||||||
|
CloseableIterator<SAMRecord> recordIter = null;
|
||||||
|
|
||||||
|
public SamQueryIterator( SAMFileReader reader, GenomeLoc[] locs ) {
|
||||||
|
this.reader = reader;
|
||||||
|
|
||||||
|
// Our internal contract for the class guarantees that locIter and recordIter are never null.
|
||||||
|
// Initialize them and seed them with empty data as necessary.
|
||||||
|
if(locs != null) {
|
||||||
|
// The user requested a specific set of locations, set up the iterators accordly.
|
||||||
|
locIter = Arrays.asList(locs).iterator();
|
||||||
|
recordIter = new NullCloseableIterator<SAMRecord>();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// The user requested traversal of the entire SAM file. Handle that here.
|
||||||
|
// TODO: This would be better handled as a completely separate iterator.
|
||||||
|
locIter = new ArrayList<GenomeLoc>().iterator();
|
||||||
|
recordIter = reader.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
bumpToNextSAMRecord();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
bumpToNextSAMRecord();
|
||||||
|
return recordIter.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord next() {
|
||||||
|
bumpToNextSAMRecord();
|
||||||
|
return recordIter.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bump the loc iterator to the next spot with a read.
|
||||||
|
*
|
||||||
|
* For simplicity's sake, bumpToNextSAMRecord() expects locIter and recordIter to be non-null, and
|
||||||
|
* guarantees that locIter and recordIter will be non-null after the bump.
|
||||||
|
*/
|
||||||
|
private void bumpToNextSAMRecord() {
|
||||||
|
// If there's a record still waiting in the current iterator, do nothing.
|
||||||
|
if( recordIter.hasNext() )
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Otherwise, find the next record.
|
||||||
|
recordIter.close();
|
||||||
|
while( locIter.hasNext() ) {
|
||||||
|
GenomeLoc currentLoc = locIter.next();
|
||||||
|
recordIter = reader.queryOverlapping( currentLoc.getContig(),
|
||||||
|
(int)currentLoc.getStart(),
|
||||||
|
(int)currentLoc.getStop() );
|
||||||
|
if( recordIter.hasNext() )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||||
|
}
|
||||||
|
|
||||||
|
private class NullCloseableIterator<T> implements CloseableIterator<T> {
|
||||||
|
public boolean hasNext() { return false; }
|
||||||
|
public T next() { throw new java.util.NoSuchElementException(); }
|
||||||
|
public void close() {}
|
||||||
|
public void remove() {}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue