diff --git a/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java b/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java index df54ce815..2c68271d9 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java @@ -230,6 +230,12 @@ public class TraversalEngine { * @param loc Current location */ public void printProgress( boolean mustPrint, final String type, GenomeLoc loc ) { + // If an index is enabled, file read progress is meaningless because a linear + // traversal is not being performed. For now, don't bother printing progress. + // TODO: Create a sam indexed read tracker that tracks based on percentage through the query. + if( samReadingTracker == null ) + return; + final long nRecords = this.nRecords; final long curTime = System.currentTimeMillis(); final double elapsed = (curTime - startTime) / 1000.0; @@ -290,33 +296,66 @@ public class TraversalEngine { } private void initializeReads(final boolean THREADED_IO) { + + Iterator samIterator; try { + samReadIter = loadSAMFile( readsFile, THREADED_IO ); + } + catch( IOException ex ) { + // TODO: IOException should be a checked exception in this case. 
+ throw new RuntimeIOException(ex); + } + + if ( beSafeP ) { + // Keep a handle on the verifier so traverseByRead can still call setCheckOrderP(false) on it. + verifyingSamReadIter = new VerifyingSamIterator(samReadIter); + samReadIter = verifyingSamReadIter; + } + + if ( THREADED_IO ) { + System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE); + samReadIter = new ThreadedIterator(samReadIter, THREADED_IO_BUFFER_SIZE); + } + } + + /** + * Opens the reads file and returns an iterator over its records. If the file has an index, a + * SamQueryIterator over locs is returned; otherwise the file is reopened as a stream and read + * linearly through a FileProgressTracker. + * @param samFile Not read by this method; the readsFile field is opened instead. + * @param threadedIO Not read by this method. + * @return Iterator over the reads in the file. + * @throws IOException If the reads file cannot be opened as a stream. + */ + protected Iterator loadSAMFile( final File samFile, final boolean threadedIO ) + throws IOException { + Iterator iterator = null; + + samReader = new SAMFileReader(readsFile, true); + samReader.setValidationStringency(strictness); + + final SAMFileHeader header = samReader.getFileHeader(); + System.err.println("Sort order is: " + header.getSortOrder()); + + // If the file has an index, querying functions are available. Use them if possible... + if(samReader.hasIndex()) { + iterator = new SamQueryIterator( samReader, locs ); + } + else { + // Ugh. Close and reopen the file so that the file progress decorator can be assigned to the input stream. + samReader.close(); + + final FileInputStream samFileStream = new FileInputStream(readsFile); final InputStream bufferedStream= new BufferedInputStream(samFileStream); - //final InputStream bufferedStream= new BufferedInputStream(samInputStream, 10000000); + // Read through the stream opened above; reading the file directly would leave samFileStream unread and the tracker's channel idle. samReader = new SAMFileReader(bufferedStream, true); samReader.setValidationStringency(strictness); - final SAMFileHeader header = samReader.getFileHeader(); - System.err.println("Sort order is: " + header.getSortOrder()); - samReadingTracker = new FileProgressTracker( readsFile, samReader.iterator(), samFileStream.getChannel(), 1000 ); - if ( beSafeP ) { - verifyingSamReadIter = new VerifyingSamIterator(samReadingTracker); - samReadIter = verifyingSamReadIter; - } else { - samReadIter = samReadingTracker; - } - - if ( THREADED_IO ) { - System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE); - samReadIter = new ThreadedIterator(samReadIter, THREADED_IO_BUFFER_SIZE); - 
} + iterator = samReadingTracker; + } - } - catch (IOException e) { - throw new RuntimeIOException(e); - } + + return iterator; } @@ -524,7 +563,7 @@ public class TraversalEngine { * @return 0 on success */ protected int traverseByRead(ReadWalker walker) { - if ( refFileName == null && ! walker.requiresOrderedReads() ) { + if ( refFileName == null && ! walker.requiresOrderedReads() && verifyingSamReadIter != null ) { System.out.println("STATUS: No reference file provided and unordered reads are tolerated, enabling out of order read processing."); if ( verifyingSamReadIter != null ) verifyingSamReadIter.setCheckOrderP(false); diff --git a/playground/java/src/org/broadinstitute/sting/gatk/iterators/SamQueryIterator.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/SamQueryIterator.java new file mode 100755 index 000000000..27c4937b9 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/SamQueryIterator.java @@ -0,0 +1,106 @@ +package org.broadinstitute.sting.gatk.iterators; + +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.CloseableIterator; +import org.broadinstitute.sting.utils.GenomeLoc; + +/** + * Iterates over the SAM records that overlap a list of genome locations, visiting the locations in order. + * If no locations are supplied, iterates over every record in the file instead. + * + * User: hanna + * Date: Mar 16, 2009 + * Time: 6:08:08 PM + */ +public class SamQueryIterator implements Iterator<SAMRecord> { + + SAMFileReader reader = null; + + Iterator<GenomeLoc> locIter = null; + CloseableIterator<SAMRecord> recordIter = null; + + /** + * Creates an iterator positioned at the first available record. + * @param reader Reader to pull records from. + * @param locs Locations to query, in traversal order, or null to traverse the whole file. + */ + public SamQueryIterator( SAMFileReader reader, GenomeLoc[] locs ) { + this.reader = reader; + + // Our internal contract for the class guarantees that locIter and recordIter are never null. + // Initialize them and seed them with empty data as necessary. + if(locs != null) { + // The user requested a specific set of locations, set up the iterators accordingly. 
+ locIter = Arrays.asList(locs).iterator(); + recordIter = new NullCloseableIterator<SAMRecord>(); + } + else { + // The user requested traversal of the entire SAM file. Handle that here. + // TODO: This would be better handled as a completely separate iterator. + locIter = new ArrayList<GenomeLoc>().iterator(); + recordIter = reader.iterator(); + } + + bumpToNextSAMRecord(); + } + + /** @return true if another record is available at the current or any remaining location. */ + public boolean hasNext() { + bumpToNextSAMRecord(); + return recordIter.hasNext(); + } + + /** @return the next record; throws java.util.NoSuchElementException once every location is exhausted. */ + public SAMRecord next() { + bumpToNextSAMRecord(); + return recordIter.next(); + } + + /** + * Bump the loc iterator to the next spot with a read. + * + * For simplicity's sake, bumpToNextSAMRecord() expects locIter and recordIter to be non-null, and + * guarantees that locIter and recordIter will be non-null after the bump. + */ + private void bumpToNextSAMRecord() { + // If there's a record still waiting in the current iterator, do nothing. + if( recordIter.hasNext() ) + return; + + // Otherwise, release the exhausted iterator and swap in an empty placeholder so that a + // closed iterator is never queried or closed again by later calls. + recordIter.close(); + recordIter = new NullCloseableIterator<SAMRecord>(); + + while( locIter.hasNext() ) { + GenomeLoc currentLoc = locIter.next(); + CloseableIterator<SAMRecord> candidate = reader.queryOverlapping( currentLoc.getContig(), + (int)currentLoc.getStart(), + (int)currentLoc.getStop() ); + if( candidate.hasNext() ) { + recordIter = candidate; + break; + } + // SAMFileReader permits only one open iterator at a time, so close the empty result before querying again. + candidate.close(); + } + } + + public void remove() { + throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); + } + + /** Empty, always-exhausted iterator used as a placeholder when no query is open. */ + private static class NullCloseableIterator<T> implements CloseableIterator<T> { + public boolean hasNext() { return false; } + public T next() { throw new java.util.NoSuchElementException(); } + public void close() {} + public void remove() { throw new UnsupportedOperationException(); } + } +}