Restored serial version of reader initialization. Serial mode is default, as the performance gains aren't so huge.

-- Serial version can be re-enabled with a static boolean, if we decide to return to the serial version

-- Comparison of serial and parallel reader with cached and uncached files:

Initialization time: serial   with 500 fully cached BAMs: 8.20 seconds
Initialization time: serial   with 500 uncached BAMs    : 197.02 seconds
Initialization time: parallel with 500 fully cached BAMs: 30.12 seconds
Initialization time: parallel with 500 uncached BAMs    : 75.47 seconds
This commit is contained in:
Mark DePristo 2011-12-16 09:07:40 -05:00
parent fb1c9d2abc
commit 3414ecfe2e
1 changed files with 12 additions and 8 deletions

View File

@ -63,7 +63,9 @@ import java.util.concurrent.*;
*/ */
public class SAMDataSource { public class SAMDataSource {
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory(); final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
final private static boolean USE_PARALLEL_LOADING = true;
/** If true, we will load SAMReaders in parallel */
final private static boolean USE_PARALLEL_LOADING = false;
/** Backing support for reads. */ /** Backing support for reads. */
protected final ReadProperties readProperties; protected final ReadProperties readProperties;
@ -721,7 +723,7 @@ public class SAMDataSource {
logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial")); logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial"));
if ( ! USE_PARALLEL_LOADING ) { if ( ! USE_PARALLEL_LOADING ) {
final int tickSize = 10; final int tickSize = 50;
int nExecutedTotal = 0; int nExecutedTotal = 0;
long lastTick = timer.currentTime(); long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) { for(final SAMReaderID readerID: readerIDs) {
@ -733,8 +735,9 @@ public class SAMDataSource {
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile)); logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
readers.put(init.readerID,init.reader); readers.put(init.readerID,init.reader);
if ( nExecutedTotal++ % tickSize == 0) { if ( nExecutedTotal++ % tickSize == 0) {
int tickInMS = (int)((timer.currentTime() - lastTick) / 1000.0); double tickInSec = (timer.currentTime() - lastTick) / 1000.0;
printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInMS); printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInSec);
lastTick = timer.currentTime();
} }
} }
} else { } else {
@ -776,7 +779,7 @@ public class SAMDataSource {
final int nExecutedTotal = totalNumberOfFiles - pending.size(); final int nExecutedTotal = totalNumberOfFiles - pending.size();
final int nExecutedInTick = prevSize - pending.size(); final int nExecutedInTick = prevSize - pending.size();
printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, (int)(waitTimeInMS / 1000)); printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, waitTimeInMS / 1000.0);
futures = pending; futures = pending;
} }
} catch ( InterruptedException e ) { } catch ( InterruptedException e ) {
@ -794,14 +797,15 @@ public class SAMDataSource {
final private void printReaderPerformance(final int nExecutedTotal, final private void printReaderPerformance(final int nExecutedTotal,
final int nExecutedInTick, final int nExecutedInTick,
final int totalNumberOfFiles, final int totalNumberOfFiles,
final SimpleTimer timer, final int tickDuration) { final SimpleTimer timer,
final double tickDurationInSec) {
final int pendingSize = totalNumberOfFiles - nExecutedTotal; final int pendingSize = totalNumberOfFiles - nExecutedTotal;
final double totalTimeInSeconds = timer.getElapsedTime(); final double totalTimeInSeconds = timer.getElapsedTime();
final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds); final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
final int nRemaining = pendingSize; final int nRemaining = pendingSize;
final double estTimeToComplete = pendingSize / nTasksPerSecond; final double estTimeToComplete = pendingSize / nTasksPerSecond;
logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m", logger.info(String.format("Init %d BAMs in last %.2f s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m",
nExecutedInTick, tickDuration, nExecutedInTick, tickDurationInSec,
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond, nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
nRemaining, estTimeToComplete, estTimeToComplete / 60)); nRemaining, estTimeToComplete, estTimeToComplete / 60));