Restored serial version of reader initialization. Parallel mode is default.
-- Serial version can be re-enabled with a static boolean, if we decide to return to the serial version
This commit is contained in:
parent
16a563889f
commit
fb1c9d2abc
|
|
@ -63,6 +63,7 @@ import java.util.concurrent.*;
|
||||||
*/
|
*/
|
||||||
public class SAMDataSource {
|
public class SAMDataSource {
|
||||||
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
|
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
|
||||||
|
final private static boolean USE_PARALLEL_LOADING = true;
|
||||||
|
|
||||||
/** Backing support for reads. */
|
/** Backing support for reads. */
|
||||||
protected final ReadProperties readProperties;
|
protected final ReadProperties readProperties;
|
||||||
|
|
@ -714,68 +715,96 @@ public class SAMDataSource {
|
||||||
* @param validationStringency validation stringency.
|
* @param validationStringency validation stringency.
|
||||||
*/
|
*/
|
||||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
||||||
final int N_THREADS = 8;
|
final int totalNumberOfFiles = readerIDs.size();
|
||||||
int totalNumberOfFiles = readerIDs.size();
|
|
||||||
int readerNumber = 1;
|
int readerNumber = 1;
|
||||||
|
final SimpleTimer timer = new SimpleTimer().start();
|
||||||
|
|
||||||
ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
|
logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial"));
|
||||||
final List<ReaderInitializer> inits = new ArrayList<ReaderInitializer>(totalNumberOfFiles);
|
if ( ! USE_PARALLEL_LOADING ) {
|
||||||
Queue<Future<ReaderInitializer>> futures = new LinkedList<Future<ReaderInitializer>>();
|
final int tickSize = 10;
|
||||||
for (SAMReaderID readerID: readerIDs) {
|
int nExecutedTotal = 0;
|
||||||
logger.debug("Enqueuing for initialization: " + readerID.samFile);
|
long lastTick = timer.currentTime();
|
||||||
final ReaderInitializer init = new ReaderInitializer(readerID);
|
for(final SAMReaderID readerID: readerIDs) {
|
||||||
inits.add(init);
|
final ReaderInitializer init = new ReaderInitializer(readerID).call();
|
||||||
futures.add(executor.submit(init));
|
if (threadAllocation.getNumIOThreads() > 0) {
|
||||||
}
|
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||||
|
|
||||||
final SimpleTimer timer = new SimpleTimer();
|
|
||||||
try {
|
|
||||||
final int MAX_WAIT = 30 * 1000;
|
|
||||||
final int MIN_WAIT = 1 * 1000;
|
|
||||||
|
|
||||||
timer.start();
|
|
||||||
while ( ! futures.isEmpty() ) {
|
|
||||||
final int prevSize = futures.size();
|
|
||||||
final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file
|
|
||||||
final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT));
|
|
||||||
Thread.sleep(waitTimeInMS);
|
|
||||||
|
|
||||||
Queue<Future<ReaderInitializer>> pending = new LinkedList<Future<ReaderInitializer>>();
|
|
||||||
for ( final Future<ReaderInitializer> initFuture : futures ) {
|
|
||||||
if ( initFuture.isDone() ) {
|
|
||||||
final ReaderInitializer init = initFuture.get();
|
|
||||||
if (threadAllocation.getNumIOThreads() > 0) {
|
|
||||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
|
||||||
}
|
|
||||||
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID));
|
|
||||||
readers.put(init.readerID, init.reader);
|
|
||||||
} else {
|
|
||||||
pending.add(initFuture);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final int pendingSize = pending.size();
|
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
|
||||||
final int nExecutedInTick = prevSize - pendingSize;
|
readers.put(init.readerID,init.reader);
|
||||||
final int nExecutedTotal = totalNumberOfFiles - pendingSize;
|
if ( nExecutedTotal++ % tickSize == 0) {
|
||||||
final double totalTimeInSeconds = timer.getElapsedTime();
|
int tickInMS = (int)((timer.currentTime() - lastTick) / 1000.0);
|
||||||
final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
|
printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInMS);
|
||||||
final int nRemaining = pendingSize;
|
}
|
||||||
final double estTimeToComplete = pendingSize / nTasksPerSecond;
|
|
||||||
logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m",
|
|
||||||
nExecutedInTick, (int)(waitTimeInMS / 1000.0),
|
|
||||||
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
|
|
||||||
nRemaining, estTimeToComplete, estTimeToComplete / 60));
|
|
||||||
|
|
||||||
futures = pending;
|
|
||||||
}
|
}
|
||||||
} catch ( InterruptedException e ) {
|
} else {
|
||||||
throw new ReviewedStingException("Interrupted SAMReader initialization", e);
|
final int N_THREADS = 8;
|
||||||
} catch ( ExecutionException e ) {
|
|
||||||
throw new ReviewedStingException("Execution exception during SAMReader initialization", e);
|
final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
|
||||||
|
final List<ReaderInitializer> inits = new ArrayList<ReaderInitializer>(totalNumberOfFiles);
|
||||||
|
Queue<Future<ReaderInitializer>> futures = new LinkedList<Future<ReaderInitializer>>();
|
||||||
|
for (final SAMReaderID readerID: readerIDs) {
|
||||||
|
logger.debug("Enqueuing for initialization: " + readerID.samFile);
|
||||||
|
final ReaderInitializer init = new ReaderInitializer(readerID);
|
||||||
|
inits.add(init);
|
||||||
|
futures.add(executor.submit(init));
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
final int MAX_WAIT = 30 * 1000;
|
||||||
|
final int MIN_WAIT = 1 * 1000;
|
||||||
|
|
||||||
|
while ( ! futures.isEmpty() ) {
|
||||||
|
final int prevSize = futures.size();
|
||||||
|
final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file
|
||||||
|
final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT));
|
||||||
|
Thread.sleep(waitTimeInMS);
|
||||||
|
|
||||||
|
Queue<Future<ReaderInitializer>> pending = new LinkedList<Future<ReaderInitializer>>();
|
||||||
|
for ( final Future<ReaderInitializer> initFuture : futures ) {
|
||||||
|
if ( initFuture.isDone() ) {
|
||||||
|
final ReaderInitializer init = initFuture.get();
|
||||||
|
if (threadAllocation.getNumIOThreads() > 0) {
|
||||||
|
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||||
|
}
|
||||||
|
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID));
|
||||||
|
readers.put(init.readerID, init.reader);
|
||||||
|
} else {
|
||||||
|
pending.add(initFuture);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final int nExecutedTotal = totalNumberOfFiles - pending.size();
|
||||||
|
final int nExecutedInTick = prevSize - pending.size();
|
||||||
|
printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, (int)(waitTimeInMS / 1000));
|
||||||
|
futures = pending;
|
||||||
|
}
|
||||||
|
} catch ( InterruptedException e ) {
|
||||||
|
throw new ReviewedStingException("Interrupted SAMReader initialization", e);
|
||||||
|
} catch ( ExecutionException e ) {
|
||||||
|
throw new ReviewedStingException("Execution exception during SAMReader initialization", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
executor.shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
|
logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
|
||||||
executor.shutdown();
|
}
|
||||||
|
|
||||||
|
final private void printReaderPerformance(final int nExecutedTotal,
|
||||||
|
final int nExecutedInTick,
|
||||||
|
final int totalNumberOfFiles,
|
||||||
|
final SimpleTimer timer, final int tickDuration) {
|
||||||
|
final int pendingSize = totalNumberOfFiles - nExecutedTotal;
|
||||||
|
final double totalTimeInSeconds = timer.getElapsedTime();
|
||||||
|
final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
|
||||||
|
final int nRemaining = pendingSize;
|
||||||
|
final double estTimeToComplete = pendingSize / nTasksPerSecond;
|
||||||
|
logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m",
|
||||||
|
nExecutedInTick, tickDuration,
|
||||||
|
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
|
||||||
|
nRemaining, estTimeToComplete, estTimeToComplete / 60));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue