Start printing traversal stats after 30 seconds. I can't stand waiting 2 minutes.
This commit is contained in:
parent
7204fcc2c3
commit
6d260ec6ae
|
|
@ -469,7 +469,7 @@ public class GenomeAnalysisEngine {
|
|||
throw new ReviewedStingException("Unable to determine walker type for walker " + walker.getClass().getName());
|
||||
}
|
||||
else {
|
||||
final int SHARD_SIZE = walker instanceof RodWalker ? 1000000 : 100000;
|
||||
final int SHARD_SIZE = walker instanceof RodWalker ? 1000000 : 100000; // TODO -- make it a multiple of 16K
|
||||
if(intervals == null)
|
||||
return referenceDataSource.createShardsOverEntireReference(readsDataSource,genomeLocParser,SHARD_SIZE);
|
||||
else
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ import net.sf.picard.reference.FastaSequenceIndexBuilder;
|
|||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.sam.CreateSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.FilePointer;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
|
|
@ -46,7 +45,6 @@ import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
|
|||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -230,26 +228,51 @@ public class ReferenceDataSource {
|
|||
|
||||
/**
|
||||
* Creates the list of shards used to process the supplied intervals (rather than the entire reference).
|
||||
* @param readsDataSource the reads datasource to embed in the locus shard. TODO: decouple the creation of the shards themselves from the creation of the driving iterator so that datasources need not be passed to datasources.
|
||||
* @param intervals the list of intervals to use when processing the reference.
|
||||
* @param maxShardSize The maximum shard size which can be used to create this list.
|
||||
* @param readsDataSource the reads datasource to embed in the locus shard. TODO: decouple the creation of the shards themselves from the creation of the driving iterator so that datasources need not be passed to datasources.
|
||||
* @param intervals the list of intervals to use when processing the reference.
|
||||
* @param targetShardSize the suggested - and maximum - shard size which can be used to create this list; we will merge intervals greedily so that we generate shards up to but not greater than the target size.
|
||||
* @return A schedule (list of shards) for performing a traversal over the supplied intervals.
|
||||
*/
|
||||
public Iterable<Shard> createShardsOverIntervals(final SAMDataSource readsDataSource, final GenomeLocSortedSet intervals, final int maxShardSize) {
|
||||
List<Shard> shards = new ArrayList<Shard>();
|
||||
public Iterable<Shard> createShardsOverIntervals(final SAMDataSource readsDataSource, final GenomeLocSortedSet intervals, final int targetShardSize) {
|
||||
final List<Shard> shards = new ArrayList<Shard>();
|
||||
final GenomeLocParser parser = intervals.getGenomeLocParser();
|
||||
GenomeLoc currentInterval = null;
|
||||
|
||||
for(GenomeLoc interval: intervals) {
|
||||
while(interval.size() > maxShardSize) {
|
||||
shards.add(new LocusShard(intervals.getGenomeLocParser(),
|
||||
readsDataSource,
|
||||
Collections.singletonList(intervals.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1)),
|
||||
null));
|
||||
interval = intervals.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop());
|
||||
// if the next interval is too big, we can safely shard currentInterval and then break down this one
|
||||
if (interval.size() > targetShardSize) {
|
||||
if (currentInterval != null)
|
||||
shards.add(createShardFromInterval(currentInterval, readsDataSource, parser));
|
||||
currentInterval = interval;
|
||||
while(currentInterval.size() > targetShardSize) {
|
||||
final GenomeLoc partialInterval = parser.createGenomeLoc(currentInterval.getContig(), currentInterval.getStart(), currentInterval.getStart()+targetShardSize-1);
|
||||
shards.add(createShardFromInterval(partialInterval, readsDataSource, parser));
|
||||
currentInterval = parser.createGenomeLoc(currentInterval.getContig(),currentInterval.getStart()+targetShardSize,currentInterval.getStop());
|
||||
}
|
||||
}
|
||||
// otherwise, we need to check whether we can merge this interval with currentInterval (and either shard currentInterval or merge accordingly)
|
||||
else {
|
||||
if (currentInterval == null) {
|
||||
currentInterval = interval;
|
||||
}
|
||||
else if (currentInterval.compareContigs(interval) != 0 || interval.getStop() - currentInterval.getStart() + 1 > targetShardSize) {
|
||||
shards.add(createShardFromInterval(currentInterval, readsDataSource, parser));
|
||||
currentInterval = interval;
|
||||
} else {
|
||||
currentInterval = parser.createGenomeLoc(currentInterval.getContig(),currentInterval.getStart(),interval.getStop());
|
||||
}
|
||||
}
|
||||
shards.add(new LocusShard(intervals.getGenomeLocParser(),
|
||||
readsDataSource,
|
||||
Collections.singletonList(interval),
|
||||
null));
|
||||
}
|
||||
if (currentInterval != null)
|
||||
shards.add(createShardFromInterval(currentInterval, readsDataSource, parser));
|
||||
return shards;
|
||||
}
|
||||
|
||||
private static Shard createShardFromInterval(final GenomeLoc interval, final SAMDataSource readsDataSource, final GenomeLocParser parser) {
|
||||
System.out.println("Adding shard " + interval);
|
||||
return new LocusShard(parser,
|
||||
readsDataSource,
|
||||
Collections.singletonList(interval),
|
||||
null);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
|
||||
private int printProgressCheckCounter = 0;
|
||||
private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
|
||||
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds
|
||||
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 30 * 1000; // in milliseconds
|
||||
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
|
||||
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
|
||||
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
|
||||
|
|
|
|||
Loading…
Reference in New Issue