Start printing traversal stats after 30 seconds. I can't stand waiting 2 minutes.
This commit is contained in:
parent
7204fcc2c3
commit
6d260ec6ae
|
|
@ -469,7 +469,7 @@ public class GenomeAnalysisEngine {
|
||||||
throw new ReviewedStingException("Unable to determine walker type for walker " + walker.getClass().getName());
|
throw new ReviewedStingException("Unable to determine walker type for walker " + walker.getClass().getName());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
final int SHARD_SIZE = walker instanceof RodWalker ? 1000000 : 100000;
|
final int SHARD_SIZE = walker instanceof RodWalker ? 1000000 : 100000; // TODO -- make it a multiple of 16K
|
||||||
if(intervals == null)
|
if(intervals == null)
|
||||||
return referenceDataSource.createShardsOverEntireReference(readsDataSource,genomeLocParser,SHARD_SIZE);
|
return referenceDataSource.createShardsOverEntireReference(readsDataSource,genomeLocParser,SHARD_SIZE);
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,6 @@ import net.sf.picard.reference.FastaSequenceIndexBuilder;
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import net.sf.picard.sam.CreateSequenceDictionary;
|
import net.sf.picard.sam.CreateSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.FilePointer;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
|
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||||
|
|
@ -46,7 +45,6 @@ import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -230,26 +228,51 @@ public class ReferenceDataSource {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an iterator for processing the entire reference.
|
* Creates the shards for processing the given intervals of the reference.
|
||||||
* @param readsDataSource the reads datasource to embed in the locus shard. TODO: decouple the creation of the shards themselves from the creation of the driving iterator so that datasources need not be passed to datasources.
|
* @param readsDataSource the reads datasource to embed in the locus shard. TODO: decouple the creation of the shards themselves from the creation of the driving iterator so that datasources need not be passed to datasources.
|
||||||
* @param intervals the list of intervals to use when processing the reference.
|
* @param intervals the list of intervals to use when processing the reference.
|
||||||
* @param maxShardSize The maximum shard size which can be used to create this list.
|
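* @param targetShardSize the target - and maximum - shard size. Intervals larger than this are split into pieces of at most this size; consecutive smaller intervals on the same contig are merged greedily into a single shard, spanning from the first interval's start to the last interval's stop, as long as that span does not exceed this size.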
|
||||||
* @return Creates a schedule for performing a traversal over the entire reference.
|
* @return the shards, in the order they were created, for performing a traversal over the given intervals.
|
||||||
*/
|
*/
|
||||||
public Iterable<Shard> createShardsOverIntervals(final SAMDataSource readsDataSource, final GenomeLocSortedSet intervals, final int maxShardSize) {
|
public Iterable<Shard> createShardsOverIntervals(final SAMDataSource readsDataSource, final GenomeLocSortedSet intervals, final int targetShardSize) {
|
||||||
List<Shard> shards = new ArrayList<Shard>();
|
final List<Shard> shards = new ArrayList<Shard>();
|
||||||
|
final GenomeLocParser parser = intervals.getGenomeLocParser();
|
||||||
|
GenomeLoc currentInterval = null;
|
||||||
|
|
||||||
for(GenomeLoc interval: intervals) {
|
for(GenomeLoc interval: intervals) {
|
||||||
while(interval.size() > maxShardSize) {
|
// if the next interval is too big, we can safely shard currentInterval and then break down this one
|
||||||
shards.add(new LocusShard(intervals.getGenomeLocParser(),
|
if (interval.size() > targetShardSize) {
|
||||||
readsDataSource,
|
if (currentInterval != null)
|
||||||
Collections.singletonList(intervals.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1)),
|
shards.add(createShardFromInterval(currentInterval, readsDataSource, parser));
|
||||||
null));
|
currentInterval = interval;
|
||||||
interval = intervals.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop());
|
while(currentInterval.size() > targetShardSize) {
|
||||||
|
final GenomeLoc partialInterval = parser.createGenomeLoc(currentInterval.getContig(), currentInterval.getStart(), currentInterval.getStart()+targetShardSize-1);
|
||||||
|
shards.add(createShardFromInterval(partialInterval, readsDataSource, parser));
|
||||||
|
currentInterval = parser.createGenomeLoc(currentInterval.getContig(),currentInterval.getStart()+targetShardSize,currentInterval.getStop());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// otherwise, we need to check whether we can merge this interval with currentInterval (and either shard currentInterval or merge accordingly)
|
||||||
|
else {
|
||||||
|
if (currentInterval == null) {
|
||||||
|
currentInterval = interval;
|
||||||
|
}
|
||||||
|
else if (currentInterval.compareContigs(interval) != 0 || interval.getStop() - currentInterval.getStart() + 1 > targetShardSize) {
|
||||||
|
shards.add(createShardFromInterval(currentInterval, readsDataSource, parser));
|
||||||
|
currentInterval = interval;
|
||||||
|
} else {
|
||||||
|
currentInterval = parser.createGenomeLoc(currentInterval.getContig(),currentInterval.getStart(),interval.getStop());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
shards.add(new LocusShard(intervals.getGenomeLocParser(),
|
|
||||||
readsDataSource,
|
|
||||||
Collections.singletonList(interval),
|
|
||||||
null));
|
|
||||||
}
|
}
|
||||||
|
if (currentInterval != null)
|
||||||
|
shards.add(createShardFromInterval(currentInterval, readsDataSource, parser));
|
||||||
return shards;
|
return shards;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Shard createShardFromInterval(final GenomeLoc interval, final SAMDataSource readsDataSource, final GenomeLocParser parser) {
|
||||||
|
System.out.println("Adding shard " + interval);
|
||||||
|
return new LocusShard(parser,
|
||||||
|
readsDataSource,
|
||||||
|
Collections.singletonList(interval),
|
||||||
|
null);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
||||||
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
|
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
|
||||||
private int printProgressCheckCounter = 0;
|
private int printProgressCheckCounter = 0;
|
||||||
private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
|
private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
|
||||||
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds
|
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 30 * 1000; // in milliseconds
|
||||||
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
|
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
|
||||||
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
|
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
|
||||||
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
|
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue