diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index e3f197716..5c932fdce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -253,9 +253,10 @@ public class SAMDataSource { if(readBufferSize != null) ReadShard.setReadBufferSize(readBufferSize); // TODO: use of non-final static variable here is just awful, especially for parallel tests else { - // Choose a sensible default for the read buffer size. For the moment, we're picking 1000 reads per BAM per shard (which effectively - // will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once. - ReadShard.setReadBufferSize(Math.min(10000*samFiles.size(),250000)); + // Choose a sensible default for the read buffer size. + // Previously we we're picked 100000 reads per BAM per shard with a max cap of 250K reads in memory at once. + // Now we are simply setting it to 100K reads + ReadShard.setReadBufferSize(100000); } resourcePool = new SAMResourcePool(Integer.MAX_VALUE); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java index cd0198a29..ee71d82bb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java @@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Iterator; +import java.util.LinkedList; /** * A nano-scheduling version of TraverseReads. @@ -53,6 +54,7 @@ import java.util.Iterator; */ public class TraverseReadsNano extends TraversalEngine,ReadShardDataProvider> { /** our log, which we want to capture anything from this class */ + private final static boolean PRE_READ_ALL_MAP_DATA = true; protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class); private static final boolean DEBUG = false; final NanoScheduler nanoScheduler; @@ -111,7 +113,19 @@ public class TraverseReadsNano extends TraversalEngine, * should execute */ private Iterator aggregateMapData(final ReadShardDataProvider dataProvider) { - return new Iterator() { + final Iterator it = makeDataIterator(dataProvider); + if ( PRE_READ_ALL_MAP_DATA ) { + final LinkedList l = new LinkedList(); + while ( it.hasNext() ) l.add(it.next()); + return l.iterator(); + } else { + return it; + } + } + + + private Iterator makeDataIterator(final ReadShardDataProvider dataProvider) { + return new Iterator () { final ReadView reads = new ReadView(dataProvider); final ReadReferenceView reference = new ReadReferenceView(dataProvider); final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);