diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java index 47b0c9833..662c7526b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java @@ -34,10 +34,21 @@ import java.util.*; * @version 0.1 */ public class ReadShard extends Shard { + + /** + * Default read shard buffer size + */ + public static final int DEFAULT_MAX_READS = 10000; + /** * What is the maximum number of reads per BAM file which should go into a read shard. + * + * TODO: this non-final static variable should either be made final or turned into an + * TODO: instance variable somewhere -- as both static and mutable it wreaks havoc + * TODO: with tests that use multiple instances of SAMDataSource (since SAMDataSource + * TODO: changes this value) */ - public static int MAX_READS = 10000; + public static int MAX_READS = DEFAULT_MAX_READS; /** * The reads making up this shard. @@ -51,6 +62,9 @@ public class ReadShard extends Shard { /** * Sets the maximum number of reads buffered in a read shard. Implemented as a weirdly static interface * until we know what effect tuning this parameter has. + * + * TODO: this mutable static interface is awful and breaks tests -- need to refactor + * * @param bufferSize New maximum number */ static void setReadBufferSize(final int bufferSize) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index bf0d45f83..8562ace98 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -252,7 +252,7 @@ public class SAMDataSource { validationStringency = strictness; this.removeProgramRecords = removeProgramRecords; if(readBufferSize != null) - ReadShard.setReadBufferSize(readBufferSize); + ReadShard.setReadBufferSize(readBufferSize); // TODO: use of non-final static variable here is just awful, especially for parallel tests else { // Choose a sensible default for the read buffer size. For the moment, we're picking 1000 reads per BAM per shard (which effectively // will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once. diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancerUnitTest.java index b68956c0b..0807f36dc 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancerUnitTest.java @@ -90,7 +90,7 @@ public class ExperimentalReadShardBalancerUnitTest extends BaseTest { new GenomeLocParser(header.getSequenceDictionary()), false, SAMFileReader.ValidationStringency.SILENT, - null, + ReadShard.DEFAULT_MAX_READS, // reset ReadShard.MAX_READS to ReadShard.DEFAULT_MAX_READS for each test downsamplingMethod, new ValidationExclusion(), new ArrayList(), @@ -180,10 +180,10 @@ public class ExperimentalReadShardBalancerUnitTest extends BaseTest { for ( int numContigs = 1; numContigs <= 3; numContigs++ ) { for ( int numStacksPerContig : Arrays.asList(1, 2, 4) ) { // Use crucial read shard boundary values as the stack sizes - for ( int stackSize : Arrays.asList(ReadShard.MAX_READS / 2, ReadShard.MAX_READS / 2 + 10, ReadShard.MAX_READS, ReadShard.MAX_READS - 1, ReadShard.MAX_READS + 1, ReadShard.MAX_READS * 2) ) { - for ( int numUnmappedReads : Arrays.asList(0, ReadShard.MAX_READS / 2, ReadShard.MAX_READS * 2) ) { + for ( int stackSize : Arrays.asList(ReadShard.DEFAULT_MAX_READS / 2, ReadShard.DEFAULT_MAX_READS / 2 + 10, ReadShard.DEFAULT_MAX_READS, ReadShard.DEFAULT_MAX_READS - 1, ReadShard.DEFAULT_MAX_READS + 1, ReadShard.DEFAULT_MAX_READS * 2) ) { + for ( int numUnmappedReads : Arrays.asList(0, ReadShard.DEFAULT_MAX_READS / 2, ReadShard.DEFAULT_MAX_READS * 2) ) { // The first value will result in no downsampling at all, the others in some downsampling - for ( int downsamplingTargetCoverage : Arrays.asList(ReadShard.MAX_READS * 10, ReadShard.MAX_READS, ReadShard.MAX_READS / 2) ) { + for ( int downsamplingTargetCoverage : Arrays.asList(ReadShard.DEFAULT_MAX_READS * 10, ReadShard.DEFAULT_MAX_READS, ReadShard.DEFAULT_MAX_READS / 2) ) { new ExperimentalReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage); } }