diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java index bfac08d35..23b16cff2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java @@ -94,6 +94,17 @@ public interface Downsampler { */ public T peekPending(); + /** + * Get the current number of items in this downsampler. + * + * This should be the best estimate of the total number of elements that will come out of the downsampler + * were consumeFinalizedItems() to be called immediately after this call. In other words, it should + * be the number of finalized items plus an estimate of the number of pending items that will ultimately be included as well. + * + * @return a non-negative integer + */ + public int size(); + /** * Returns the number of items discarded (so far) during the downsampling process * diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java index 266148178..1cede9c33 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java @@ -109,6 +109,11 @@ public class FractionalDownsampler implements ReadsDownsamp return numDiscardedItems; } + @Override + public int size() { + return selectedReads.size(); + } + public void signalEndOfInput() { // NO-OP } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java index a8a808333..4ff729537 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java @@ -128,6 +128,15 @@ public class LevelingDownsampler, E> implements Downsampler return numDiscardedItems; } + @Override + public int size() { + int s = 0; + for ( final List l : groups ) { + s += l.size(); + } + return s; + } + public void signalEndOfInput() { levelGroups(); groupsAreFinalized = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java index b06d5f5b4..3aaed6c73 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java @@ -89,6 +89,11 @@ public class PassThroughDownsampler implements ReadsDownsam return 0; } + @Override + public int size() { + return selectedReads.size(); + } + public void signalEndOfInput() { // NO-OP } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java index 4331fd723..0e6bbfcb6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java @@ -156,6 +156,11 @@ public class ReservoirDownsampler implements ReadsDownsampl return numDiscardedItems; } + @Override + public int size() { + return reservoir.size(); + } + public void signalEndOfInput() { // NO-OP } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java index 3da18b2bb..7c6c043c2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java @@ -112,6 +112,11 @@ public class SimplePositionalDownsampler implements ReadsDo return numDiscardedItems; } + @Override + public int size() { + return finalizedReads.size() + reservoir.size(); + } + public void signalEndOfInput() { finalizeReservoir(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java index 3a12c7ce7..972e51dcd 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java @@ -139,6 +139,7 @@ public class LevelingDownsamplerUnitTest extends BaseTest { Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); } + final int sizeFromDownsampler = downsampler.size(); List> downsampledStacks = downsampler.consumeFinalizedItems(); Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); @@ -151,6 +152,7 @@ public class LevelingDownsamplerUnitTest extends BaseTest { totalRemainingItems += stack.size(); } + Assert.assertEquals(sizeFromDownsampler, totalRemainingItems); int numItemsReportedDiscarded = downsampler.getNumberOfDiscardedItems(); int numItemsActuallyDiscarded = test.numStacks * test.stackSize - totalRemainingItems; diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java index 74a17189e..022eb02d2 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java +++ 
b/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java @@ -115,6 +115,7 @@ public class ReservoirDownsamplerUnitTest extends BaseTest { Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); } + Assert.assertEquals(downsampler.size(), test.expectedNumReadsAfterDownsampling); List downsampledReads = downsampler.consumeFinalizedItems(); Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);