From 317dc4c323ff011418127df3183c9957f6f01bf6 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 7 Apr 2013 12:20:44 -0400 Subject: [PATCH] Add size() method to Downsampler interface -- This method provides client with the current number of elements, without having to retrieve the underlying list. Added unit tests for LevelingDownsampler and ReservoirDownsampler as these are the only two complex ones. All of the others are trivially obviously correct. --- .../sting/gatk/downsampling/Downsampler.java | 11 +++++++++++ .../gatk/downsampling/FractionalDownsampler.java | 5 +++++ .../sting/gatk/downsampling/LevelingDownsampler.java | 9 +++++++++ .../gatk/downsampling/PassThroughDownsampler.java | 5 +++++ .../sting/gatk/downsampling/ReservoirDownsampler.java | 5 +++++ .../downsampling/SimplePositionalDownsampler.java | 5 +++++ .../downsampling/LevelingDownsamplerUnitTest.java | 2 ++ .../downsampling/ReservoirDownsamplerUnitTest.java | 1 + 8 files changed, 43 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java index bfac08d35..23b16cff2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java @@ -94,6 +94,17 @@ public interface Downsampler { */ public T peekPending(); + /** + * Get the current number of items in this downsampler + * + * This should be the best estimate of the total number of elements that will come out of the downsampler + * were consumeFinalizedItems() to be called immediately after this call. In other words it should + * be the number of finalized items + an estimate of the number of pending items that will ultimately be included as well. 
+ * + * @return a non-negative integer + */ + public int size(); + /** * Returns the number of items discarded (so far) during the downsampling process * diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java index 266148178..1cede9c33 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java @@ -109,6 +109,11 @@ public class FractionalDownsampler implements ReadsDownsamp return numDiscardedItems; } + @Override + public int size() { + return selectedReads.size(); + } + public void signalEndOfInput() { // NO-OP } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java index a8a808333..4ff729537 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java @@ -128,6 +128,15 @@ public class LevelingDownsampler, E> implements Downsampler return numDiscardedItems; } + @Override + public int size() { + int s = 0; + for ( final List l : groups ) { + s += l.size(); + } + return s; + } + public void signalEndOfInput() { levelGroups(); groupsAreFinalized = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java index b06d5f5b4..3aaed6c73 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java @@ -89,6 +89,11 @@ public class PassThroughDownsampler implements ReadsDownsam return 0; } + @Override + 
public int size() { + return selectedReads.size(); + } + public void signalEndOfInput() { // NO-OP } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java index 4331fd723..0e6bbfcb6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java @@ -156,6 +156,11 @@ public class ReservoirDownsampler implements ReadsDownsampl return numDiscardedItems; } + @Override + public int size() { + return reservoir.size(); + } + public void signalEndOfInput() { // NO-OP } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java index 3da18b2bb..7c6c043c2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java @@ -112,6 +112,11 @@ public class SimplePositionalDownsampler implements ReadsDo return numDiscardedItems; } + @Override + public int size() { + return finalizedReads.size() + reservoir.size(); + } + public void signalEndOfInput() { finalizeReservoir(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java index 3a12c7ce7..972e51dcd 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java @@ -139,6 +139,7 @@ public class LevelingDownsamplerUnitTest extends BaseTest { Assert.assertTrue(downsampler.peekFinalized() == null && 
downsampler.peekPending() == null); } + final int sizeFromDownsampler = downsampler.size(); List> downsampledStacks = downsampler.consumeFinalizedItems(); Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); @@ -151,6 +152,7 @@ public class LevelingDownsamplerUnitTest extends BaseTest { totalRemainingItems += stack.size(); } + Assert.assertEquals(sizeFromDownsampler, totalRemainingItems); int numItemsReportedDiscarded = downsampler.getNumberOfDiscardedItems(); int numItemsActuallyDiscarded = test.numStacks * test.stackSize - totalRemainingItems; diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java index 74a17189e..022eb02d2 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java @@ -115,6 +115,7 @@ public class ReservoirDownsamplerUnitTest extends BaseTest { Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); } + Assert.assertEquals(downsampler.size(), test.expectedNumReadsAfterDownsampling); List downsampledReads = downsampler.consumeFinalizedItems(); Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);