From def077c4e53b4b64949077a173d85dd43b9bc97e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 9 Aug 2012 12:42:50 -0400 Subject: [PATCH 1/3] There's actually a subtle but important difference between foo++ and ++foo --- .../gatk/walkers/diagnostics/targets/FindCoveredIntervals.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 23e4d5ae0..373c8232e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -78,7 +78,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker { public Long reduce(final GenomeLoc value, Long reduce) { if (value != null) { out.println(value.toString()); - return reduce++; + return ++reduce; } else return reduce; } From c6132ebe26ed7b0cddc0dafc173ed53f478c33ff Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 9 Aug 2012 13:02:11 -0400 Subject: [PATCH 2/3] Fixed divide by zero bug when downsampler goes over regions where reads are all filtered out. Added Guillermo's bug report as an integration test --- .../compression/reducereads/SlidingWindow.java | 5 +++++ .../compression/reducereads/SyntheticRead.java | 2 +- .../reducereads/ReduceReadsIntegrationTest.java | 13 +++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 6a77b0c65..bdb9ef843 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -536,6 +536,10 @@ public class SlidingWindow { * @return a list of reads selected by the downsampler to cover the window to at least the desired coverage */ protected List downsampleVariantRegion(final List allReads) { + int nReads = allReads.size(); + if (nReads == 0) + return allReads; + double fraction = 100 / allReads.size(); if (fraction >= 1) return allReads; @@ -545,6 +549,7 @@ public class SlidingWindow { return downsampler.consumeDownsampledItems(); } + /** * Properly closes a Sliding Window, finalizing all consensus and variant * regions that still exist regardless of being able to fulfill the diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java index 9ee1a4634..cea596a7d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java @@ -102,7 +102,7 @@ public class SyntheticRead { * @param base the base to add * @param count number of reads with this base */ - @Requires("count < Byte.MAX_VALUE") + @Requires("count <= Byte.MAX_VALUE") public void add(BaseIndex base, byte count, byte qual, byte insQual, byte delQual, double mappingQuality) { counts.add(count); bases.add(base); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index ee894b420..5d16ba019 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -11,6 +11,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest { final String DELETION_BAM = validationDataLocation + "filtered_deletion_for_reduce_reads.bam"; final String STASH_BAM = validationDataLocation + "ReduceReadsStashBug.bam"; final String STASH_L = " -L 14:73718184-73718284 -L 14:73718294-73718330 -L 14:73718360-73718556"; + final String DIVIDEBYZERO_BAM = validationDataLocation + "ReduceReadsDivideByZeroBug.bam"; + final String DIVIDEBYZERO_L = " -L " + validationDataLocation + "ReduceReadsDivideByZeroBug.intervals"; final String L = " -L 20:10,100,000-10,120,000 "; private void RRTest(String testName, String args, String md5) { @@ -64,5 +66,16 @@ public class ReduceReadsIntegrationTest extends WalkerTest { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s "; executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6"))); } + + /** + * Divide by zero bug reported by GdA and users in the forum. Happens when the downsampler goes over a region where all reads get + * filtered out. + */ + @Test(enabled = true) + public void testDivideByZero() { + String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; + executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("137505c3efd1e9f8d9209dbdf8419ff9"))); + } + } From 67d4148b32b18af06518f8644bc01bc34a251436 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 9 Aug 2012 15:58:18 -0400 Subject: [PATCH 3/3] Fixing but reported by Thomas in the forum where reads were soft-clipped beyond the limits of the contig and ReduceReads was failing with a NoSuchElement exception. Now we hard clip anything that goes beyond the boundaries of the contig. --- .../walkers/compression/reducereads/ReduceReads.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 93d8319b7..177050667 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -251,7 +251,7 @@ public class ReduceReads extends ReadWalker, ReduceRea LinkedList mappedReads; totalReads++; if (!debugRead.isEmpty() && read.getReadName().contains(debugRead)) - System.out.println("Found debug read!"); + System.out.println("Found debug read!"); if (debugLevel == 1) System.out.printf("\nOriginal: %s %s %d %d\n", read, read.getCigar(), read.getAlignmentStart(), read.getAlignmentEnd()); @@ -260,7 +260,14 @@ public class ReduceReads extends ReadWalker, ReduceRea // attribute hash so we can determine later if we need to write down the alignment shift to the reduced BAM file read.setTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, read.getAlignmentStart()); read.setTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, read.getAlignmentEnd()); - + + // Check if the read goes beyond the boundaries of the chromosome, and hard clip those boundaries. + int chromosomeLength = ref.getGenomeLocParser().getContigInfo(read.getReferenceName()).getSequenceLength(); + if (read.getSoftStart() < 0) + read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart() - 1); + if (read.getSoftEnd() > chromosomeLength) + read = ReadClipper.hardClipByReadCoordinates(read, chromosomeLength - read.getSoftStart() + 1, read.getReadLength() - 1); + if (!DONT_SIMPLIFY_READS) read.simplify(); // Clear all unnecessary attributes if (!DONT_CLIP_ADAPTOR_SEQUENCES)