diff --git a/public/java/src/net/sf/samtools/GATKBAMFileSpan.java b/public/java/src/net/sf/samtools/GATKBAMFileSpan.java index 4692c6671..ffc40067a 100644 --- a/public/java/src/net/sf/samtools/GATKBAMFileSpan.java +++ b/public/java/src/net/sf/samtools/GATKBAMFileSpan.java @@ -143,10 +143,14 @@ public class GATKBAMFileSpan extends BAMFileSpan { List mergedUnion = new ArrayList(); GATKChunk currentChunk = unmergedUnion.remove(); while(!unmergedUnion.isEmpty()) { - // Find the end of this range of chunks. - while(!unmergedUnion.isEmpty() && currentChunk.getChunkEnd() >= unmergedUnion.peek().getChunkStart()) { + + // While the current chunk can be merged with the next chunk: + while( ! unmergedUnion.isEmpty() && + (currentChunk.overlaps(unmergedUnion.peek()) || currentChunk.isAdjacentTo(unmergedUnion.peek())) ) { + + // Merge the current chunk with the next chunk: GATKChunk nextChunk = unmergedUnion.remove(); - currentChunk = new GATKChunk(currentChunk.getChunkStart(),nextChunk.getChunkEnd()); + currentChunk = currentChunk.merge(nextChunk); } // Add the accumulated range. mergedUnion.add(currentChunk); diff --git a/public/java/src/net/sf/samtools/GATKChunk.java b/public/java/src/net/sf/samtools/GATKChunk.java index e9335a86d..42f087131 100644 --- a/public/java/src/net/sf/samtools/GATKChunk.java +++ b/public/java/src/net/sf/samtools/GATKChunk.java @@ -100,4 +100,16 @@ public class GATKChunk extends Chunk { final int offsetSpan = (int)((getChunkEnd()&0xFFFF)-(getChunkStart()&0xFFFF)); return chunkSpan + offsetSpan; } + + /** + * Merges two chunks together. The caller is responsible for testing whether the + * chunks overlap/are adjacent before calling this method! + * + * @param other the chunk to merge with this chunk + * @return a new chunk representing the union of the two chunks (provided the chunks were + * overlapping/adjacent) + */ + public GATKChunk merge ( GATKChunk other ) { + return new GATKChunk(Math.min(getChunkStart(), other.getChunkStart()), Math.max(getChunkEnd(), other.getChunkEnd())); + } } diff --git a/public/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java b/public/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java index a9586c3c8..dbb4ea225 100644 --- a/public/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java +++ b/public/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java @@ -50,7 +50,10 @@ public class GATKBAMFileSpanUnitTest { } @Test - public void testUnionOfOverlappingFileSpans() { + public void testUnionOfContiguousFileSpans() { + // Region 1 ends at position adjacent to Region 2 start: + // |---1----|---2----| + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16)); GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535)); GATKBAMFileSpan union = regionOne.union(regionTwo); @@ -58,6 +61,71 @@ public class GATKBAMFileSpanUnitTest { Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535)); } + @Test + public void testUnionOfFileSpansFirstRegionEndsWithinSecondRegion() { + // Region 1 ends within Region 2: + // |---2----| + // |---1----| + + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,(1<<16)|32767)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535)); + GATKBAMFileSpan union = regionOne.union(regionTwo); + Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not."); + Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535)); + } + + @Test + public void testUnionOfFileSpansFirstRegionEndsAtSecondRegionEnd() { + // Region 1 ends at Region 2 end: + // |---2----| + // |---1-----------| + + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,(1<<16)|65535)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535)); + GATKBAMFileSpan union = regionOne.union(regionTwo); + Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not."); + Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535)); + } + + @Test + public void testUnionOfFileSpansFirstRegionEndsAfterSecondRegionEnd() { + // Region 1 ends after Region 2 end: + // |---2----| + // |---1---------------| + + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,(1<<16)|65535)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|32767)); + GATKBAMFileSpan union = regionOne.union(regionTwo); + Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not."); + Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535)); + } + + @Test + public void testUnionOfFileSpansFirstRegionStartsAtSecondRegionStart() { + // Region 1 starts at Region 2 start, but ends before Region 2: + // |---2--------| + // |---1----| + + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|32767)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535)); + GATKBAMFileSpan union = regionOne.union(regionTwo); + Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not."); + Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(1<<16,(1<<16)|65535)); + } + + @Test + public void testUnionOfFileSpansFirstRegionEqualToSecondRegion() { + // Region 1 and Region 2 represent the same region: + // |---2----| + // |---1----| + + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535)); + GATKBAMFileSpan union = regionOne.union(regionTwo); + Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not."); + Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(1<<16,(1<<16)|65535)); + } + @Test public void testUnionOfStringOfFileSpans() { GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16) });