From deab9f0aa59828d0374ca5357a62901779bc5bf6 Mon Sep 17 00:00:00 2001 From: hanna Date: Wed, 30 Mar 2011 22:03:43 +0000 Subject: [PATCH] Initial work on proto-shard merger: - create size() method that returns an approximation of the uncompressed size in bytes of BAM span. I'll use this method as a protoshard weighting function until we determine how to normalize the weights across the different data access mechanisms (reads, reference, RODs). - Implementations of basic union/intersection/subtraction mechanisms for BAM spans; should be enough to get an accurate weight for two proto-shards put together. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5541 348d0f76-0448-11de-a6fe-93d51630548a --- java/src/net/sf/samtools/GATKBAMFileSpan.java | 163 +++++++++++++++ java/src/net/sf/samtools/GATKChunk.java | 18 +- .../reads/LowMemoryIntervalSharder.java | 4 +- .../sf/samtools/GATKBAMFileSpanUnitTest.java | 185 ++++++++++++++++++ .../net/sf/samtools/GATKChunkUnitTest.java | 70 +++++++ 5 files changed, 437 insertions(+), 3 deletions(-) create mode 100644 java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java create mode 100644 java/test/net/sf/samtools/GATKChunkUnitTest.java diff --git a/java/src/net/sf/samtools/GATKBAMFileSpan.java b/java/src/net/sf/samtools/GATKBAMFileSpan.java index 702248060..16d4ed0f5 100644 --- a/java/src/net/sf/samtools/GATKBAMFileSpan.java +++ b/java/src/net/sf/samtools/GATKBAMFileSpan.java @@ -24,9 +24,15 @@ package net.sf.samtools; +import net.sf.picard.util.PeekableIterator; + import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedList; import java.util.List; +import java.util.Queue; /** * A temporary solution to work around Java access rights issues: @@ -75,4 +81,161 @@ public class GATKBAMFileSpan extends BAMFileSpan { builder.append(String.format("%s;",chunk)); return builder.toString(); } + + /** + * Returns an approximation of the number 
of uncompressed bytes in this
+     * file span.
+     * @return Approximation of uncompressed bytes in filespan.
+     */
+    public long size() {
+        long size = 0L;
+        for(GATKChunk chunk: getGATKChunks())
+            size += chunk.size();
+        return size;
+    }
+
+    /**
+     * Computes the union of two FileSpans, merging chunks that overlap or abut into a single chunk.
+     * @param other FileSpan to union with this one.
+     * @return A new file span holding the merged chunks of both inputs; empty if both inputs are empty.
+     */
+    public GATKBAMFileSpan union(final GATKBAMFileSpan other) {
+        // No data? Return an empty file span.
+        if(getGATKChunks().size() == 0 && other.getGATKChunks().size() == 0)
+            return new GATKBAMFileSpan();
+
+        LinkedList<GATKChunk> unmergedUnion = new LinkedList<GATKChunk>();
+        unmergedUnion.addAll(getGATKChunks());
+        unmergedUnion.addAll(other.getGATKChunks());
+        Collections.sort(unmergedUnion);
+
+        List<GATKChunk> mergedUnion = new ArrayList<GATKChunk>();
+        GATKChunk currentChunk = unmergedUnion.remove();
+        while(!unmergedUnion.isEmpty()) {
+            // Absorb chunks starting inside the accumulated range; keep the larger end, since a chunk may be fully contained.
+            while(!unmergedUnion.isEmpty() && currentChunk.getChunkEnd() >= unmergedUnion.peek().getChunkStart()) {
+                GATKChunk nextChunk = unmergedUnion.remove();
+                currentChunk = new GATKChunk(currentChunk.getChunkStart(),Math.max(currentChunk.getChunkEnd(),nextChunk.getChunkEnd()));
+            }
+            // Add the accumulated range.
+            mergedUnion.add(currentChunk);
+            currentChunk = !unmergedUnion.isEmpty() ? unmergedUnion.remove() : null;
+        }
+
+        // At end of the loop above, the last chunk will be contained in currentChunk and will not yet have been added. Add it.
+        if(currentChunk !=null)
+            mergedUnion.add(currentChunk);
+
+        return new GATKBAMFileSpan(mergedUnion.toArray(new GATKChunk[mergedUnion.size()]));
+    }
+
+    /**
+     * Intersects two BAM file spans.
+     * @param other File span to intersect with this one.
+     * @return The intersected BAM file span.
+     */
+    public GATKBAMFileSpan intersection(final GATKBAMFileSpan other) {
+        Iterator<GATKChunk> thisIterator = getGATKChunks().iterator();
+        Iterator<GATKChunk> otherIterator = other.getGATKChunks().iterator();
+
+        if(!thisIterator.hasNext() || !otherIterator.hasNext())
+            return new GATKBAMFileSpan();
+
+        GATKChunk thisChunk = thisIterator.next();
+        GATKChunk otherChunk = otherIterator.next();
+
+        List<GATKChunk> intersected = new ArrayList<GATKChunk>();
+
+        while(thisChunk != null && otherChunk != null) {
+            // If this chunk ends at or before the start of the other chunk, nothing is shared; skip this ahead.
+            if(thisChunk.getChunkEnd() <= otherChunk.getChunkStart()) {
+                thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
+                continue;
+            }
+
+            // If the other chunk ends at or before the start of this chunk, nothing is shared; skip other ahead.
+            if(thisChunk.getChunkStart() >= otherChunk.getChunkEnd()) {
+                otherChunk = otherIterator.hasNext() ? otherIterator.next() : null;
+                continue;
+            }
+
+            // If these two chunks overlap, pull out the intersection of data and truncate the current chunks to point after
+            // the intersection (or advance to the next chunk if nothing remains past the intersection).
+            if(thisChunk.overlaps(otherChunk)) {
+                // firstChunk is the one starting earlier; the intersection starts where secondChunk starts and ends at the smaller end.
+                GATKChunk firstChunk = thisChunk.getChunkStart() < otherChunk.getChunkStart() ? thisChunk : otherChunk;
+                GATKChunk secondChunk = thisChunk==firstChunk ? otherChunk : thisChunk;
+                GATKChunk intersectedChunk = new GATKChunk(secondChunk.getChunkStart(),Math.min(firstChunk.getChunkEnd(),secondChunk.getChunkEnd()));
+                intersected.add(intersectedChunk);
+
+                if(thisChunk.getChunkEnd() > intersectedChunk.getChunkEnd())
+                    thisChunk = new GATKChunk(intersectedChunk.getChunkEnd(),thisChunk.getChunkEnd());
+                else
+                    thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
+
+                if(otherChunk.getChunkEnd() > intersectedChunk.getChunkEnd())
+                    otherChunk = new GATKChunk(intersectedChunk.getChunkEnd(),otherChunk.getChunkEnd());
+                else
+                    otherChunk = otherIterator.hasNext() ?
otherIterator.next() : null;
+            }
+
+        }
+
+        return new GATKBAMFileSpan(intersected.toArray(new GATKChunk[intersected.size()]));
+    }
+
+    /**
+     * Subtracts other file span from this file span.
+     * @param other File span to strike out.
+     * @return This file span minus the other file span; this same instance is returned when either span is empty.
+     */
+
+    public GATKBAMFileSpan subtract(final GATKBAMFileSpan other) {
+        Iterator<GATKChunk> thisIterator = getGATKChunks().iterator();
+        Iterator<GATKChunk> otherIterator = other.getGATKChunks().iterator();
+
+        if(!thisIterator.hasNext() || !otherIterator.hasNext())
+            return this;
+
+        GATKChunk thisChunk = thisIterator.next();
+        GATKChunk otherChunk = otherIterator.next();
+
+        List<GATKChunk> subtracted = new ArrayList<GATKChunk>();
+
+        while(thisChunk != null && otherChunk != null) {
+            // If this chunk ends at or before the start of the other (abutting included, else the loop could stall), keep it whole and forge ahead.
+            if(thisChunk.getChunkEnd() <= otherChunk.getChunkStart()) {
+                subtracted.add(thisChunk);
+                thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
+                continue;
+            }
+
+            // If the other chunk ends at or before the start of this chunk, it removes nothing more; skip other ahead.
+            if(thisChunk.getChunkStart() >= otherChunk.getChunkEnd()) {
+                otherChunk = otherIterator.hasNext() ? otherIterator.next() : null;
+                continue;
+            }
+
+            // If these two chunks overlap, keep the part of this chunk before the overlap and truncate this chunk to point after
+            // the other chunk (or advance to the next chunk if nothing of this chunk remains past it).
+            if(thisChunk.overlaps(otherChunk)) {
+                // Add in any sort of prefix that this chunk might have over the other.
+                if(thisChunk.getChunkStart() < otherChunk.getChunkStart())
+                    subtracted.add(new GATKChunk(thisChunk.getChunkStart(),otherChunk.getChunkStart()));
+
+                if(thisChunk.getChunkEnd() > otherChunk.getChunkEnd())
+                    thisChunk = new GATKChunk(otherChunk.getChunkEnd(),thisChunk.getChunkEnd());
+                else
+                    thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
+            }
+        }
+
+        // Finish up any remaining contents of this that didn't make it into the subtracted array.
+ if(thisChunk != null) + subtracted.add(thisChunk); + while(thisIterator.hasNext()) + subtracted.add(thisIterator.next()); + + return new GATKBAMFileSpan(subtracted.toArray(new GATKChunk[subtracted.size()])); + } } diff --git a/java/src/net/sf/samtools/GATKChunk.java b/java/src/net/sf/samtools/GATKChunk.java index 69ff70555..f590809e2 100644 --- a/java/src/net/sf/samtools/GATKChunk.java +++ b/java/src/net/sf/samtools/GATKChunk.java @@ -30,6 +30,12 @@ package net.sf.samtools; * TODO: Eliminate once we determine the final fate of the BAM index reading code. */ public class GATKChunk extends Chunk { + /** + * The average ratio of compressed block size / uncompressed block size, computed empirically + * using the output of org.broadinstitute.sting.gatk.datasources.reads.utilities.PrintBGZFBounds. + */ + private static final double AVERAGE_BAM_COMPRESSION_RATIO = 0.39; + public GATKChunk(final long start, final long stop) { super(start,stop); } @@ -63,5 +69,15 @@ public class GATKChunk extends Chunk { super.setChunkEnd(value); } - + /** + * Computes an approximation of the uncompressed size of the + * chunk, in bytes. Can be used to determine relative weights + * of chunk size. + * @return An approximation of the chunk size in bytes. 
+ */ + public long size() { + final long chunkSpan = Math.round(((getChunkEnd()>>16)-(getChunkStart()>>16))/AVERAGE_BAM_COMPRESSION_RATIO); + final int offsetSpan = (int)((getChunkEnd()&0xFFFF)-(getChunkStart()&0xFFFF)); + return chunkSpan + offsetSpan; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java index 7b7976dc3..53fb49134 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java @@ -114,11 +114,11 @@ public class LowMemoryIntervalSharder implements Iterator { coveredRegionStop = Math.min(coveredRegionStop,scheduleEntry.stop); coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop); - // Always create a file span, whether there was covered data or not. If there was no covered data, then the binTree is empty. - //System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex()); nextFilePointer.addFileSpans(scheduleEntry.fileSpans); } else { + // Always create a file span, whether there was covered data or not. If there was no covered data, then the binTree is empty. 
+ //System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex()); for(SAMReaderID reader: indices.keySet()) nextFilePointer.addFileSpans(reader,new GATKBAMFileSpan()); } diff --git a/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java b/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java new file mode 100644 index 000000000..b76306013 --- /dev/null +++ b/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package net.sf.samtools; + +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * Tests of functionality of union, intersection operators. 
+ */ +public class GATKBAMFileSpanUnitTest { + @Test + public void testUnionOfEmptyFileSpans() { + GATKBAMFileSpan empty1 = new GATKBAMFileSpan(); + GATKBAMFileSpan empty2 = new GATKBAMFileSpan(); + GATKBAMFileSpan union = empty1.union(empty2); + Assert.assertEquals(union.getGATKChunks().size(),0,"Elements inserted in union of two empty sets"); + } + + @Test + public void testUnionOfNonOverlappingFileSpans() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,65535)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535)); + GATKBAMFileSpan union = regionOne.union(regionTwo); + Assert.assertEquals(union.getGATKChunks().size(),2,"Discontiguous elements were merged"); + Assert.assertEquals(union.getGATKChunks().get(0),regionOne.getGATKChunks().get(0),"Wrong chunk was first in list"); + Assert.assertEquals(union.getGATKChunks().get(1),regionTwo.getGATKChunks().get(0),"Wrong chunk was second in list"); + } + + @Test + public void testUnionOfOverlappingFileSpans() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535)); + GATKBAMFileSpan union = regionOne.union(regionTwo); + Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not."); + Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535)); + } + + @Test + public void testUnionOfStringOfFileSpans() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16) }); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,2<<16)); + GATKBAMFileSpan union = regionOne.union(regionTwo); + Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not."); + Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,3<<16)); + } + + @Test + public void testUnionAllFileSpansAdded() { + GATKBAMFileSpan regionOne = new 
GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16), new GATKChunk(20<<16,21<<16) }); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,2<<16)); + GATKBAMFileSpan union = regionOne.union(regionTwo); + Assert.assertEquals(union.getGATKChunks().size(),2,"Elements to be merged were not."); + Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,3<<16)); + Assert.assertEquals(union.getGATKChunks().get(1),new GATKChunk(20<<16,21<<16)); + } + + @Test + public void testIntersectionOfEmptyFileSpans() { + GATKBAMFileSpan empty1 = new GATKBAMFileSpan(); + GATKBAMFileSpan empty2 = new GATKBAMFileSpan(); + GATKBAMFileSpan intersection = empty1.intersection(empty2); + Assert.assertEquals(intersection.getGATKChunks().size(),0,"Elements inserted in intersection of two empty sets"); + } + + @Test + public void testIntersectionOfNonOverlappingFileSpans() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,2<<16)); + GATKBAMFileSpan intersection = regionOne.intersection(regionTwo); + Assert.assertEquals(intersection.getGATKChunks().size(),0,"Elements inserted in intersection of two non-intersecting filespans"); + } + + @Test + public void testIntersectionOfSmallOverlapInFileSpans() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(65535,2<<16)); + GATKBAMFileSpan intersection = regionOne.intersection(regionTwo); + Assert.assertEquals(intersection.getGATKChunks().size(),1,"No intersection found between two partially overlapping filespans"); + Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(65535,1<<16),"Determined intersection is incorrect."); + } + + @Test + public void testIntersectionOfStrictSubset() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16)); + GATKBAMFileSpan regionTwo = new 
GATKBAMFileSpan(new GATKChunk(0,2<<16)); + GATKBAMFileSpan intersection = regionOne.intersection(regionTwo); + Assert.assertEquals(intersection.getGATKChunks().size(),1,"No intersection found between two partially overlapping filespans"); + Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(0<<16,1<<16),"Determined intersection is incorrect."); + + // Make sure intersection is symmetric + intersection = regionTwo.intersection(regionOne); + Assert.assertEquals(intersection.getGATKChunks().size(),1,"No intersection found between two partially overlapping filespans"); + Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(0<<16,1<<16),"Determined intersection is incorrect."); + } + + @Test + public void testIntersectionOfPartialOverlap() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,2<<16)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(0<<16|32768,1<<16|32768)); + GATKBAMFileSpan intersection = regionOne.intersection(regionTwo); + Assert.assertEquals(intersection.getGATKChunks().size(),1,"No intersection found between two partially overlapping filespans"); + Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(0<<16|32768,1<<16|32768),"Determined intersection is incorrect."); + } + + @Test + public void testIntersectionOfChunkLists() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,5<<16)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(1<<16,2<<16), new GATKChunk(3<<16,4<<16) }); + GATKBAMFileSpan intersection = regionOne.intersection(regionTwo); + Assert.assertEquals(intersection.getGATKChunks().size(),2,"Wrong number of intersections found."); + Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(1<<16,2<<16),"Determined intersection is incorrect."); + Assert.assertEquals(intersection.getGATKChunks().get(1),new GATKChunk(3<<16,4<<16),"Determined intersection is incorrect."); + + // Make sure 
intersection is symmetric + intersection = regionTwo.intersection(regionOne); + Assert.assertEquals(intersection.getGATKChunks().size(),2,"Wrong number of intersections found."); + Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(1<<16,2<<16),"Determined intersection is incorrect."); + Assert.assertEquals(intersection.getGATKChunks().get(1),new GATKChunk(3<<16,4<<16),"Determined intersection is incorrect."); + } + + @Test + public void testSubtractionOfEmptyChunkLists() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(); + GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo); + Assert.assertEquals(subtraction.getGATKChunks().size(),0,"Elements inserted in subtraction of two empty sets"); + } + + @Test + public void testSingleIntervalSubtractedAway() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(0,1<<16)); + GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo); + Assert.assertEquals(subtraction.getGATKChunks().size(),0,"Elements inserted in complete subtraction of region"); + } + + @Test + public void testMultipleIntervalsSubtractedAway() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16) }); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16) }); + GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo); + Assert.assertEquals(subtraction.getGATKChunks().size(),0,"Elements inserted in complete subtraction of region"); + } + + @Test + public void testSubtractionOfStrictSubset() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,2<<16)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(0,1<<16)); + GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo); + 
Assert.assertEquals(subtraction.getGATKChunks().size(),1,"Incorrect size in strict subset subtraction of region"); + Assert.assertEquals(subtraction.getGATKChunks().get(0),new GATKChunk(1<<16,2<<16),"Determined subtraction is incorrect."); + } + + @Test + public void testSubtractionOfPartialOverlap() { + GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,2<<16)); + GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,3<<16)); + GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo); + Assert.assertEquals(subtraction.getGATKChunks().size(),1,"Incorrect size in partial subset subtraction of region"); + Assert.assertEquals(subtraction.getGATKChunks().get(0),new GATKChunk(0<<16,1<<16),"Determined subtraction is incorrect."); + } +} diff --git a/java/test/net/sf/samtools/GATKChunkUnitTest.java b/java/test/net/sf/samtools/GATKChunkUnitTest.java new file mode 100644 index 000000000..dd01afa2b --- /dev/null +++ b/java/test/net/sf/samtools/GATKChunkUnitTest.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package net.sf.samtools; + +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * Test basic functionality of the GATK chunk, giving informative size capabilities, etc. + */ +public class GATKChunkUnitTest { + private static final int FULL_BLOCK_COMPRESSED_SIZE = 25559; + private static final int FULL_BLOCK_UNCOMPRESSED_SIZE = 65536; + private static final int HALF_BLOCK_UNCOMPRESSED_SIZE = FULL_BLOCK_UNCOMPRESSED_SIZE/2; + + @Test + public void testSizeOfEmptyChunk() { + GATKChunk chunk = new GATKChunk(0,0); + Assert.assertEquals(chunk.size(),0,"Empty chunk's size is not equal to 0."); + } + + @Test + public void testSizeOfChunkWithinSingleBlock() { + GATKChunk chunk = new GATKChunk(0,FULL_BLOCK_UNCOMPRESSED_SIZE-1); + Assert.assertEquals(chunk.size(),FULL_BLOCK_UNCOMPRESSED_SIZE-1,"Chunk spanning limits of block is returning wrong size."); + + chunk = new GATKChunk(0,HALF_BLOCK_UNCOMPRESSED_SIZE); + Assert.assertEquals(chunk.size(),HALF_BLOCK_UNCOMPRESSED_SIZE,"Chunk spanning 1/2 block is returning the wrong size."); + } + + @Test + public void testSizeOfSingleBlock() { + GATKChunk chunk = new GATKChunk(0,FULL_BLOCK_COMPRESSED_SIZE<<16); + Assert.assertEquals(chunk.size(),FULL_BLOCK_UNCOMPRESSED_SIZE,"Chunk spanning complete block returns incorrect size."); + } + + @Test + public void testSizeOfBlockAndAHalf() { + GATKChunk chunk = new GATKChunk(0,(FULL_BLOCK_COMPRESSED_SIZE<<16)+HALF_BLOCK_UNCOMPRESSED_SIZE); + Assert.assertEquals(chunk.size(),FULL_BLOCK_UNCOMPRESSED_SIZE+HALF_BLOCK_UNCOMPRESSED_SIZE,"Chunk spanning 1.5 blocks returns incorrect size."); + } + + @Test + public void testSizeOfHalfBlock() { + GATKChunk chunk = new 
GATKChunk(HALF_BLOCK_UNCOMPRESSED_SIZE,FULL_BLOCK_COMPRESSED_SIZE<<16); + Assert.assertEquals(chunk.size(),HALF_BLOCK_UNCOMPRESSED_SIZE,"Chunk spanning 0.5 blocks returns incorrect size."); + } +}