Initial work on proto-shard merger:
- create size() method that returns an approximation of the uncompressed size in bytes of BAM span. I'll use this method as a protoshard weighting function until we determine how to normalize the weights across the different data access mechanisms (reads, reference, RODs). - Implementations of basic union/intersection/subtraction mechanisms for BAM spans; should be enough to get an accurate weight for two proto-shards put together. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5541 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d5ac822e97
commit
deab9f0aa5
|
|
@ -24,9 +24,15 @@
|
|||
|
||||
package net.sf.samtools;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
|
||||
/**
|
||||
* A temporary solution to work around Java access rights issues:
|
||||
|
|
@ -75,4 +81,161 @@ public class GATKBAMFileSpan extends BAMFileSpan {
|
|||
builder.append(String.format("%s;",chunk));
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an approximation of the number of uncompressed bytes in this
|
||||
* file span.
|
||||
* @return Approximation of uncompressed bytes in filespan.
|
||||
*/
|
||||
public long size() {
|
||||
long size = 0L;
|
||||
for(GATKChunk chunk: getGATKChunks())
|
||||
size += chunk.size();
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the union of two FileSpans.
|
||||
* @param other FileSpan to union with this one.
|
||||
* @return A file span that's been unioned.
|
||||
*/
|
||||
public GATKBAMFileSpan union(final GATKBAMFileSpan other) {
|
||||
// No data? Return an empty file span.
|
||||
if(getGATKChunks().size() == 0 && other.getGATKChunks().size() == 0)
|
||||
return new GATKBAMFileSpan();
|
||||
|
||||
LinkedList<GATKChunk> unmergedUnion = new LinkedList<GATKChunk>();
|
||||
unmergedUnion.addAll(getGATKChunks());
|
||||
unmergedUnion.addAll(other.getGATKChunks());
|
||||
Collections.sort(unmergedUnion);
|
||||
|
||||
List<GATKChunk> mergedUnion = new ArrayList<GATKChunk>();
|
||||
GATKChunk currentChunk = unmergedUnion.remove();
|
||||
while(!unmergedUnion.isEmpty()) {
|
||||
// Find the end of this range of chunks.
|
||||
while(!unmergedUnion.isEmpty() && currentChunk.getChunkEnd() >= unmergedUnion.peek().getChunkStart()) {
|
||||
GATKChunk nextChunk = unmergedUnion.remove();
|
||||
currentChunk = new GATKChunk(currentChunk.getChunkStart(),nextChunk.getChunkEnd());
|
||||
}
|
||||
// Add the accumulated range.
|
||||
mergedUnion.add(currentChunk);
|
||||
currentChunk = !unmergedUnion.isEmpty() ? unmergedUnion.remove() : null;
|
||||
}
|
||||
|
||||
// At end of the loop above, the last chunk will be contained in currentChunk and will not yet have been added. Add it.
|
||||
if(currentChunk !=null)
|
||||
mergedUnion.add(currentChunk);
|
||||
|
||||
return new GATKBAMFileSpan(mergedUnion.toArray(new GATKChunk[mergedUnion.size()]));
|
||||
}
|
||||
|
||||
/**
|
||||
* Intersects two BAM file spans.
|
||||
* @param other File span to intersect with this one.
|
||||
* @return The intersected BAM file span.
|
||||
*/
|
||||
public GATKBAMFileSpan intersection(final GATKBAMFileSpan other) {
|
||||
Iterator<GATKChunk> thisIterator = getGATKChunks().iterator();
|
||||
Iterator<GATKChunk> otherIterator = other.getGATKChunks().iterator();
|
||||
|
||||
if(!thisIterator.hasNext() || !otherIterator.hasNext())
|
||||
return new GATKBAMFileSpan();
|
||||
|
||||
GATKChunk thisChunk = thisIterator.next();
|
||||
GATKChunk otherChunk = otherIterator.next();
|
||||
|
||||
List<GATKChunk> intersected = new ArrayList<GATKChunk>();
|
||||
|
||||
while(thisChunk != null && otherChunk != null) {
|
||||
// If this iterator is before other, skip this ahead.
|
||||
if(thisChunk.getChunkEnd() <= otherChunk.getChunkStart()) {
|
||||
thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If other iterator is before this, skip other ahead.
|
||||
if(thisChunk.getChunkStart() >= otherChunk.getChunkEnd()) {
|
||||
otherChunk = otherIterator.hasNext() ? otherIterator.next() : null;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If these two chunks overlap, pull out intersection of data and truncated current chunks to point after
|
||||
// the intersection (or next chunk if no such overlap exists).
|
||||
if(thisChunk.overlaps(otherChunk)) {
|
||||
// Determine the chunk constraints
|
||||
GATKChunk firstChunk = thisChunk.getChunkStart() < otherChunk.getChunkStart() ? thisChunk : otherChunk;
|
||||
GATKChunk secondChunk = thisChunk==firstChunk ? otherChunk : thisChunk;
|
||||
GATKChunk intersectedChunk = new GATKChunk(secondChunk.getChunkStart(),Math.min(firstChunk.getChunkEnd(),secondChunk.getChunkEnd()));
|
||||
intersected.add(intersectedChunk);
|
||||
|
||||
if(thisChunk.getChunkEnd() > intersectedChunk.getChunkEnd())
|
||||
thisChunk = new GATKChunk(intersectedChunk.getChunkEnd(),thisChunk.getChunkEnd());
|
||||
else
|
||||
thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
|
||||
|
||||
if(otherChunk.getChunkEnd() > intersectedChunk.getChunkEnd())
|
||||
otherChunk = new GATKChunk(intersectedChunk.getChunkEnd(),otherChunk.getChunkEnd());
|
||||
else
|
||||
otherChunk = otherIterator.hasNext() ? otherIterator.next() : null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return new GATKBAMFileSpan(intersected.toArray(new GATKChunk[intersected.size()]));
|
||||
}
|
||||
|
||||
/**
|
||||
* Substracts other file span from this file span.
|
||||
* @param other File span to strike out.
|
||||
* @return This file span minuse the other file span.
|
||||
*/
|
||||
|
||||
public GATKBAMFileSpan subtract(final GATKBAMFileSpan other) {
|
||||
Iterator<GATKChunk> thisIterator = getGATKChunks().iterator();
|
||||
Iterator<GATKChunk> otherIterator = other.getGATKChunks().iterator();
|
||||
|
||||
if(!thisIterator.hasNext() || !otherIterator.hasNext())
|
||||
return this;
|
||||
|
||||
GATKChunk thisChunk = thisIterator.next();
|
||||
GATKChunk otherChunk = otherIterator.next();
|
||||
|
||||
List<GATKChunk> subtracted = new ArrayList<GATKChunk>();
|
||||
|
||||
while(thisChunk != null && otherChunk != null) {
|
||||
// If this iterator is before the other, add this to the subtracted list and forge ahead.
|
||||
if(thisChunk.getChunkEnd() < otherChunk.getChunkStart()) {
|
||||
subtracted.add(thisChunk);
|
||||
thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If other iterator is before this, skip other ahead.
|
||||
if(thisChunk.getChunkStart() >= otherChunk.getChunkEnd()) {
|
||||
otherChunk = otherIterator.hasNext() ? otherIterator.next() : null;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If these two chunks overlap, pull out intersection of data and truncated current chunks to point after
|
||||
// the intersection (or next chunk if no such overlap exists).
|
||||
if(thisChunk.overlaps(otherChunk)) {
|
||||
// Add in any sort of prefix that this chunk might have over the other.
|
||||
if(thisChunk.getChunkStart() < otherChunk.getChunkStart())
|
||||
subtracted.add(new GATKChunk(thisChunk.getChunkStart(),otherChunk.getChunkStart()));
|
||||
|
||||
if(thisChunk.getChunkEnd() > otherChunk.getChunkEnd())
|
||||
thisChunk = new GATKChunk(otherChunk.getChunkEnd(),thisChunk.getChunkEnd());
|
||||
else
|
||||
thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
|
||||
}
|
||||
}
|
||||
|
||||
// Finish up any remaining contents of this that didn't make it into the subtracted array.
|
||||
if(thisChunk != null)
|
||||
subtracted.add(thisChunk);
|
||||
while(thisIterator.hasNext())
|
||||
subtracted.add(thisIterator.next());
|
||||
|
||||
return new GATKBAMFileSpan(subtracted.toArray(new GATKChunk[subtracted.size()]));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,12 @@ package net.sf.samtools;
|
|||
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
|
||||
*/
|
||||
public class GATKChunk extends Chunk {
|
||||
/**
|
||||
* The average ratio of compressed block size / uncompressed block size, computed empirically
|
||||
* using the output of org.broadinstitute.sting.gatk.datasources.reads.utilities.PrintBGZFBounds.
|
||||
*/
|
||||
private static final double AVERAGE_BAM_COMPRESSION_RATIO = 0.39;
|
||||
|
||||
/**
 * Builds a chunk from a pair of boundary values by delegating directly to
 * the superclass constructor.
 * @param start Forwarded as the first argument of the superclass constructor.
 * @param stop Forwarded as the second argument of the superclass constructor.
 */
public GATKChunk(final long start, final long stop) {
    super(start, stop);
}
|
||||
|
|
@ -63,5 +69,15 @@ public class GATKChunk extends Chunk {
|
|||
super.setChunkEnd(value);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Computes an approximation of the uncompressed size of the
|
||||
* chunk, in bytes. Can be used to determine relative weights
|
||||
* of chunk size.
|
||||
* @return An approximation of the chunk size in bytes.
|
||||
*/
|
||||
public long size() {
|
||||
final long chunkSpan = Math.round(((getChunkEnd()>>16)-(getChunkStart()>>16))/AVERAGE_BAM_COMPRESSION_RATIO);
|
||||
final int offsetSpan = (int)((getChunkEnd()&0xFFFF)-(getChunkStart()&0xFFFF));
|
||||
return chunkSpan + offsetSpan;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -114,11 +114,11 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
|||
coveredRegionStop = Math.min(coveredRegionStop,scheduleEntry.stop);
|
||||
coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop);
|
||||
|
||||
// Always create a file span, whether there was covered data or not. If there was no covered data, then the binTree is empty.
|
||||
//System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex());
|
||||
nextFilePointer.addFileSpans(scheduleEntry.fileSpans);
|
||||
}
|
||||
else {
|
||||
// Always create a file span, whether there was covered data or not. If there was no covered data, then the binTree is empty.
|
||||
//System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex());
|
||||
for(SAMReaderID reader: indices.keySet())
|
||||
nextFilePointer.addFileSpans(reader,new GATKBAMFileSpan());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,185 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package net.sf.samtools;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* Tests of functionality of union, intersection operators.
|
||||
*/
|
||||
public class GATKBAMFileSpanUnitTest {
|
||||
@Test
|
||||
public void testUnionOfEmptyFileSpans() {
|
||||
GATKBAMFileSpan empty1 = new GATKBAMFileSpan();
|
||||
GATKBAMFileSpan empty2 = new GATKBAMFileSpan();
|
||||
GATKBAMFileSpan union = empty1.union(empty2);
|
||||
Assert.assertEquals(union.getGATKChunks().size(),0,"Elements inserted in union of two empty sets");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionOfNonOverlappingFileSpans() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,65535));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535));
|
||||
GATKBAMFileSpan union = regionOne.union(regionTwo);
|
||||
Assert.assertEquals(union.getGATKChunks().size(),2,"Discontiguous elements were merged");
|
||||
Assert.assertEquals(union.getGATKChunks().get(0),regionOne.getGATKChunks().get(0),"Wrong chunk was first in list");
|
||||
Assert.assertEquals(union.getGATKChunks().get(1),regionTwo.getGATKChunks().get(0),"Wrong chunk was second in list");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionOfOverlappingFileSpans() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535));
|
||||
GATKBAMFileSpan union = regionOne.union(regionTwo);
|
||||
Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not.");
|
||||
Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionOfStringOfFileSpans() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16) });
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,2<<16));
|
||||
GATKBAMFileSpan union = regionOne.union(regionTwo);
|
||||
Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not.");
|
||||
Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,3<<16));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionAllFileSpansAdded() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16), new GATKChunk(20<<16,21<<16) });
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,2<<16));
|
||||
GATKBAMFileSpan union = regionOne.union(regionTwo);
|
||||
Assert.assertEquals(union.getGATKChunks().size(),2,"Elements to be merged were not.");
|
||||
Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,3<<16));
|
||||
Assert.assertEquals(union.getGATKChunks().get(1),new GATKChunk(20<<16,21<<16));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIntersectionOfEmptyFileSpans() {
|
||||
GATKBAMFileSpan empty1 = new GATKBAMFileSpan();
|
||||
GATKBAMFileSpan empty2 = new GATKBAMFileSpan();
|
||||
GATKBAMFileSpan intersection = empty1.intersection(empty2);
|
||||
Assert.assertEquals(intersection.getGATKChunks().size(),0,"Elements inserted in intersection of two empty sets");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIntersectionOfNonOverlappingFileSpans() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,2<<16));
|
||||
GATKBAMFileSpan intersection = regionOne.intersection(regionTwo);
|
||||
Assert.assertEquals(intersection.getGATKChunks().size(),0,"Elements inserted in intersection of two non-intersecting filespans");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIntersectionOfSmallOverlapInFileSpans() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(65535,2<<16));
|
||||
GATKBAMFileSpan intersection = regionOne.intersection(regionTwo);
|
||||
Assert.assertEquals(intersection.getGATKChunks().size(),1,"No intersection found between two partially overlapping filespans");
|
||||
Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(65535,1<<16),"Determined intersection is incorrect.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIntersectionOfStrictSubset() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(0,2<<16));
|
||||
GATKBAMFileSpan intersection = regionOne.intersection(regionTwo);
|
||||
Assert.assertEquals(intersection.getGATKChunks().size(),1,"No intersection found between two partially overlapping filespans");
|
||||
Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(0<<16,1<<16),"Determined intersection is incorrect.");
|
||||
|
||||
// Make sure intersection is symmetric
|
||||
intersection = regionTwo.intersection(regionOne);
|
||||
Assert.assertEquals(intersection.getGATKChunks().size(),1,"No intersection found between two partially overlapping filespans");
|
||||
Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(0<<16,1<<16),"Determined intersection is incorrect.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIntersectionOfPartialOverlap() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,2<<16));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(0<<16|32768,1<<16|32768));
|
||||
GATKBAMFileSpan intersection = regionOne.intersection(regionTwo);
|
||||
Assert.assertEquals(intersection.getGATKChunks().size(),1,"No intersection found between two partially overlapping filespans");
|
||||
Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(0<<16|32768,1<<16|32768),"Determined intersection is incorrect.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIntersectionOfChunkLists() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,5<<16));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(1<<16,2<<16), new GATKChunk(3<<16,4<<16) });
|
||||
GATKBAMFileSpan intersection = regionOne.intersection(regionTwo);
|
||||
Assert.assertEquals(intersection.getGATKChunks().size(),2,"Wrong number of intersections found.");
|
||||
Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(1<<16,2<<16),"Determined intersection is incorrect.");
|
||||
Assert.assertEquals(intersection.getGATKChunks().get(1),new GATKChunk(3<<16,4<<16),"Determined intersection is incorrect.");
|
||||
|
||||
// Make sure intersection is symmetric
|
||||
intersection = regionTwo.intersection(regionOne);
|
||||
Assert.assertEquals(intersection.getGATKChunks().size(),2,"Wrong number of intersections found.");
|
||||
Assert.assertEquals(intersection.getGATKChunks().get(0),new GATKChunk(1<<16,2<<16),"Determined intersection is incorrect.");
|
||||
Assert.assertEquals(intersection.getGATKChunks().get(1),new GATKChunk(3<<16,4<<16),"Determined intersection is incorrect.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSubtractionOfEmptyChunkLists() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan();
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan();
|
||||
GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo);
|
||||
Assert.assertEquals(subtraction.getGATKChunks().size(),0,"Elements inserted in subtraction of two empty sets");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleIntervalSubtractedAway() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(0,1<<16));
|
||||
GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo);
|
||||
Assert.assertEquals(subtraction.getGATKChunks().size(),0,"Elements inserted in complete subtraction of region");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleIntervalsSubtractedAway() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16) });
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16) });
|
||||
GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo);
|
||||
Assert.assertEquals(subtraction.getGATKChunks().size(),0,"Elements inserted in complete subtraction of region");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSubtractionOfStrictSubset() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,2<<16));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(0,1<<16));
|
||||
GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo);
|
||||
Assert.assertEquals(subtraction.getGATKChunks().size(),1,"Incorrect size in strict subset subtraction of region");
|
||||
Assert.assertEquals(subtraction.getGATKChunks().get(0),new GATKChunk(1<<16,2<<16),"Determined subtraction is incorrect.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSubtractionOfPartialOverlap() {
|
||||
GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,2<<16));
|
||||
GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,3<<16));
|
||||
GATKBAMFileSpan subtraction = regionOne.subtract(regionTwo);
|
||||
Assert.assertEquals(subtraction.getGATKChunks().size(),1,"Incorrect size in partial subset subtraction of region");
|
||||
Assert.assertEquals(subtraction.getGATKChunks().get(0),new GATKChunk(0<<16,1<<16),"Determined subtraction is incorrect.");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package net.sf.samtools;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* Test basic functionality of the GATK chunk, giving informative size capabilities, etc.
|
||||
*/
|
||||
public class GATKChunkUnitTest {
|
||||
private static final int FULL_BLOCK_COMPRESSED_SIZE = 25559;
|
||||
private static final int FULL_BLOCK_UNCOMPRESSED_SIZE = 65536;
|
||||
private static final int HALF_BLOCK_UNCOMPRESSED_SIZE = FULL_BLOCK_UNCOMPRESSED_SIZE/2;
|
||||
|
||||
@Test
|
||||
public void testSizeOfEmptyChunk() {
|
||||
GATKChunk chunk = new GATKChunk(0,0);
|
||||
Assert.assertEquals(chunk.size(),0,"Empty chunk's size is not equal to 0.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSizeOfChunkWithinSingleBlock() {
|
||||
GATKChunk chunk = new GATKChunk(0,FULL_BLOCK_UNCOMPRESSED_SIZE-1);
|
||||
Assert.assertEquals(chunk.size(),FULL_BLOCK_UNCOMPRESSED_SIZE-1,"Chunk spanning limits of block is returning wrong size.");
|
||||
|
||||
chunk = new GATKChunk(0,HALF_BLOCK_UNCOMPRESSED_SIZE);
|
||||
Assert.assertEquals(chunk.size(),HALF_BLOCK_UNCOMPRESSED_SIZE,"Chunk spanning 1/2 block is returning the wrong size.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSizeOfSingleBlock() {
|
||||
GATKChunk chunk = new GATKChunk(0,FULL_BLOCK_COMPRESSED_SIZE<<16);
|
||||
Assert.assertEquals(chunk.size(),FULL_BLOCK_UNCOMPRESSED_SIZE,"Chunk spanning complete block returns incorrect size.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSizeOfBlockAndAHalf() {
|
||||
GATKChunk chunk = new GATKChunk(0,(FULL_BLOCK_COMPRESSED_SIZE<<16)+HALF_BLOCK_UNCOMPRESSED_SIZE);
|
||||
Assert.assertEquals(chunk.size(),FULL_BLOCK_UNCOMPRESSED_SIZE+HALF_BLOCK_UNCOMPRESSED_SIZE,"Chunk spanning 1.5 blocks returns incorrect size.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSizeOfHalfBlock() {
|
||||
GATKChunk chunk = new GATKChunk(HALF_BLOCK_UNCOMPRESSED_SIZE,FULL_BLOCK_COMPRESSED_SIZE<<16);
|
||||
Assert.assertEquals(chunk.size(),HALF_BLOCK_UNCOMPRESSED_SIZE,"Chunk spanning 0.5 blocks returns incorrect size.");
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue