gatk-3.8/public/gatk-framework/src/main/java/htsjdk/samtools/GATKBAMFileSpan.java

309 lines
12 KiB
Java

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package net.sf.samtools;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
/**
* A temporary solution to work around Java access rights issues:
* override BAMFileSpan and make it public.
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
*/
public class GATKBAMFileSpan extends BAMFileSpan {
    /**
     * Create a new empty list of chunks.
     */
    public GATKBAMFileSpan() {
        super();
    }

    /**
     * Create a new GATKBAMFileSpan from an existing BAMFileSpan.
     * Chunks that are already GATKChunks are added as-is; every other chunk is wrapped
     * in a newly constructed GATKChunk.
     * @param sourceFileSpan File span to convert; must be an instance of BAMFileSpan.
     * @throws SAMException if sourceFileSpan is not a BAMFileSpan (this includes null).
     */
    public GATKBAMFileSpan(SAMFileSpan sourceFileSpan) {
        if (!(sourceFileSpan instanceof BAMFileSpan)) {
            throw new SAMException("Unable to create GATKBAMFileSpan from a SAMFileSpan. Please submit a BAMFileSpan instead");
        }
        final BAMFileSpan sourceBAMFileSpan = (BAMFileSpan) sourceFileSpan;
        for (final Chunk chunk : sourceBAMFileSpan.getChunks()) {
            add(chunk instanceof GATKChunk ? chunk : new GATKChunk(chunk));
        }
    }

    /**
     * Convenience constructor to construct a BAM file span from
     * a single chunk.
     * @param chunk Chunk to use as the sole region in this span.
     */
    public GATKBAMFileSpan(final Chunk chunk) {
        super(chunk);
    }

    /**
     * Create a new chunk list from the given list of chunks.
     * @param chunks Constituent chunks.
     */
    public GATKBAMFileSpan(final GATKChunk[] chunks) {
        super(Arrays.<Chunk>asList(chunks));
    }

    /**
     * Two file spans are equal when they hold the same number of chunks and the chunks
     * are pairwise equal, in order. Any BAMFileSpan (not only a GATKBAMFileSpan) can
     * compare equal to this span.
     * @param other Object to compare against.
     * @return true if other is a BAMFileSpan with an equal chunk sequence.
     */
    @Override
    public boolean equals(final Object other) {
        if (!(other instanceof BAMFileSpan)) {
            return false;
        }
        final List<Chunk> theseChunks = getChunks();
        final List<Chunk> otherChunks = ((BAMFileSpan) other).getChunks();
        if (theseChunks.size() != otherChunks.size()) {
            return false;
        }
        for (int i = 0; i < theseChunks.size(); i++) {
            if (!theseChunks.get(i).equals(otherChunks.get(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Hash code derived from the ordered chunk list, so that spans which compare equal
     * via {@link #equals(Object)} also report the same hash code (provided the chunks'
     * own hashCode is consistent with their equals).
     * @return Hash code of the chunk list.
     */
    @Override
    public int hashCode() {
        return getChunks().hashCode();
    }

    /**
     * Gets the constituent chunks stored in this span, each converted to a GATKChunk.
     * @return A newly allocated, modifiable list containing one new GATKChunk per chunk
     *         in this span; modifying the list does not modify this span.
     */
    public List<GATKChunk> getGATKChunks() {
        final List<GATKChunk> gatkChunks = new ArrayList<GATKChunk>();
        for (final Chunk chunk : getChunks()) {
            gatkChunks.add(new GATKChunk(chunk));
        }
        return gatkChunks;
    }

    /**
     * Renders every chunk followed by a semicolon.
     * @return Semicolon-terminated concatenation of the chunks' string forms.
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        for (final GATKChunk chunk : getGATKChunks()) {
            builder.append(String.format("%s;", chunk));
        }
        return builder.toString();
    }

    /**
     * Returns an approximation of the number of uncompressed bytes in this
     * file span, computed as the sum of the sizes reported by each chunk.
     * @return Approximation of uncompressed bytes in filespan.
     */
    public long size() {
        long size = 0L;
        for (final GATKChunk chunk : getGATKChunks()) {
            size += chunk.size();
        }
        return size;
    }

    /**
     * Get a GATKChunk representing the "extent" of this file span, from the start of the first
     * chunk to the end of the last chunk. The chunks list must be sorted in order to use this method.
     *
     * @return a GATKChunk representing the extent of this file span, or a GATKChunk representing
     *         a span of size 0 if there are no chunks
     * @throws ReviewedStingException if the chunk list is not sorted
     */
    public GATKChunk getExtent() {
        validateSorted();    // TODO: defensive measure: may be unnecessary

        final List<Chunk> chunks = getChunks();
        if (chunks.isEmpty()) {
            return new GATKChunk(0L, 0L);
        }
        return new GATKChunk(chunks.get(0).getChunkStart(), chunks.get(chunks.size() - 1).getChunkEnd());
    }

    /**
     * Validates the list of chunks to ensure that they appear in sorted order, i.e. that
     * no chunk starts before the preceding chunk ends.
     * @throws ReviewedStingException if a chunk starts before its predecessor's end
     */
    private void validateSorted() {
        final List<Chunk> chunks = getChunks();
        for (int i = 1; i < chunks.size(); i++) {
            if (chunks.get(i).getChunkStart() < chunks.get(i - 1).getChunkEnd()) {
                throw new ReviewedStingException(String.format("Chunk list is unsorted; chunk %s is before chunk %s", chunks.get(i-1), chunks.get(i)));
            }
        }
    }

    /**
     * Computes the union of two FileSpans. Chunks from both spans are pooled, sorted,
     * and any run of overlapping or adjacent chunks is merged into a single chunk.
     * @param other FileSpan to union with this one.
     * @return A file span that's been unioned.
     */
    public GATKBAMFileSpan union(final GATKBAMFileSpan other) {
        final List<GATKChunk> sortedChunks = new ArrayList<GATKChunk>(getGATKChunks());
        sortedChunks.addAll(other.getGATKChunks());

        // No data?  Return an empty file span.
        if (sortedChunks.isEmpty()) {
            return new GATKBAMFileSpan();
        }
        Collections.sort(sortedChunks);

        final List<GATKChunk> mergedUnion = new ArrayList<GATKChunk>();
        GATKChunk currentChunk = sortedChunks.get(0);
        for (int i = 1; i < sortedChunks.size(); i++) {
            final GATKChunk nextChunk = sortedChunks.get(i);
            if (currentChunk.overlaps(nextChunk) || currentChunk.isAdjacentTo(nextChunk)) {
                // Keep growing the accumulated range while the next chunk can be merged into it.
                currentChunk = currentChunk.merge(nextChunk);
            } else {
                // Gap found: the accumulated range is complete, start a new one.
                mergedUnion.add(currentChunk);
                currentChunk = nextChunk;
            }
        }
        // The last accumulated range has not been emitted by the loop above.
        mergedUnion.add(currentChunk);

        return new GATKBAMFileSpan(mergedUnion.toArray(new GATKChunk[mergedUnion.size()]));
    }

    /**
     * Intersects two BAM file spans by walking both chunk lists in parallel.
     * NOTE(review): the walk appears to assume each span's chunks are sorted and
     * non-overlapping; this method does not validate that.
     * @param other File span to intersect with this one.
     * @return The intersected BAM file span; empty if either span has no chunks.
     */
    public GATKBAMFileSpan intersection(final GATKBAMFileSpan other) {
        final Iterator<GATKChunk> thisIterator = getGATKChunks().iterator();
        final Iterator<GATKChunk> otherIterator = other.getGATKChunks().iterator();

        if (!thisIterator.hasNext() || !otherIterator.hasNext()) {
            return new GATKBAMFileSpan();
        }

        GATKChunk thisChunk = thisIterator.next();
        GATKChunk otherChunk = otherIterator.next();

        final List<GATKChunk> intersected = new ArrayList<GATKChunk>();

        while (thisChunk != null && otherChunk != null) {
            // If this iterator is before other, skip this ahead.
            if (thisChunk.getChunkEnd() <= otherChunk.getChunkStart()) {
                thisChunk = nextChunkOrNull(thisIterator);
                continue;
            }

            // If other iterator is before this, skip other ahead.
            if (thisChunk.getChunkStart() >= otherChunk.getChunkEnd()) {
                otherChunk = nextChunkOrNull(otherIterator);
                continue;
            }

            // If these two chunks overlap, pull out the intersection of data and truncate the current chunks
            // to point after the intersection (or advance to the next chunk if nothing remains).
            if (thisChunk.overlaps(otherChunk)) {
                // The intersection starts at the later of the two starts and ends at the earlier of the two ends.
                final long intersectionStart = Math.max(thisChunk.getChunkStart(), otherChunk.getChunkStart());
                final long intersectionEnd = Math.min(thisChunk.getChunkEnd(), otherChunk.getChunkEnd());
                intersected.add(new GATKChunk(intersectionStart, intersectionEnd));

                // Read both ends before reassigning either chunk so the two updates are independent.
                final long thisEnd = thisChunk.getChunkEnd();
                final long otherEnd = otherChunk.getChunkEnd();

                thisChunk = thisEnd > intersectionEnd
                        ? new GATKChunk(intersectionEnd, thisEnd)
                        : nextChunkOrNull(thisIterator);
                otherChunk = otherEnd > intersectionEnd
                        ? new GATKChunk(intersectionEnd, otherEnd)
                        : nextChunkOrNull(otherIterator);
            }
        }

        return new GATKBAMFileSpan(intersected.toArray(new GATKChunk[intersected.size()]));
    }

    /**
     * Subtracts other file span from this file span.
     * NOTE(review): the walk appears to assume each span's chunks are sorted and
     * non-overlapping; this method does not validate that.
     * @param other File span to strike out.
     * @return This file span minus the other file span. When either span has no chunks,
     *         this very instance is returned rather than a copy.
     */
    public GATKBAMFileSpan minus(final GATKBAMFileSpan other) {
        final Iterator<GATKChunk> thisIterator = getGATKChunks().iterator();
        final Iterator<GATKChunk> otherIterator = other.getGATKChunks().iterator();

        if (!thisIterator.hasNext() || !otherIterator.hasNext()) {
            return this;
        }

        GATKChunk thisChunk = thisIterator.next();
        GATKChunk otherChunk = otherIterator.next();

        final List<GATKChunk> subtracted = new ArrayList<GATKChunk>();

        while (thisChunk != null && otherChunk != null) {
            // If this iterator is before the other, add this to the subtracted list and forge ahead.
            if (thisChunk.getChunkEnd() <= otherChunk.getChunkStart()) {
                subtracted.add(thisChunk);
                thisChunk = nextChunkOrNull(thisIterator);
                continue;
            }

            // If other iterator is before this, skip other ahead.
            if (thisChunk.getChunkStart() >= otherChunk.getChunkEnd()) {
                otherChunk = nextChunkOrNull(otherIterator);
                continue;
            }

            // If these two chunks overlap, keep whatever part of this chunk precedes the other chunk and
            // truncate this chunk to the part following it (or advance to the next chunk if nothing remains).
            if (thisChunk.overlaps(otherChunk)) {
                // Add in any sort of prefix that this chunk might have over the other.
                if (thisChunk.getChunkStart() < otherChunk.getChunkStart()) {
                    subtracted.add(new GATKChunk(thisChunk.getChunkStart(), otherChunk.getChunkStart()));
                }
                if (thisChunk.getChunkEnd() > otherChunk.getChunkEnd()) {
                    thisChunk = new GATKChunk(otherChunk.getChunkEnd(), thisChunk.getChunkEnd());
                } else {
                    thisChunk = nextChunkOrNull(thisIterator);
                }
            }
        }

        // Finish up any remaining contents of this that didn't make it into the subtracted array.
        if (thisChunk != null) {
            subtracted.add(thisChunk);
        }
        while (thisIterator.hasNext()) {
            subtracted.add(thisIterator.next());
        }

        return new GATKBAMFileSpan(subtracted.toArray(new GATKChunk[subtracted.size()]));
    }

    /**
     * Advances the given iterator by one element.
     * @param iterator Iterator to advance.
     * @return The next chunk, or null if the iterator is exhausted.
     */
    private static GATKChunk nextChunkOrNull(final Iterator<GATKChunk> iterator) {
        return iterator.hasNext() ? iterator.next() : null;
    }
}