Moving from GATK dependencies on isolated classes checked into the GATK
codebase to a dependency on a jar file compiled from my private picard branch. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3034 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f455412ea8
commit
96662d8d1b
|
|
@ -1,92 +0,0 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package net.sf.picard.sam;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
/**
|
||||
* Iterator for SAM records that implements comparable to enable sorting of iterators.
|
||||
* The comparison is performed by comparing the next record in the iterator to the next
|
||||
* record in another iterator and returning the ordering between those SAM records.
|
||||
*/
|
||||
class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator> {
|
||||
private final CloseableIterator<SAMRecord> iterator;
|
||||
private final Comparator<SAMRecord> comparator;
|
||||
|
||||
/**
|
||||
* Constructs a wrapping iterator around the given iterator that will be able
|
||||
* to compare itself to other ComparableSamRecordIterators using the given comparator.
|
||||
*
|
||||
* @param iterator the wrapped iterator.
|
||||
* @param comparator the Comparator to use to provide ordering fo SAMRecords
|
||||
*/
|
||||
public ComparableSamRecordIterator(final CloseableIterator<SAMRecord> iterator, final Comparator<SAMRecord> comparator) {
|
||||
super(iterator);
|
||||
this.iterator = iterator;
|
||||
this.comparator = comparator;
|
||||
}
|
||||
|
||||
public CloseableIterator<SAMRecord> getWrappedIterator() {
|
||||
return iterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares this iterator to another comparable iterator based on the next record
|
||||
* available in each iterator. If the two comparable iterators have different
|
||||
* comparator types internally an exception is thrown.
|
||||
*
|
||||
* @param that another iterator to compare to
|
||||
* @return a negative, 0 or positive number as described in the Comparator interface
|
||||
*/
|
||||
public int compareTo(final ComparableSamRecordIterator that) {
|
||||
if (this.comparator.getClass() != that.comparator.getClass()) {
|
||||
throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " +
|
||||
"have different orderings internally");
|
||||
}
|
||||
|
||||
final SAMRecord record = this.peek();
|
||||
final SAMRecord record2 = that.peek();
|
||||
return comparator.compare(record, record2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
return compareTo((ComparableSamRecordIterator)o) == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
throw new UnsupportedOperationException("ComparableSamRecordIterator should not be hashed because it can change value");
|
||||
}
|
||||
}
|
||||
|
|
@ -1,243 +0,0 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package net.sf.picard.sam;
|
||||
|
||||
import net.sf.picard.PicardException;
|
||||
|
||||
import java.util.*;
|
||||
import java.lang.reflect.Constructor;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
/**
|
||||
* Provides an iterator interface for merging multiple underlying iterators into a single
|
||||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||
* the requested output format is unsorted, in which case any combination is valid.
|
||||
*/
|
||||
public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
|
||||
private final PriorityQueue<ComparableSamRecordIterator> pq;
|
||||
private final SamFileHeaderMerger samHeaderMerger;
|
||||
private final SAMFileHeader.SortOrder sortOrder;
|
||||
|
||||
/**
|
||||
* Maps iterators back to the readers from which they are derived.
|
||||
*/
|
||||
private final Map<CloseableIterator<SAMRecord>,SAMFileReader> iteratorToSourceMap = new HashMap<CloseableIterator<SAMRecord>,SAMFileReader>();
|
||||
|
||||
/**
|
||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||
* provided by the header merger parameter.
|
||||
* @param headerMerger The merged header and contents of readers.
|
||||
* @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order.
|
||||
*/
|
||||
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) {
|
||||
this(headerMerger,createWholeFileIterators(headerMerger.getReaders()),forcePresorted);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new merging iterator with a given merged header and a subset of readers.
|
||||
* @param headerMerger The merged header and contents of readers.
|
||||
* @param readerToIteratorMap A mapping of reader to iterator.
|
||||
* @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order.
|
||||
*/
|
||||
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final Map<SAMFileReader, CloseableIterator<SAMRecord>> readerToIteratorMap, final boolean forcePresorted) {
|
||||
this.samHeaderMerger = headerMerger;
|
||||
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
|
||||
final Collection<SAMFileReader> readers = headerMerger.getReaders();
|
||||
this.pq = new PriorityQueue<ComparableSamRecordIterator>(readers.size());
|
||||
|
||||
for(final SAMFileReader reader: readerToIteratorMap.keySet()) {
|
||||
if (!forcePresorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted &&
|
||||
reader.getFileHeader().getSortOrder() != this.sortOrder){
|
||||
throw new PicardException("Files are not compatible with sort order");
|
||||
}
|
||||
|
||||
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(readerToIteratorMap.get(reader),comparator);
|
||||
addIfNotEmpty(iterator);
|
||||
iteratorToSourceMap.put(iterator.getWrappedIterator(),reader);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For each reader, derive an iterator that can walk the entire file and associate that back to
|
||||
* @param readers The readers from which to derive iterators.
|
||||
* @return A map of reader to its associated iterator.
|
||||
*/
|
||||
private static Map<SAMFileReader,CloseableIterator<SAMRecord>> createWholeFileIterators(Collection<SAMFileReader> readers) {
|
||||
Map<SAMFileReader,CloseableIterator<SAMRecord>> readerToIteratorMap = new HashMap<SAMFileReader,CloseableIterator<SAMRecord>>();
|
||||
for(final SAMFileReader reader: readers)
|
||||
readerToIteratorMap.put(reader,reader.iterator());
|
||||
return readerToIteratorMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close down all open iterators.
|
||||
*/
|
||||
public void close() {
|
||||
// Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue.
|
||||
for(CloseableIterator<SAMRecord> iterator: pq)
|
||||
iterator.close();
|
||||
}
|
||||
|
||||
/** Returns true if any of the underlying iterators has more records, otherwise false. */
|
||||
public boolean hasNext() {
|
||||
return !this.pq.isEmpty();
|
||||
}
|
||||
|
||||
/** Returns the next record from the top most iterator during merging. */
|
||||
public SAMRecord next() {
|
||||
final ComparableSamRecordIterator iterator = this.pq.poll();
|
||||
final SAMRecord record = iterator.next();
|
||||
addIfNotEmpty(iterator);
|
||||
record.setHeader(this.samHeaderMerger.getMergedHeader());
|
||||
|
||||
// Fix the read group if needs be
|
||||
if (this.samHeaderMerger.hasReadGroupCollisions()) {
|
||||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID);
|
||||
if (oldGroupId != null ) {
|
||||
final String newGroupId = this.samHeaderMerger.getReadGroupId(iteratorToSourceMap.get(iterator.getWrappedIterator()), oldGroupId);
|
||||
record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId);
|
||||
}
|
||||
}
|
||||
|
||||
// Fix the program group if needs be
|
||||
if (this.samHeaderMerger.hasProgramGroupCollisions()) {
|
||||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID);
|
||||
if (oldGroupId != null ) {
|
||||
final String newGroupId = this.samHeaderMerger.getProgramGroupId(iteratorToSourceMap.get(iterator.getWrappedIterator()), oldGroupId);
|
||||
record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId);
|
||||
}
|
||||
}
|
||||
|
||||
// Fix up the sequence indexes if needs be
|
||||
if (this.samHeaderMerger.hasMergedSequenceDictionary()) {
|
||||
if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iteratorToSourceMap.get(iterator.getWrappedIterator()),record.getReferenceIndex()));
|
||||
}
|
||||
|
||||
if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iteratorToSourceMap.get(iterator.getWrappedIterator()), record.getMateReferenceIndex()));
|
||||
}
|
||||
}
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds iterator to priority queue. If the iterator has more records it is added
|
||||
* otherwise it is closed and not added.
|
||||
*/
|
||||
private void addIfNotEmpty(final ComparableSamRecordIterator iterator) {
|
||||
if (iterator.hasNext()) {
|
||||
pq.offer(iterator);
|
||||
}
|
||||
else {
|
||||
iterator.close();
|
||||
}
|
||||
}
|
||||
|
||||
/** Unsupported operation. */
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the right comparator for a given sort order (coordinate, alphabetic). In the
|
||||
* case of "unsorted" it will return a comparator that gives an arbitrary but reflexive
|
||||
* ordering.
|
||||
*/
|
||||
private SAMRecordComparator getComparator() {
|
||||
// For unsorted build a fake comparator that compares based on object ID
|
||||
if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
|
||||
return new SAMRecordComparator() {
|
||||
public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||
return System.identityHashCode(lhs) - System.identityHashCode(rhs);
|
||||
}
|
||||
|
||||
public int compare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||
return fileOrderCompare(lhs, rhs);
|
||||
}
|
||||
};
|
||||
}
|
||||
if (samHeaderMerger.hasMergedSequenceDictionary() && sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) {
|
||||
return new MergedSequenceDictionaryCoordinateOrderComparator();
|
||||
}
|
||||
|
||||
// Otherwise try and figure out what kind of comparator to return and build it
|
||||
final Class<? extends SAMRecordComparator> type = this.sortOrder.getComparator();
|
||||
|
||||
try {
|
||||
final Constructor<? extends SAMRecordComparator> ctor = type.getConstructor();
|
||||
return ctor.newInstance();
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new PicardException("Could not instantiate a comparator for sort order: " + this.sortOrder, e);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the merged header that the merging iterator is working from. */
|
||||
public SAMFileHeader getMergedHeader() {
|
||||
return this.samHeaderMerger.getMergedHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Ugh. Basically does a regular coordinate compare, but looks up the sequence indices in the merged
|
||||
* sequence dictionary. I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids
|
||||
* more copy & paste.
|
||||
*/
|
||||
private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator {
|
||||
|
||||
public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
|
||||
final int referenceIndex1 = getReferenceIndex(samRecord1);
|
||||
final int referenceIndex2 = getReferenceIndex(samRecord2);
|
||||
if (referenceIndex1 != referenceIndex2) {
|
||||
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return 1;
|
||||
} else if (referenceIndex2 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return -1;
|
||||
} else {
|
||||
return referenceIndex1 - referenceIndex2;
|
||||
}
|
||||
}
|
||||
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
// Both are unmapped.
|
||||
return 0;
|
||||
}
|
||||
return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart();
|
||||
}
|
||||
|
||||
private int getReferenceIndex(final SAMRecord samRecord) {
|
||||
if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getReferenceIndex());
|
||||
}
|
||||
if (samRecord.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getMateReferenceIndex());
|
||||
}
|
||||
return SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,391 +0,0 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package net.sf.samtools;
|
||||
|
||||
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.*;
|
||||
import java.nio.channels.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Class for reading BAM file indexes.
|
||||
*/
|
||||
public class BAMFileIndex2 extends BAMFileIndex
|
||||
{
|
||||
/**
|
||||
* Reports the total amount of genomic data that any bin can index.
|
||||
*/
|
||||
private static final int BIN_SPAN = 512*1024*1024;
|
||||
|
||||
/**
|
||||
* Reports the maximum number of bins in a BAM file index, based on the the pseudocode
|
||||
* in section 1.2 of the BAM spec.
|
||||
*/
|
||||
private static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
||||
|
||||
private static final int BAM_LIDX_SHIFT = 14;
|
||||
|
||||
/**
|
||||
* What is the starting bin for each level?
|
||||
*/
|
||||
private static final int[] LEVEL_STARTS = {0,1,9,73,585,4681};
|
||||
|
||||
/**
|
||||
* A mapping of reference sequence index to list of bins.
|
||||
*/
|
||||
protected final SortedMap<Integer,Bin[]> referenceToBins = new TreeMap<Integer,Bin[]>();
|
||||
|
||||
/**
|
||||
* A mapping of reference sequence index to linear indices.
|
||||
*/
|
||||
protected final SortedMap<Integer,LinearIndex> referenceToLinearIndices = new TreeMap<Integer,LinearIndex>();
|
||||
|
||||
/**
|
||||
* A mapping from bin to the chunks contained in that bin.
|
||||
*/
|
||||
protected final SortedMap<Bin,List<Chunk>> binToChunks = new TreeMap<Bin,List<Chunk>>();
|
||||
|
||||
protected BAMFileIndex2(final File file) {
|
||||
super(file);
|
||||
loadIndex(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of levels employed by this index.
|
||||
* @return Number of levels in this index.
|
||||
*/
|
||||
protected int getNumIndexLevels() {
|
||||
return LEVEL_STARTS.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the level associated with the given bin number.
|
||||
* @param binNumber The bin number for which to determine the level.
|
||||
* @return the level associated with the given bin number.
|
||||
*/
|
||||
protected int getLevelForBinNumber(final int binNumber) {
|
||||
if(binNumber >= MAX_BINS)
|
||||
throw new SAMException("Tried to get level for invalid bin.");
|
||||
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
|
||||
if(binNumber >= LEVEL_STARTS[i])
|
||||
return i;
|
||||
}
|
||||
throw new SAMException("Unable to find correct bin for bin number "+binNumber);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
protected int getFirstLocusInBin(final Bin bin) {
|
||||
final int level = getLevelForBinNumber(bin.binNumber);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (bin.binNumber - levelStart)*(BIN_SPAN/levelSize)+1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the last locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
protected int getLastLocusInBin(final Bin bin) {
|
||||
final int level = getLevelForBinNumber(bin.binNumber);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (bin.binNumber-levelStart+1)*(BIN_SPAN/levelSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Completely load the index into memory.
|
||||
* @param file File to load.
|
||||
*/
|
||||
private void loadIndex(final File file) {
|
||||
FileInputStream fileStream;
|
||||
FileChannel fileChannel;
|
||||
MappedByteBuffer fileBuffer;
|
||||
|
||||
try {
|
||||
fileStream = new FileInputStream(file);
|
||||
fileChannel = fileStream.getChannel();
|
||||
fileBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0L, fileChannel.size());
|
||||
fileBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
} catch (IOException exc) {
|
||||
throw new RuntimeIOException(exc.getMessage(), exc);
|
||||
}
|
||||
|
||||
try {
|
||||
final byte[] buffer = new byte[4];
|
||||
readBytes(fileBuffer,buffer);
|
||||
if (!Arrays.equals(buffer, BAMFileConstants.BAM_INDEX_MAGIC)) {
|
||||
throw new RuntimeException("Invalid file header in BAM index " + file +
|
||||
": " + new String(buffer));
|
||||
}
|
||||
|
||||
final int sequenceCount = readInteger(fileBuffer);
|
||||
for(int sequence = 0; sequence < sequenceCount; sequence++) {
|
||||
final int binCount = readInteger(fileBuffer);
|
||||
final Bin[] bins = new Bin[binCount];
|
||||
for(int bin = 0; bin < binCount; bin++) {
|
||||
List<Chunk> chunkList = new ArrayList<Chunk>();
|
||||
final int indexBin = readInteger(fileBuffer);
|
||||
final int nChunks = readInteger(fileBuffer);
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
final long chunkBegin = readLong(fileBuffer);
|
||||
final long chunkEnd = readLong(fileBuffer);
|
||||
chunkList.add(new Chunk(chunkBegin, chunkEnd));
|
||||
}
|
||||
bins[bin] = new Bin(sequence,indexBin);
|
||||
binToChunks.put(bins[bin],chunkList);
|
||||
}
|
||||
referenceToBins.put(sequence,bins);
|
||||
|
||||
int linearIndexSize = readInteger(fileBuffer);
|
||||
long[] linearIndex = new long[linearIndexSize];
|
||||
for(int indexEntry = 0; indexEntry < linearIndexSize; indexEntry++)
|
||||
linearIndex[indexEntry] = readLong(fileBuffer);
|
||||
|
||||
referenceToLinearIndices.put(sequence,new LinearIndex(sequence,linearIndex));
|
||||
}
|
||||
}
|
||||
finally {
|
||||
try {
|
||||
fileChannel.close();
|
||||
fileStream.close();
|
||||
} catch (IOException exc) {
|
||||
throw new RuntimeIOException(exc.getMessage(), exc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an overlapping query of all bins bounding the given location.
|
||||
* @param bin The bin over which to perform an overlapping query.
|
||||
* @return The file pointers
|
||||
*/
|
||||
long[] getFilePointersBounding(final Bin bin) {
|
||||
if(bin == null)
|
||||
return null;
|
||||
|
||||
final int referenceSequence = bin.referenceSequence;
|
||||
final Bin[] allBins = referenceToBins.get(referenceSequence);
|
||||
|
||||
final int binLevel = getLevelForBinNumber(bin.binNumber);
|
||||
final int firstLocusInBin = getFirstLocusInBin(bin);
|
||||
|
||||
List<Bin> binTree = new ArrayList<Bin>();
|
||||
binTree.add(bin);
|
||||
|
||||
int currentBinLevel = binLevel;
|
||||
while(--currentBinLevel >= 0) {
|
||||
final int binStart = LEVEL_STARTS[currentBinLevel];
|
||||
final int binWidth = BIN_SPAN/(LEVEL_STARTS[currentBinLevel+1]-LEVEL_STARTS[currentBinLevel]);
|
||||
final int binNumber = firstLocusInBin/binWidth + binStart;
|
||||
for(Bin referenceBin: allBins) {
|
||||
if(binNumber == referenceBin.binNumber)
|
||||
binTree.add(referenceBin);
|
||||
}
|
||||
}
|
||||
|
||||
List<Chunk> chunkList = new ArrayList<Chunk>();
|
||||
for(Bin coveringBin: binTree) {
|
||||
for(Chunk chunk: binToChunks.get(coveringBin))
|
||||
chunkList.add(chunk.clone());
|
||||
}
|
||||
|
||||
final int start = getFirstLocusInBin(bin)-1;
|
||||
final int regionLinearBin = start >> BAM_LIDX_SHIFT;
|
||||
LinearIndex index = referenceToLinearIndices.get(referenceSequence);
|
||||
long minimumOffset = 0;
|
||||
if (regionLinearBin < index.indexEntries.length)
|
||||
minimumOffset = index.indexEntries[regionLinearBin];
|
||||
|
||||
chunkList = optimizeChunkList(chunkList, minimumOffset);
|
||||
return convertToArray(chunkList);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get list of regions of BAM file that may contain SAMRecords for the given range
|
||||
* @param referenceIndex sequence of desired SAMRecords
|
||||
* @param startPos 1-based start of the desired interval, inclusive
|
||||
* @param endPos 1-based end of the desired interval, inclusive
|
||||
* @return array of pairs of virtual file positions. Each pair is the first and last
|
||||
* virtual file position in a range that can be scanned to find SAMRecords that overlap the given
|
||||
* positions. The last position in each pair is a virtual file pointer to the first SAMRecord beyond
|
||||
* the range that may contain the indicated SAMRecords.
|
||||
*/
|
||||
long[] getFilePointersContaining(final int referenceIndex, final int startPos, final int endPos) {
|
||||
List<Bin> bins = getBinsContaining(referenceIndex,startPos,endPos);
|
||||
// System.out.println("# Sequence target TID: " + referenceIndex);
|
||||
if (bins == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<Chunk> chunkList = new ArrayList<Chunk>();
|
||||
for(Bin bin: bins) {
|
||||
for(Chunk chunk: binToChunks.get(bin))
|
||||
chunkList.add(chunk.clone());
|
||||
}
|
||||
|
||||
if (chunkList.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final int start = (startPos <= 0) ? 0 : startPos-1;
|
||||
final int regionLinearBin = start >> BAM_LIDX_SHIFT;
|
||||
// System.out.println("# regionLinearBin: " + regionLinearBin);
|
||||
LinearIndex index = referenceToLinearIndices.get(referenceIndex);
|
||||
long minimumOffset = 0;
|
||||
if (regionLinearBin < index.indexEntries.length)
|
||||
minimumOffset = index.indexEntries[regionLinearBin];
|
||||
chunkList = optimizeChunkList(chunkList, minimumOffset);
|
||||
return convertToArray(chunkList);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a list of bins in the BAM file that may contain SAMRecords for the given range.
|
||||
* @param referenceIndex sequence of desired SAMRecords
|
||||
* @param startPos 1-based start of the desired interval, inclusive
|
||||
* @param endPos 1-based end of the desired interval, inclusive
|
||||
* @return a list of bins that contain relevant data.
|
||||
*/
|
||||
List<Bin> getBinsContaining(final int referenceIndex, final int startPos, final int endPos) {
|
||||
List<Bin> filteredBins = new ArrayList<Bin>();
|
||||
|
||||
if (referenceIndex >= referenceToBins.size()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final BitSet regionBins = regionToBins(startPos, endPos);
|
||||
if (regionBins == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Bin[] bins = referenceToBins.get(referenceIndex);
|
||||
|
||||
for(Bin bin: bins) {
|
||||
if (regionBins.get(bin.binNumber))
|
||||
filteredBins.add(bin);
|
||||
}
|
||||
|
||||
return filteredBins;
|
||||
}
|
||||
|
||||
/**
|
||||
* Use to get close to the unmapped reads at the end of a BAM file.
|
||||
* @return The file offset of the first record in the last linear bin, or -1
|
||||
* if there are no elements in linear bins (i.e. no mapped reads).
|
||||
*/
|
||||
long getStartOfLastLinearBin() {
|
||||
LinearIndex lastLinearIndex = referenceToLinearIndices.get(referenceToLinearIndices.lastKey());
|
||||
return lastLinearIndex.indexEntries[lastLinearIndex.indexEntries.length-1];
|
||||
}
|
||||
|
||||
private List<Chunk> optimizeChunkList(final List<Chunk> chunkList, final long minimumOffset) {
|
||||
Chunk lastChunk = null;
|
||||
Collections.sort(chunkList);
|
||||
final List<Chunk> result = new ArrayList<Chunk>();
|
||||
for (final Chunk chunk : chunkList) {
|
||||
if (chunk.getChunkEnd() <= minimumOffset) {
|
||||
continue;
|
||||
}
|
||||
if (result.isEmpty()) {
|
||||
result.add(chunk);
|
||||
lastChunk = chunk;
|
||||
continue;
|
||||
}
|
||||
// Coalesce chunks that are in adjacent file blocks.
|
||||
// This is a performance optimization.
|
||||
final long lastFileBlock = getFileBlock(lastChunk.getChunkEnd());
|
||||
final long chunkFileBlock = getFileBlock(chunk.getChunkStart());
|
||||
if (chunkFileBlock - lastFileBlock > 1) {
|
||||
result.add(chunk);
|
||||
lastChunk = chunk;
|
||||
} else {
|
||||
if (chunk.getChunkEnd() > lastChunk.getChunkEnd()) {
|
||||
lastChunk.setChunkEnd(chunk.getChunkEnd());
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private long[] convertToArray(final List<Chunk> chunkList) {
|
||||
final int count = chunkList.size() * 2;
|
||||
if (count == 0) {
|
||||
return null;
|
||||
}
|
||||
int index = 0;
|
||||
final long[] result = new long[count];
|
||||
for (final Chunk chunk : chunkList) {
|
||||
result[index++] = chunk.getChunkStart();
|
||||
result[index++] = chunk.getChunkEnd();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get candidate bins for the specified region
|
||||
* @param startPos 1-based start of target region, inclusive.
|
||||
* @param endPos 1-based end of target region, inclusive.
|
||||
* @return bit set for each bin that may contain SAMRecords in the target region.
|
||||
*/
|
||||
protected BitSet regionToBins(final int startPos, final int endPos) {
|
||||
final int maxPos = 0x1FFFFFFF;
|
||||
final int start = (startPos <= 0) ? 0 : (startPos-1) & maxPos;
|
||||
final int end = (endPos <= 0) ? maxPos : (endPos-1) & maxPos;
|
||||
if (start > end) {
|
||||
return null;
|
||||
}
|
||||
int k;
|
||||
final BitSet bitSet = new BitSet(MAX_BINS);
|
||||
bitSet.set(0);
|
||||
for (k = LEVEL_STARTS[1] + (start>>26); k <= LEVEL_STARTS[1] + (end>>26); ++k) bitSet.set(k);
|
||||
for (k = LEVEL_STARTS[2] + (start>>23); k <= LEVEL_STARTS[2] + (end>>23); ++k) bitSet.set(k);
|
||||
for (k = LEVEL_STARTS[3] + (start>>20); k <= LEVEL_STARTS[3] + (end>>20); ++k) bitSet.set(k);
|
||||
for (k = LEVEL_STARTS[4] + (start>>17); k <= LEVEL_STARTS[4] + (end>>17); ++k) bitSet.set(k);
|
||||
for (k = LEVEL_STARTS[5] + (start>>14); k <= LEVEL_STARTS[5] + (end>>14); ++k) bitSet.set(k);
|
||||
return bitSet;
|
||||
}
|
||||
|
||||
private long getFileBlock(final long bgzfOffset) {
|
||||
return ((bgzfOffset >> 16L) & 0xFFFFFFFFFFFFL);
|
||||
}
|
||||
|
||||
private void readBytes(MappedByteBuffer source, final byte[] target) {
|
||||
source.get(target);
|
||||
}
|
||||
|
||||
private int readInteger(MappedByteBuffer source) {
|
||||
return source.getInt();
|
||||
}
|
||||
|
||||
private long readLong(MappedByteBuffer source) {
|
||||
return source.getLong();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,664 +0,0 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package net.sf.samtools;
|
||||
|
||||
|
||||
import net.sf.samtools.util.BinaryCodec;
|
||||
import net.sf.samtools.util.BlockCompressedInputStream;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import net.sf.samtools.util.StringLineReader;
|
||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.net.URL;
|
||||
|
||||
/**
 * Internal class for reading and querying BAM files.
 *
 * Supports sequential iteration over any input, and indexed region queries when
 * backed by a seekable source (File or URL) with an associated BAMFileIndex2.
 * Only one iterator may be open at a time; the current iterator must be closed
 * before another iteration or query is started.
 */
class BAMFileReader2
    extends SAMFileReader.ReaderImplementation {
    // True if reading from a File rather than an InputStream
    private boolean mIsSeekable = false;
    // For converting bytes into other primitive types
    private BinaryCodec mStream = null;
    // Underlying compressed data stream.
    private final BlockCompressedInputStream mCompressedInputStream;
    // Reader that owns this implementation; propagated into each record read.
    private SAMFileReader mFileReader = null;
    // Header decoded from the BAM preamble; null after close().
    private SAMFileHeader mFileHeader = null;
    // Populated if the file is seekable and an index exists
    private BAMFileIndex2 mFileIndex = null;
    // Virtual file offset of the first alignment record (seekable input only).
    private long mFirstRecordPointer = 0;
    // The single extant iterator; non-null while an iteration is in progress.
    private CloseableIterator<SAMRecord> mCurrentIterator = null;
    // If true, all SAMRecords are fully decoded as they are read.
    private final boolean eagerDecode;
    // For error-checking.
    private ValidationStringency mValidationStringency;

    /**
     * Prepare to read BAM from a stream (not seekable)
     * @param stream source of bytes.
     * @param eagerDecode if true, decode all BAM fields as reading rather than lazily.
     * @param validationStringency Controls how to handle invalid reads or header lines.
     */
    BAMFileReader2(final InputStream stream, final boolean eagerDecode, final ValidationStringency validationStringency)
        throws IOException {
        mIsSeekable = false;
        mCompressedInputStream = new BlockCompressedInputStream(stream);
        mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream));
        this.eagerDecode = eagerDecode;
        this.mValidationStringency = validationStringency;
        // Header is consumed immediately; stream is positioned at the first record.
        readHeader(null);
    }

    /**
     * Prepare to read BAM from a file (seekable)
     * @param file source of bytes.
     * @param eagerDecode if true, decode all BAM fields as reading rather than lazily.
     * @param validationStringency Controls how to handle invalid reads or header lines.
     */
    BAMFileReader2(final File file, final boolean eagerDecode, final ValidationStringency validationStringency)
        throws IOException {
        this(new BlockCompressedInputStream(file), eagerDecode, file.getAbsolutePath(), validationStringency);
    }

    /**
     * Prepare to read BAM from a URL (seekable).
     * @param url source of bytes.
     * @param eagerDecode if true, decode all BAM fields as reading rather than lazily.
     * @param validationStringency Controls how to handle invalid reads or header lines.
     */
    BAMFileReader2(final URL url, final boolean eagerDecode, final ValidationStringency validationStringency)
        throws IOException {
        this(new BlockCompressedInputStream(url), eagerDecode, url.toString(), validationStringency);
    }

    /**
     * Common constructor for seekable sources.
     * @param source used only for error reporting.
     */
    private BAMFileReader2(final BlockCompressedInputStream compressedInputStream, final boolean eagerDecode,
                           final String source, final ValidationStringency validationStringency)
        throws IOException {
        mIsSeekable = true;
        mCompressedInputStream = compressedInputStream;
        mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream));
        this.eagerDecode = eagerDecode;
        this.mValidationStringency = validationStringency;
        readHeader(source);
        // Remember where alignment records begin so iteration can be restarted.
        mFirstRecordPointer = mCompressedInputStream.getFilePointer();
    }

    /**
     * Sets the reader reading this file.
     * @param reader The source reader.
     */
    void setReader(SAMFileReader reader) {
        mFileReader = reader;
    }

    /**
     * Releases the underlying stream. After this call the reader is unusable.
     */
    void close() {
        if (mStream != null) {
            mStream.close();
        }
        mStream = null;
        mFileHeader = null;
        mFileIndex = null;
    }

    /**
     * @return the file index, if one exists, else null.
     */
    BAMFileIndex2 getFileIndex() {
        return mFileIndex;
    }

    /**
     * Attaches an index enabling region queries on this reader.
     */
    void setFileIndex(final BAMFileIndex2 fileIndex) {
        mFileIndex = fileIndex;
    }

    /**
     * @return header decoded from the BAM preamble; null after close().
     */
    SAMFileHeader getFileHeader() {
        return mFileHeader;
    }

    /**
     * Set error-checking level for subsequent SAMRecord reads.
     */
    void setValidationStringency(final SAMFileReader.ValidationStringency validationStringency) {
        this.mValidationStringency = validationStringency;
    }

    /**
     * @return current error-checking level applied to record reads.
     */
    SAMFileReader.ValidationStringency getValidationStringency() {
        return this.mValidationStringency;
    }

    /**
     * Prepare to iterate through the SAMRecords in file order.
     * Only a single iterator on a BAM file can be extant at a time. If getIterator() or a query method has been called once,
     * that iterator must be closed before getIterator() can be called again.
     * A somewhat peculiar aspect of this method is that if the file is not seekable, a second call to
     * getIterator() begins its iteration where the last one left off. That is the best that can be
     * done in that situation.
     */
    CloseableIterator<SAMRecord> getIterator() {
        if (mStream == null) {
            throw new IllegalStateException("File reader is closed");
        }
        if (mCurrentIterator != null) {
            throw new IllegalStateException("Iteration in progress");
        }
        if (mIsSeekable) {
            // Rewind to the first record so iteration restarts from the beginning.
            try {
                mCompressedInputStream.seek(mFirstRecordPointer);
            } catch (IOException exc) {
                throw new RuntimeException(exc.getMessage(), exc);
            }
        }
        mCurrentIterator = new BAMFileIterator();
        return mCurrentIterator;
    }

    /**
     * Prepare to iterate over only the records inside the given chunks of the file.
     * Same single-iterator restriction as getIterator().
     * @param chunks file regions (virtual offset pairs) to iterate, in order.
     */
    CloseableIterator<SAMRecord> getIterator(List<Chunk> chunks) {
        if (mStream == null) {
            throw new IllegalStateException("File reader is closed");
        }
        if (mCurrentIterator != null) {
            throw new IllegalStateException("Iteration in progress");
        }
        if (mIsSeekable) {
            try {
                mCompressedInputStream.seek(mFirstRecordPointer);
            } catch (IOException exc) {
                throw new RuntimeException(exc.getMessage(), exc);
            }
        }

        // Create an iterator over the given chunk boundaries.
        mCurrentIterator = new BAMFileIndexIterator(Chunk.toCoordinateArray(chunks));
        return mCurrentIterator;
    }

    /**
     * Looks up the index bins that may contain reads overlapping the given region.
     * @return matching bins, or an empty list if the sequence is not in the header.
     */
    public List<Bin> getOverlappingBins(final String sequence, final int start, final int end) {
        List<Bin> bins = Collections.emptyList();

        final SAMFileHeader fileHeader = getFileHeader();
        int referenceIndex = fileHeader.getSequenceIndex(sequence);
        if (referenceIndex != -1) {
            final BAMFileIndex2 fileIndex = getFileIndex();
            bins = fileIndex.getBinsContaining(referenceIndex, start, end);
        }

        return bins;
    }

    /**
     * Fetches the file chunks spanned by the given bin, per the index.
     * @return bounding chunks, or an empty list if the index has none for this bin.
     */
    public List<Chunk> getFilePointersBounding(final Bin bin) {
        final BAMFileIndex2 fileIndex = getFileIndex();
        long[] filePointers = fileIndex.getFilePointersBounding(bin);
        return (filePointers != null) ? Chunk.toChunkList(filePointers) : Collections.<Chunk>emptyList();
    }

    /**
     * @return the current virtual file offset of the underlying compressed stream.
     */
    public Long getFilePointer() {
        return mCompressedInputStream.getFilePointer();
    }

    /**
     * Prepare to iterate through the SAMRecords that match the given interval.
     * Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed
     * before calling any of the methods that return an iterator.
     *
     * Note that an unmapped SAMRecord may still have a reference name and an alignment start for sorting
     * purposes (typically this is the coordinate of its mate), and will be found by this method if the coordinate
     * matches the specified interval.
     *
     * Note that this method is not necessarily efficient in terms of disk I/O. The index does not have perfect
     * resolution, so some SAMRecords may be read and then discarded because they do not match the specified interval.
     *
     * @param sequence Reference sequence sought.
     * @param start Desired SAMRecords must overlap or be contained in the interval specified by start and end.
     * A value of zero implies the start of the reference sequence.
     * @param end A value of zero implies the end of the reference sequence.
     * @param contained If true, the alignments for the SAMRecords must be completely contained in the interval
     * specified by start and end. If false, the SAMRecords need only overlap the interval.
     * @return Iterator for the matching SAMRecords
     */
    CloseableIterator<SAMRecord> query(final String sequence, final int start, final int end, final boolean contained) {
        if (mStream == null) {
            throw new IllegalStateException("File reader is closed");
        }
        if (mCurrentIterator != null) {
            throw new IllegalStateException("Iteration in progress");
        }
        if (!mIsSeekable) {
            throw new UnsupportedOperationException("Cannot query stream-based BAM file");
        }
        if (mFileIndex == null) {
            throw new IllegalStateException("No BAM file index is available");
        }
        mCurrentIterator = createIndexIterator(sequence, start, end, contained? QueryType.CONTAINED: QueryType.OVERLAPPING);
        return mCurrentIterator;
    }

    /**
     * Prepare to iterate through the SAMRecords with the given alignment start.
     * Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed
     * before calling any of the methods that return an iterator.
     *
     * Note that an unmapped SAMRecord may still have a reference name and an alignment start for sorting
     * purposes (typically this is the coordinate of its mate), and will be found by this method if the coordinate
     * matches the specified interval.
     *
     * Note that this method is not necessarily efficient in terms of disk I/O. The index does not have perfect
     * resolution, so some SAMRecords may be read and then discarded because they do not match the specified interval.
     *
     * @param sequence Reference sequence sought.
     * @param start Alignment start sought.
     * @return Iterator for the matching SAMRecords.
     */
    CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence, final int start) {
        if (mStream == null) {
            throw new IllegalStateException("File reader is closed");
        }
        if (mCurrentIterator != null) {
            throw new IllegalStateException("Iteration in progress");
        }
        if (!mIsSeekable) {
            throw new UnsupportedOperationException("Cannot query stream-based BAM file");
        }
        if (mFileIndex == null) {
            throw new IllegalStateException("No BAM file index is available");
        }
        mCurrentIterator = createIndexIterator(sequence, start, -1, QueryType.STARTING_AT);
        return mCurrentIterator;
    }

    /**
     * Prepare to iterate over reads with no reference index, which by BAM
     * convention are stored after the last mapped read.
     */
    public CloseableIterator<SAMRecord> queryUnmapped() {
        if (mStream == null) {
            throw new IllegalStateException("File reader is closed");
        }
        if (mCurrentIterator != null) {
            throw new IllegalStateException("Iteration in progress");
        }
        if (!mIsSeekable) {
            throw new UnsupportedOperationException("Cannot query stream-based BAM file");
        }
        if (mFileIndex == null) {
            throw new IllegalStateException("No BAM file index is available");
        }
        try {
            final long startOfLastLinearBin = mFileIndex.getStartOfLastLinearBin();
            if (startOfLastLinearBin != -1) {
                mCompressedInputStream.seek(startOfLastLinearBin);
            } else {
                // No mapped reads in file, just start at the first read in file.
                mCompressedInputStream.seek(mFirstRecordPointer);
            }
            mCurrentIterator = new BAMFileIndexUnmappedIterator();
            return mCurrentIterator;
        } catch (IOException e) {
            throw new RuntimeException("IOException seeking to unmapped reads", e);
        }
    }

    /**
     * Reads the header from the file or stream
     * @param source Note that this is used only for reporting errors.
     */
    private void readHeader(final String source)
        throws IOException {

        final byte[] buffer = new byte[4];
        mStream.readBytes(buffer);
        if (!Arrays.equals(buffer, BAMFileConstants.BAM_MAGIC)) {
            throw new IOException("Invalid BAM file header");
        }

        final int headerTextLength = mStream.readInt();
        final String textHeader = mStream.readString(headerTextLength);
        final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec();
        headerCodec.setValidationStringency(mValidationStringency);
        mFileHeader = headerCodec.decode(new StringLineReader(textHeader),
                source);

        final int sequenceCount = mStream.readInt();
        if (mFileHeader.getSequenceDictionary().size() > 0) {
            // It is allowed to have binary sequences but no text sequences, so only validate if both are present
            if (sequenceCount != mFileHeader.getSequenceDictionary().size()) {
                throw new SAMFormatException("Number of sequences in text header (" +
                        mFileHeader.getSequenceDictionary().size() +
                        ") != number of sequences in binary header (" + sequenceCount + ") for file " + source);
            }
            for (int i = 0; i < sequenceCount; i++) {
                final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(source);
                final SAMSequenceRecord sequenceRecord = mFileHeader.getSequence(i);
                if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) {
                    throw new SAMFormatException("For sequence " + i + ", text and binary have different names in file " +
                            source);
                }
                if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) {
                    throw new SAMFormatException("For sequence " + i + ", text and binary have different lengths in file " +
                            source);
                }
            }
        } else {
            // If only binary sequences are present, copy them into mFileHeader
            final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>(sequenceCount);
            for (int i = 0; i < sequenceCount; i++) {
                sequences.add(readSequenceRecord(source));
            }
            mFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences));
        }
    }

    /**
     * Reads a single binary sequence record from the file or stream
     * @param source Note that this is used only for reporting errors.
     */
    private SAMSequenceRecord readSequenceRecord(final String source) {
        final int nameLength = mStream.readInt();
        if (nameLength <= 1) {
            throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source);
        }
        final String sequenceName = mStream.readString(nameLength - 1);
        // Skip the null terminator
        mStream.readByte();
        final int sequenceLength = mStream.readInt();
        return new SAMSequenceRecord(sequenceName, sequenceLength);
    }

    /**
     * Iterator for non-indexed sequential iteration through all SAMRecords in file.
     * Starting point of iteration is wherever current file position is when the iterator is constructed.
     */
    private class BAMFileIterator implements CloseableIterator<SAMRecord> {
        // Record buffered for the next call to next(); null when exhausted.
        private SAMRecord mNextRecord = null;
        private final BAMRecordCodec bamRecordCodec = new BAMRecordCodec(getFileHeader());
        private long samRecordIndex = 0; // Records at what position (counted in records) we are at in the file

        BAMFileIterator() {
            this(true);
        }

        /**
         * @param advance Trick to enable subclass to do more setup before advancing
         */
        BAMFileIterator(final boolean advance) {
            this.bamRecordCodec.setInputStream(BAMFileReader2.this.mStream.getInputStream());

            if (advance) {
                advance();
            }
        }

        public void close() {
            if (this != mCurrentIterator) {
                throw new IllegalStateException("Attempt to close non-current iterator");
            }
            // Releasing the slot allows a new iterator/query to be opened.
            mCurrentIterator = null;
        }

        public boolean hasNext() {
            return (mNextRecord != null);
        }

        public SAMRecord next() {
            final SAMRecord result = mNextRecord;
            advance();
            return result;
        }

        public void remove() {
            throw new UnsupportedOperationException("Not supported: remove");
        }

        /**
         * Buffers the next record (or null at end of input), recording its
         * file coordinates and applying validation per the current stringency.
         */
        void advance() {
            try {
                long startCoordinate = mCompressedInputStream.getFilePointer();
                mNextRecord = getNextRecord();
                long stopCoordinate = mCompressedInputStream.getFilePointer();

                if (mNextRecord != null) {
                    ++this.samRecordIndex;
                    // Because some decoding is done lazily, the record needs to remember the validation stringency.
                    mNextRecord.setReader(mFileReader);
                    mNextRecord.setValidationStringency(mValidationStringency);
                    mNextRecord.setCoordinates(new Chunk(startCoordinate,stopCoordinate));

                    if (mValidationStringency != ValidationStringency.SILENT) {
                        final List<SAMValidationError> validationErrors = mNextRecord.isValid();
                        SAMUtils.processValidationErrors(validationErrors,
                                this.samRecordIndex, BAMFileReader2.this.getValidationStringency());
                    }
                }
                if (eagerDecode && mNextRecord != null) {
                    mNextRecord.eagerDecode();
                }
            } catch (IOException exc) {
                throw new RuntimeException(exc.getMessage(), exc);
            }
        }

        /**
         * Read the next record from the input stream.
         */
        SAMRecord getNextRecord() throws IOException {
            return bamRecordCodec.decode();
        }

        /**
         * @return The record that will be return by the next call to next()
         */
        protected SAMRecord peek() {
            return mNextRecord;
        }
    }

    // Boundary rules for index-backed queries.
    enum QueryType {CONTAINED, OVERLAPPING, STARTING_AT}

    /**
     * Creates an iterator over indexed data in the specified range.
     * @param sequence Sequence to which to constrain the data.
     * @param start Starting position within the above sequence to which the data should be constrained.
     * @param end Ending position within the above sequence to which the data should be constrained.
     * @param queryType Type of query. Useful for establishing the boundary rules.
     * @return An iterator over the requested data.
     */
    private CloseableIterator<SAMRecord> createIndexIterator(final String sequence,
                                                             final int start,
                                                             final int end,
                                                             final QueryType queryType) {
        long[] filePointers = null;

        // Hit the index to determine the chunk boundaries for the required data.
        final SAMFileHeader fileHeader = getFileHeader();
        int referenceIndex = fileHeader.getSequenceIndex(sequence);
        if (referenceIndex != -1) {
            final BAMFileIndex2 fileIndex = getFileIndex();
            filePointers = fileIndex.getFilePointersContaining(referenceIndex, start, end);
        }

        // Create an iterator over the above chunk boundaries.
        BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers);

        // Add some preprocessing filters for edge-case reads that don't fit into this
        // query type.
        return new BAMQueryFilteringIterator(iterator,sequence,start,end,queryType);
    }

    /**
     * Sequential iterator constrained to the file regions named by an array of
     * (start, end) virtual-offset pairs, seeking between regions as needed.
     */
    private class BAMFileIndexIterator
        extends BAMFileIterator {

        // Alternating start/end virtual offsets; null means no regions at all.
        private long[] mFilePointers = null;
        // Next unconsumed entry in mFilePointers.
        private int mFilePointerIndex = 0;
        // End offset of the region currently being read.
        private long mFilePointerLimit = -1;

        BAMFileIndexIterator(final long[] filePointers) {
            super(false); // delay advance() until after construction
            mFilePointers = filePointers;
            advance();
        }

        SAMRecord getNextRecord()
            throws IOException {
            while (true) {
                // Advance to next file block if necessary
                while (mCompressedInputStream.getFilePointer() >= mFilePointerLimit) {
                    if (mFilePointers == null ||
                            mFilePointerIndex >= mFilePointers.length) {
                        return null;
                    }
                    final long startOffset = mFilePointers[mFilePointerIndex++];
                    final long endOffset = mFilePointers[mFilePointerIndex++];
                    mCompressedInputStream.seek(startOffset);
                    mFilePointerLimit = endOffset;
                }
                // Pull next record from stream
                return super.getNextRecord();
            }
        }
    }

    /**
     * A decorating iterator that filters out records that are outside the bounds of the
     * given query parameters.
     */
    private class BAMQueryFilteringIterator implements CloseableIterator<SAMRecord> {
        /**
         * The wrapped iterator.
         */
        private final CloseableIterator<SAMRecord> wrappedIterator;

        /**
         * The next record to be returned.  Will be null if no such record exists.
         */
        private SAMRecord nextRead;

        // Query region: reference index plus 1-based [mRegionStart, mRegionEnd].
        private final int mReferenceIndex;
        private final int mRegionStart;
        private final int mRegionEnd;
        private final QueryType mQueryType;

        public BAMQueryFilteringIterator(final CloseableIterator<SAMRecord> iterator,final String sequence, final int start, final int end, final QueryType queryType) {
            this.wrappedIterator = iterator;
            final SAMFileHeader fileHeader = getFileHeader();
            mReferenceIndex = fileHeader.getSequenceIndex(sequence);
            mRegionStart = start;
            if (queryType == QueryType.STARTING_AT) {
                // A STARTING_AT query is a point query: end collapses to start.
                mRegionEnd = mRegionStart;
            } else {
                mRegionEnd = (end <= 0) ? Integer.MAX_VALUE : end;
            }
            mQueryType = queryType;
            nextRead = advance();
        }

        /**
         * Returns true if a next element exists; false otherwise.
         */
        public boolean hasNext() {
            return nextRead != null;
        }

        /**
         * Gets the next record from the given iterator.
         * @return The next SAM record in the iterator.
         */
        public SAMRecord next() {
            if(!hasNext())
                throw new NoSuchElementException("BAMQueryFilteringIterator: no next element available");
            final SAMRecord currentRead = nextRead;
            nextRead = advance();
            return currentRead;
        }

        /**
         * Closes down the existing iterator.
         */
        public void close() {
            if (this != mCurrentIterator) {
                throw new IllegalStateException("Attempt to close non-current iterator");
            }
            mCurrentIterator = null;
        }

        /**
         * @throws UnsupportedOperationException always.
         */
        public void remove() {
            throw new UnsupportedOperationException("Not supported: remove");
        }

        /**
         * Scans forward to the next record satisfying the query, or null when
         * the scan has passed the end of the query region.
         */
        SAMRecord advance() {
            while (true) {
                // Pull next record from stream
                if(!wrappedIterator.hasNext())
                    return null;

                final SAMRecord record = wrappedIterator.next();
                // If beyond the end of this reference sequence, end iteration
                final int referenceIndex = record.getReferenceIndex();
                if (referenceIndex != mReferenceIndex) {
                    if (referenceIndex < 0 ||
                            referenceIndex > mReferenceIndex) {
                        return null;
                    }
                    // If before this reference sequence, continue
                    continue;
                }
                if (mRegionStart == 0 && mRegionEnd == Integer.MAX_VALUE) {
                    // Quick exit to avoid expensive alignment end calculation
                    return record;
                }
                final int alignmentStart = record.getAlignmentStart();
                // If read is unmapped but has a coordinate, return it if the coordinate is within
                // the query region, regardless of whether the mapped mate will be returned.
                final int alignmentEnd;
                if (mQueryType == QueryType.STARTING_AT) {
                    alignmentEnd = -1;
                } else {
                    alignmentEnd = (record.getAlignmentEnd() != SAMRecord.NO_ALIGNMENT_START?
                            record.getAlignmentEnd(): alignmentStart);
                }

                if (alignmentStart > mRegionEnd) {
                    // If scanned beyond target region, end iteration
                    return null;
                }
                // Filter for overlap with region
                if (mQueryType == QueryType.CONTAINED) {
                    if (alignmentStart >= mRegionStart && alignmentEnd <= mRegionEnd) {
                        return record;
                    }
                } else if (mQueryType == QueryType.OVERLAPPING) {
                    if (alignmentEnd >= mRegionStart && alignmentStart <= mRegionEnd) {
                        return record;
                    }
                } else {
                    if (alignmentStart == mRegionStart) {
                        return record;
                    }
                }
            }
        }
    }

    /**
     * Iterator that skips ahead past every record with a reference index,
     * leaving only the unmapped reads stored at the end of the file.
     */
    private class BAMFileIndexUnmappedIterator extends BAMFileIterator {
        private BAMFileIndexUnmappedIterator() {
            while (this.hasNext() && peek().getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                advance();
            }
        }
    }

}
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
package net.sf.samtools;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * An individual bin in a BAM file.
 *
 * Identified by the pair (reference sequence, bin number); two bins with the
 * same pair are treated as equal regardless of their contents.
 *
 * @author mhanna
 * @version 0.1
 */
public class Bin implements Comparable {
    /**
     * The reference sequence associated with this bin.
     */
    public final int referenceSequence;

    /**
     * The number of this bin within the BAM file.
     */
    public final int binNumber;

    public Bin(int referenceSequence, int binNumber) {
        this.referenceSequence = referenceSequence;
        this.binNumber = binNumber;
    }

    /**
     * See whether two bins are equal.  If the ref seq and the bin number
     * are equal, assume equality of the chunk list.
     * @param other The other Bin to which to compare this.
     * @return True if the two bins are equal.  False otherwise.
     */
    @Override
    public boolean equals(Object other) {
        if(other == null) return false;
        if(!(other instanceof Bin)) return false;

        Bin otherBin = (Bin)other;
        return this.referenceSequence == otherBin.referenceSequence && this.binNumber == otherBin.binNumber;
    }

    /**
     * Compute a unique hash code for the given reference sequence and bin number.
     * @return A unique hash code.
     */
    @Override
    public int hashCode() {
        return ((Integer)referenceSequence).hashCode() ^ ((Integer)binNumber).hashCode();
    }

    /**
     * Compare two bins to see what ordering they should appear in.
     * Ordered first by reference sequence, then by bin number.
     * @param other Other bin to which this bin should be compared.
     * @throws NullPointerException if other is null, per the Comparable contract.
     * @throws ClassCastException if other is not a Bin.
     * @return negative if this &lt; other, 0 if this == other, positive if this &gt; other.
     */
    @Override
    public int compareTo(Object other) {
        if(other == null)
            // Comparable specifies NullPointerException for a null argument
            // (previously this incorrectly threw ClassCastException).
            throw new NullPointerException("Cannot compare to a null object");
        Bin otherBin = (Bin)other;

        // Check the reference sequences first.
        if(this.referenceSequence != otherBin.referenceSequence)
            return ((Integer)referenceSequence).compareTo(otherBin.referenceSequence);

        // Then check the bin ordering.
        return ((Integer)binNumber).compareTo(otherBin.binNumber);
    }
}
|
||||
|
|
@ -1,114 +0,0 @@
|
|||
package net.sf.samtools;
|
||||
|
||||
import net.sf.picard.PicardException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Represents a chunk stolen from the BAM file. Originally a private static inner class within
|
||||
* BAMFileIndex; now breaking it out so that the sharding system can use it.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class Chunk implements Cloneable,Comparable<Chunk> {
|
||||
|
||||
private long mChunkStart;
|
||||
private long mChunkEnd;
|
||||
|
||||
public Chunk(final long start, final long end) {
|
||||
mChunkStart = start;
|
||||
mChunkEnd = end;
|
||||
}
|
||||
|
||||
protected Chunk clone() {
|
||||
return new Chunk(mChunkStart,mChunkEnd);
|
||||
}
|
||||
|
||||
public long getChunkStart() {
|
||||
return mChunkStart;
|
||||
}
|
||||
|
||||
public void setChunkStart(final long value) {
|
||||
mChunkStart = value;
|
||||
}
|
||||
|
||||
public long getChunkEnd() {
|
||||
return mChunkEnd;
|
||||
}
|
||||
|
||||
public void setChunkEnd(final long value) {
|
||||
mChunkEnd = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* The list of chunks is often represented as an array of
|
||||
* longs where every even-numbered index is a start coordinate
|
||||
* and every odd-numbered index is a stop coordinate. Convert
|
||||
* from that format back to a list of chunks.
|
||||
* @param coordinateArray List of chunks to convert.
|
||||
* @return A list of chunks.
|
||||
*/
|
||||
public static List<Chunk> toChunkList(long[] coordinateArray) {
|
||||
if(coordinateArray.length % 2 != 0)
|
||||
throw new PicardException("Data supplied does not appear to be in coordinate array format.");
|
||||
|
||||
// TODO: possibly also check for monotonically increasing; this seems to be an implicit requirement of this format.
|
||||
List<Chunk> chunkList = new ArrayList<Chunk>();
|
||||
for(int i = 0; i < coordinateArray.length; i += 2)
|
||||
chunkList.add(new Chunk(coordinateArray[i],coordinateArray[i+1]));
|
||||
|
||||
return chunkList;
|
||||
}
|
||||
|
||||
/**
|
||||
* The list of chunks is often represented as an array of
|
||||
* longs where every even-numbered index is a start coordinate
|
||||
* and every odd-numbered index is a stop coordinate.
|
||||
* @param chunks List of chunks to convert.
|
||||
* @return A long array of the format described above.
|
||||
*/
|
||||
public static long[] toCoordinateArray(List<Chunk> chunks) {
|
||||
long[] coordinateArray = new long[chunks.size()*2];
|
||||
int position = 0;
|
||||
for(Chunk chunk: chunks) {
|
||||
coordinateArray[position++] = chunk.getChunkStart();
|
||||
coordinateArray[position++] = chunk.getChunkEnd();
|
||||
}
|
||||
return coordinateArray;
|
||||
}
|
||||
|
||||
public int compareTo(final Chunk chunk) {
|
||||
int result = Long.signum(mChunkStart - chunk.mChunkStart);
|
||||
if (result == 0) {
|
||||
result = Long.signum(mChunkEnd - chunk.mChunkEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
final Chunk chunk = (Chunk) o;
|
||||
|
||||
if (mChunkEnd != chunk.mChunkEnd) return false;
|
||||
if (mChunkStart != chunk.mChunkStart) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = (int) (mChunkStart ^ (mChunkStart >>> 32));
|
||||
result = 31 * result + (int) (mChunkEnd ^ (mChunkEnd >>> 32));
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d:%d-%d:%d",mChunkStart >> 16,mChunkStart & 0xFFFF,mChunkEnd >> 16,mChunkEnd & 0xFFFF);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
package net.sf.samtools;
|
||||
|
||||
/**
|
||||
* The linear index associated with a given reference in a BAM index.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class LinearIndex {
|
||||
/**
|
||||
* The reference sequence number for this linear index.
|
||||
*/
|
||||
public final int referenceSequence;
|
||||
|
||||
/**
|
||||
* The linear index entries within this bin.
|
||||
*/
|
||||
public final long[] indexEntries;
|
||||
|
||||
public LinearIndex(final int referenceSequence, final long[] indexEntries) {
|
||||
this.referenceSequence = referenceSequence;
|
||||
this.indexEntries = indexEntries;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,191 +0,0 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package net.sf.samtools;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.List;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
||||
import org.broadinstitute.sting.utils.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
/**
|
||||
* Class for reading and querying SAM/BAM files. Delegates to appropriate concrete implementation.
|
||||
*/
|
||||
public class SAMFileReader2 extends SAMFileReader {
|
||||
private final File sourceFile;
|
||||
|
||||
/**
|
||||
* Prepare to read a SAM or BAM file. If the given file is a BAM, and has a companion BAI index file
|
||||
*/
|
||||
public SAMFileReader2(final File file) {
|
||||
this(file, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a SAM or BAM file, possibly with an index file if present.
|
||||
* If the given file is a BAM, and an index is present, indexed query will be allowed.
|
||||
*
|
||||
* @param file SAM or BAM.
|
||||
* @param eagerDecode if true, decode SAM record entirely when reading it.
|
||||
*/
|
||||
public SAMFileReader2(final File file, final boolean eagerDecode) {
|
||||
this(file,null,eagerDecode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a SAM or BAM file, possibly with an index file. If the given file is a BAM, and an index is present,
|
||||
* indexed query will be allowed.
|
||||
*
|
||||
* @param file SAM or BAM.
|
||||
* @param indexFile Location of index file, or null in order to use the default index file (if present).
|
||||
* @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it.
|
||||
*/
|
||||
public SAMFileReader2(final File file, File indexFile, final boolean eagerDecode){
|
||||
super(file,indexFile,eagerDecode);
|
||||
this.sourceFile = file;
|
||||
close();
|
||||
|
||||
try {
|
||||
BAMFileReader2 reader = new BAMFileReader2(file,eagerDecode,getDefaultValidationStringency());
|
||||
reader.setReader(this);
|
||||
JVMUtils.setFieldValue(getField("mReader"),this,reader);
|
||||
|
||||
if(indexFile != null || findIndexFileFromParent(file) != null) {
|
||||
BAMFileIndex2 index = new BAMFileIndex2(indexFile != null ? indexFile : findIndexFileFromParent(file));
|
||||
reader.setFileIndex(index);
|
||||
JVMUtils.setFieldValue(getField("mFileIndex"),this,index);
|
||||
}
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new StingException("Unable to load BAM file: " + file,ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of levels employed by this index.
|
||||
* @return Number of levels in this index.
|
||||
*/
|
||||
public int getNumIndexLevels() {
|
||||
final BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this);
|
||||
if(fileIndex == null)
|
||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||
return fileIndex.getNumIndexLevels();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the level associated with the given bin number.
|
||||
* @param bin The bin for which to determine the level.
|
||||
* @return the level associated with the given bin number.
|
||||
*/
|
||||
public int getLevelForBin(final Bin bin) {
|
||||
final BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this);
|
||||
if(fileIndex == null)
|
||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||
return fileIndex.getLevelForBinNumber(bin.binNumber);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getFirstLocusInBin(final Bin bin) {
|
||||
final BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this);
|
||||
if(fileIndex == null)
|
||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||
return fileIndex.getFirstLocusInBin(bin);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the last locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getLastLocusInBin(final Bin bin) {
|
||||
final BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this);
|
||||
if(fileIndex == null)
|
||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||
return fileIndex.getLastLocusInBin(bin);
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate through the given chunks in the file.
|
||||
* @param chunks List of chunks for which to retrieve data.
|
||||
* @return An iterator over the given chunks.
|
||||
*/
|
||||
public CloseableIterator<SAMRecord> iterator(List<Chunk> chunks) {
|
||||
// TODO: Add sanity checks so that we're not doing this against an unsupported BAM file.
|
||||
BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this);
|
||||
return reader.getIterator(chunks);
|
||||
}
|
||||
|
||||
public List<Bin> getOverlappingBins(final String sequence, final int start, final int end) {
|
||||
// TODO: Add sanity checks so that we're not doing this against an unsupported BAM file.
|
||||
BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this);
|
||||
return reader.getOverlappingBins(sequence,start,end);
|
||||
}
|
||||
|
||||
public List<Chunk> getFilePointersBounding(final Bin bin) {
|
||||
// TODO: Add sanity checks so that we're not doing this against an unsupported BAM file.
|
||||
BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this);
|
||||
return reader.getFilePointersBounding(bin);
|
||||
}
|
||||
|
||||
public Chunk getCurrentPosition() {
|
||||
// TODO: Add sanity checks so that we're not doing this against an unsupported BAM file.
|
||||
BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this);
|
||||
return new Chunk(reader.getFilePointer(),Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
private Field getField(String fieldName) {
|
||||
try {
|
||||
return getClass().getSuperclass().getDeclaredField(fieldName);
|
||||
}
|
||||
catch(NoSuchFieldException ex) {
|
||||
throw new StingException("Unable to load field: " + fieldName);
|
||||
}
|
||||
}
|
||||
|
||||
private File findIndexFileFromParent(File bamFile) {
|
||||
try {
|
||||
Method method = getClass().getSuperclass().getDeclaredMethod("findIndexFile",File.class);
|
||||
method.setAccessible(true);
|
||||
return (File)method.invoke(this,bamFile);
|
||||
}
|
||||
catch(IllegalAccessException ex) {
|
||||
throw new StingException("Unable to run method findIndexFile",ex);
|
||||
}
|
||||
catch(InvocationTargetException ex) {
|
||||
throw new StingException("Unable to run method findIndexFile",ex);
|
||||
}
|
||||
catch(NoSuchMethodException ex) {
|
||||
throw new StingException("Unable to run method findIndexFile",ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1198" status="integration" publication="20091228124200" />
|
||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1333-sharding" status="integration" publication="20100318121400" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="picard" revision="1.12.256" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="picard" revision="1.16.359-sharding" status="release" />
|
||||
</ivy-module>
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="sam" revision="1.12.256" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="sam" revision="1.16.359-sharding" status="release" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue