diff --git a/java/src/net/sf/picard/sam/ComparableSamRecordIterator.java b/java/src/net/sf/picard/sam/ComparableSamRecordIterator.java deleted file mode 100644 index eb1101eaa..000000000 --- a/java/src/net/sf/picard/sam/ComparableSamRecordIterator.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package net.sf.picard.sam; - -import net.sf.picard.util.PeekableIterator; - -import java.util.Comparator; -import java.util.Iterator; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.util.CloseableIterator; - -/** - * Iterator for SAM records that implements comparable to enable sorting of iterators. - * The comparison is performed by comparing the next record in the iterator to the next - * record in another iterator and returning the ordering between those SAM records. - */ -class ComparableSamRecordIterator extends PeekableIterator implements Comparable { - private final CloseableIterator iterator; - private final Comparator comparator; - - /** - * Constructs a wrapping iterator around the given iterator that will be able - * to compare itself to other ComparableSamRecordIterators using the given comparator. - * - * @param iterator the wrapped iterator. - * @param comparator the Comparator to use to provide ordering fo SAMRecords - */ - public ComparableSamRecordIterator(final CloseableIterator iterator, final Comparator comparator) { - super(iterator); - this.iterator = iterator; - this.comparator = comparator; - } - - public CloseableIterator getWrappedIterator() { - return iterator; - } - - /** - * Compares this iterator to another comparable iterator based on the next record - * available in each iterator. If the two comparable iterators have different - * comparator types internally an exception is thrown. - * - * @param that another iterator to compare to - * @return a negative, 0 or positive number as described in the Comparator interface - */ - public int compareTo(final ComparableSamRecordIterator that) { - if (this.comparator.getClass() != that.comparator.getClass()) { - throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + - "have different orderings internally"); - } - - final SAMRecord record = this.peek(); - final SAMRecord record2 = that.peek(); - return comparator.compare(record, record2); - } - - @Override - public boolean equals(final Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - return compareTo((ComparableSamRecordIterator)o) == 0; - } - - @Override - public int hashCode() { - throw new UnsupportedOperationException("ComparableSamRecordIterator should not be hashed because it can change value"); - } -} diff --git a/java/src/net/sf/picard/sam/MergingSamRecordIterator.java b/java/src/net/sf/picard/sam/MergingSamRecordIterator.java deleted file mode 100644 index 6160d1301..000000000 --- a/java/src/net/sf/picard/sam/MergingSamRecordIterator.java +++ /dev/null @@ -1,243 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package net.sf.picard.sam; - -import net.sf.picard.PicardException; - -import java.util.*; -import java.lang.reflect.Constructor; - -import net.sf.samtools.*; -import net.sf.samtools.util.CloseableIterator; - -/** - * Provides an iterator interface for merging multiple underlying iterators into a single - * iterable stream. The underlying iterators/files must all have the same sort order unless - * the requested output format is unsorted, in which case any combination is valid. - */ -public class MergingSamRecordIterator implements CloseableIterator { - private final PriorityQueue pq; - private final SamFileHeaderMerger samHeaderMerger; - private final SAMFileHeader.SortOrder sortOrder; - - /** - * Maps iterators back to the readers from which they are derived. - */ - private final Map,SAMFileReader> iteratorToSourceMap = new HashMap,SAMFileReader>(); - - /** - * Constructs a new merging iterator with the same set of readers and sort order as - * provided by the header merger parameter. - * @param headerMerger The merged header and contents of readers. - * @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order. - */ - public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) { - this(headerMerger,createWholeFileIterators(headerMerger.getReaders()),forcePresorted); - } - - /** - * Constructs a new merging iterator with a given merged header and a subset of readers. - * @param headerMerger The merged header and contents of readers. - * @param readerToIteratorMap A mapping of reader to iterator. - * @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order. - */ - public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final Map> readerToIteratorMap, final boolean forcePresorted) { - this.samHeaderMerger = headerMerger; - this.sortOrder = headerMerger.getMergedHeader().getSortOrder(); - final SAMRecordComparator comparator = getComparator(); - - final Collection readers = headerMerger.getReaders(); - this.pq = new PriorityQueue(readers.size()); - - for(final SAMFileReader reader: readerToIteratorMap.keySet()) { - if (!forcePresorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted && - reader.getFileHeader().getSortOrder() != this.sortOrder){ - throw new PicardException("Files are not compatible with sort order"); - } - - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(readerToIteratorMap.get(reader),comparator); - addIfNotEmpty(iterator); - iteratorToSourceMap.put(iterator.getWrappedIterator(),reader); - } - } - - /** - * For each reader, derive an iterator that can walk the entire file and associate that back to - * @param readers The readers from which to derive iterators. - * @return A map of reader to its associated iterator. - */ - private static Map> createWholeFileIterators(Collection readers) { - Map> readerToIteratorMap = new HashMap>(); - for(final SAMFileReader reader: readers) - readerToIteratorMap.put(reader,reader.iterator()); - return readerToIteratorMap; - } - - /** - * Close down all open iterators. - */ - public void close() { - // Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue. - for(CloseableIterator iterator: pq) - iterator.close(); - } - - /** Returns true if any of the underlying iterators has more records, otherwise false. */ - public boolean hasNext() { - return !this.pq.isEmpty(); - } - - /** Returns the next record from the top most iterator during merging. */ - public SAMRecord next() { - final ComparableSamRecordIterator iterator = this.pq.poll(); - final SAMRecord record = iterator.next(); - addIfNotEmpty(iterator); - record.setHeader(this.samHeaderMerger.getMergedHeader()); - - // Fix the read group if needs be - if (this.samHeaderMerger.hasReadGroupCollisions()) { - final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID); - if (oldGroupId != null ) { - final String newGroupId = this.samHeaderMerger.getReadGroupId(iteratorToSourceMap.get(iterator.getWrappedIterator()), oldGroupId); - record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId); - } - } - - // Fix the program group if needs be - if (this.samHeaderMerger.hasProgramGroupCollisions()) { - final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID); - if (oldGroupId != null ) { - final String newGroupId = this.samHeaderMerger.getProgramGroupId(iteratorToSourceMap.get(iterator.getWrappedIterator()), oldGroupId); - record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId); - } - } - - // Fix up the sequence indexes if needs be - if (this.samHeaderMerger.hasMergedSequenceDictionary()) { - if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { - record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iteratorToSourceMap.get(iterator.getWrappedIterator()),record.getReferenceIndex())); - } - - if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { - record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iteratorToSourceMap.get(iterator.getWrappedIterator()), record.getMateReferenceIndex())); - } - } - - return record; - } - - /** - * Adds iterator to priority queue. If the iterator has more records it is added - * otherwise it is closed and not added. - */ - private void addIfNotEmpty(final ComparableSamRecordIterator iterator) { - if (iterator.hasNext()) { - pq.offer(iterator); - } - else { - iterator.close(); - } - } - - /** Unsupported operation. */ - public void remove() { - throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()"); - } - - /** - * Get the right comparator for a given sort order (coordinate, alphabetic). In the - * case of "unsorted" it will return a comparator that gives an arbitrary but reflexive - * ordering. - */ - private SAMRecordComparator getComparator() { - // For unsorted build a fake comparator that compares based on object ID - if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) { - return new SAMRecordComparator() { - public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) { - return System.identityHashCode(lhs) - System.identityHashCode(rhs); - } - - public int compare(final SAMRecord lhs, final SAMRecord rhs) { - return fileOrderCompare(lhs, rhs); - } - }; - } - if (samHeaderMerger.hasMergedSequenceDictionary() && sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) { - return new MergedSequenceDictionaryCoordinateOrderComparator(); - } - - // Otherwise try and figure out what kind of comparator to return and build it - final Class type = this.sortOrder.getComparator(); - - try { - final Constructor ctor = type.getConstructor(); - return ctor.newInstance(); - } - catch (Exception e) { - throw new PicardException("Could not instantiate a comparator for sort order: " + this.sortOrder, e); - } - } - - /** Returns the merged header that the merging iterator is working from. */ - public SAMFileHeader getMergedHeader() { - return this.samHeaderMerger.getMergedHeader(); - } - - /** - * Ugh. Basically does a regular coordinate compare, but looks up the sequence indices in the merged - * sequence dictionary. I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids - * more copy & paste. - */ - private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator { - - public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) { - final int referenceIndex1 = getReferenceIndex(samRecord1); - final int referenceIndex2 = getReferenceIndex(samRecord2); - if (referenceIndex1 != referenceIndex2) { - if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { - return 1; - } else if (referenceIndex2 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { - return -1; - } else { - return referenceIndex1 - referenceIndex2; - } - } - if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { - // Both are unmapped. - return 0; - } - return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart(); - } - - private int getReferenceIndex(final SAMRecord samRecord) { - if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { - return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getReferenceIndex()); - } - if (samRecord.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { - return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getMateReferenceIndex()); - } - return SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; - } - } -} diff --git a/java/src/net/sf/samtools/BAMFileIndex2.java b/java/src/net/sf/samtools/BAMFileIndex2.java deleted file mode 100644 index 1def4e694..000000000 --- a/java/src/net/sf/samtools/BAMFileIndex2.java +++ /dev/null @@ -1,391 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package net.sf.samtools; - - -import net.sf.samtools.util.RuntimeIOException; - -import java.io.*; -import java.nio.*; -import java.nio.channels.*; -import java.util.*; - -/** - * Class for reading BAM file indexes. - */ -public class BAMFileIndex2 extends BAMFileIndex -{ - /** - * Reports the total amount of genomic data that any bin can index. - */ - private static final int BIN_SPAN = 512*1024*1024; - - /** - * Reports the maximum number of bins in a BAM file index, based on the the pseudocode - * in section 1.2 of the BAM spec. - */ - private static final int MAX_BINS = 37450; // =(8^6-1)/7+1 - - private static final int BAM_LIDX_SHIFT = 14; - - /** - * What is the starting bin for each level? - */ - private static final int[] LEVEL_STARTS = {0,1,9,73,585,4681}; - - /** - * A mapping of reference sequence index to list of bins. - */ - protected final SortedMap referenceToBins = new TreeMap(); - - /** - * A mapping of reference sequence index to linear indices. - */ - protected final SortedMap referenceToLinearIndices = new TreeMap(); - - /** - * A mapping from bin to the chunks contained in that bin. - */ - protected final SortedMap> binToChunks = new TreeMap>(); - - protected BAMFileIndex2(final File file) { - super(file); - loadIndex(file); - } - - /** - * Get the number of levels employed by this index. - * @return Number of levels in this index. - */ - protected int getNumIndexLevels() { - return LEVEL_STARTS.length; - } - - /** - * Gets the level associated with the given bin number. - * @param binNumber The bin number for which to determine the level. - * @return the level associated with the given bin number. - */ - protected int getLevelForBinNumber(final int binNumber) { - if(binNumber >= MAX_BINS) - throw new SAMException("Tried to get level for invalid bin."); - for(int i = getNumIndexLevels()-1; i >= 0; i--) { - if(binNumber >= LEVEL_STARTS[i]) - return i; - } - throw new SAMException("Unable to find correct bin for bin number "+binNumber); - } - - /** - * Gets the first locus that this bin can index into. - * @param bin The bin to test. - * @return The last position that the given bin can represent. - */ - protected int getFirstLocusInBin(final Bin bin) { - final int level = getLevelForBinNumber(bin.binNumber); - final int levelStart = LEVEL_STARTS[level]; - final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart; - return (bin.binNumber - levelStart)*(BIN_SPAN/levelSize)+1; - } - - /** - * Gets the last locus that this bin can index into. - * @param bin The bin to test. - * @return The last position that the given bin can represent. - */ - protected int getLastLocusInBin(final Bin bin) { - final int level = getLevelForBinNumber(bin.binNumber); - final int levelStart = LEVEL_STARTS[level]; - final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart; - return (bin.binNumber-levelStart+1)*(BIN_SPAN/levelSize); - } - - /** - * Completely load the index into memory. - * @param file File to load. - */ - private void loadIndex(final File file) { - FileInputStream fileStream; - FileChannel fileChannel; - MappedByteBuffer fileBuffer; - - try { - fileStream = new FileInputStream(file); - fileChannel = fileStream.getChannel(); - fileBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0L, fileChannel.size()); - fileBuffer.order(ByteOrder.LITTLE_ENDIAN); - } catch (IOException exc) { - throw new RuntimeIOException(exc.getMessage(), exc); - } - - try { - final byte[] buffer = new byte[4]; - readBytes(fileBuffer,buffer); - if (!Arrays.equals(buffer, BAMFileConstants.BAM_INDEX_MAGIC)) { - throw new RuntimeException("Invalid file header in BAM index " + file + - ": " + new String(buffer)); - } - - final int sequenceCount = readInteger(fileBuffer); - for(int sequence = 0; sequence < sequenceCount; sequence++) { - final int binCount = readInteger(fileBuffer); - final Bin[] bins = new Bin[binCount]; - for(int bin = 0; bin < binCount; bin++) { - List chunkList = new ArrayList(); - final int indexBin = readInteger(fileBuffer); - final int nChunks = readInteger(fileBuffer); - for (int ci = 0; ci < nChunks; ci++) { - final long chunkBegin = readLong(fileBuffer); - final long chunkEnd = readLong(fileBuffer); - chunkList.add(new Chunk(chunkBegin, chunkEnd)); - } - bins[bin] = new Bin(sequence,indexBin); - binToChunks.put(bins[bin],chunkList); - } - referenceToBins.put(sequence,bins); - - int linearIndexSize = readInteger(fileBuffer); - long[] linearIndex = new long[linearIndexSize]; - for(int indexEntry = 0; indexEntry < linearIndexSize; indexEntry++) - linearIndex[indexEntry] = readLong(fileBuffer); - - referenceToLinearIndices.put(sequence,new LinearIndex(sequence,linearIndex)); - } - } - finally { - try { - fileChannel.close(); - fileStream.close(); - } catch (IOException exc) { - throw new RuntimeIOException(exc.getMessage(), exc); - } - } - } - - /** - * Perform an overlapping query of all bins bounding the given location. - * @param bin The bin over which to perform an overlapping query. - * @return The file pointers - */ - long[] getFilePointersBounding(final Bin bin) { - if(bin == null) - return null; - - final int referenceSequence = bin.referenceSequence; - final Bin[] allBins = referenceToBins.get(referenceSequence); - - final int binLevel = getLevelForBinNumber(bin.binNumber); - final int firstLocusInBin = getFirstLocusInBin(bin); - - List binTree = new ArrayList(); - binTree.add(bin); - - int currentBinLevel = binLevel; - while(--currentBinLevel >= 0) { - final int binStart = LEVEL_STARTS[currentBinLevel]; - final int binWidth = BIN_SPAN/(LEVEL_STARTS[currentBinLevel+1]-LEVEL_STARTS[currentBinLevel]); - final int binNumber = firstLocusInBin/binWidth + binStart; - for(Bin referenceBin: allBins) { - if(binNumber == referenceBin.binNumber) - binTree.add(referenceBin); - } - } - - List chunkList = new ArrayList(); - for(Bin coveringBin: binTree) { - for(Chunk chunk: binToChunks.get(coveringBin)) - chunkList.add(chunk.clone()); - } - - final int start = getFirstLocusInBin(bin)-1; - final int regionLinearBin = start >> BAM_LIDX_SHIFT; - LinearIndex index = referenceToLinearIndices.get(referenceSequence); - long minimumOffset = 0; - if (regionLinearBin < index.indexEntries.length) - minimumOffset = index.indexEntries[regionLinearBin]; - - chunkList = optimizeChunkList(chunkList, minimumOffset); - return convertToArray(chunkList); - } - - /** - * Get list of regions of BAM file that may contain SAMRecords for the given range - * @param referenceIndex sequence of desired SAMRecords - * @param startPos 1-based start of the desired interval, inclusive - * @param endPos 1-based end of the desired interval, inclusive - * @return array of pairs of virtual file positions. Each pair is the first and last - * virtual file position in a range that can be scanned to find SAMRecords that overlap the given - * positions. The last position in each pair is a virtual file pointer to the first SAMRecord beyond - * the range that may contain the indicated SAMRecords. - */ - long[] getFilePointersContaining(final int referenceIndex, final int startPos, final int endPos) { - List bins = getBinsContaining(referenceIndex,startPos,endPos); - // System.out.println("# Sequence target TID: " + referenceIndex); - if (bins == null) { - return null; - } - - List chunkList = new ArrayList(); - for(Bin bin: bins) { - for(Chunk chunk: binToChunks.get(bin)) - chunkList.add(chunk.clone()); - } - - if (chunkList.isEmpty()) { - return null; - } - - final int start = (startPos <= 0) ? 0 : startPos-1; - final int regionLinearBin = start >> BAM_LIDX_SHIFT; - // System.out.println("# regionLinearBin: " + regionLinearBin); - LinearIndex index = referenceToLinearIndices.get(referenceIndex); - long minimumOffset = 0; - if (regionLinearBin < index.indexEntries.length) - minimumOffset = index.indexEntries[regionLinearBin]; - chunkList = optimizeChunkList(chunkList, minimumOffset); - return convertToArray(chunkList); - } - - /** - * Get a list of bins in the BAM file that may contain SAMRecords for the given range. - * @param referenceIndex sequence of desired SAMRecords - * @param startPos 1-based start of the desired interval, inclusive - * @param endPos 1-based end of the desired interval, inclusive - * @return a list of bins that contain relevant data. - */ - List getBinsContaining(final int referenceIndex, final int startPos, final int endPos) { - List filteredBins = new ArrayList(); - - if (referenceIndex >= referenceToBins.size()) { - return null; - } - - final BitSet regionBins = regionToBins(startPos, endPos); - if (regionBins == null) { - return null; - } - - Bin[] bins = referenceToBins.get(referenceIndex); - - for(Bin bin: bins) { - if (regionBins.get(bin.binNumber)) - filteredBins.add(bin); - } - - return filteredBins; - } - - /** - * Use to get close to the unmapped reads at the end of a BAM file. - * @return The file offset of the first record in the last linear bin, or -1 - * if there are no elements in linear bins (i.e. no mapped reads). - */ - long getStartOfLastLinearBin() { - LinearIndex lastLinearIndex = referenceToLinearIndices.get(referenceToLinearIndices.lastKey()); - return lastLinearIndex.indexEntries[lastLinearIndex.indexEntries.length-1]; - } - - private List optimizeChunkList(final List chunkList, final long minimumOffset) { - Chunk lastChunk = null; - Collections.sort(chunkList); - final List result = new ArrayList(); - for (final Chunk chunk : chunkList) { - if (chunk.getChunkEnd() <= minimumOffset) { - continue; - } - if (result.isEmpty()) { - result.add(chunk); - lastChunk = chunk; - continue; - } - // Coalesce chunks that are in adjacent file blocks. - // This is a performance optimization. - final long lastFileBlock = getFileBlock(lastChunk.getChunkEnd()); - final long chunkFileBlock = getFileBlock(chunk.getChunkStart()); - if (chunkFileBlock - lastFileBlock > 1) { - result.add(chunk); - lastChunk = chunk; - } else { - if (chunk.getChunkEnd() > lastChunk.getChunkEnd()) { - lastChunk.setChunkEnd(chunk.getChunkEnd()); - } - } - } - return result; - } - - private long[] convertToArray(final List chunkList) { - final int count = chunkList.size() * 2; - if (count == 0) { - return null; - } - int index = 0; - final long[] result = new long[count]; - for (final Chunk chunk : chunkList) { - result[index++] = chunk.getChunkStart(); - result[index++] = chunk.getChunkEnd(); - } - return result; - } - - /** - * Get candidate bins for the specified region - * @param startPos 1-based start of target region, inclusive. - * @param endPos 1-based end of target region, inclusive. - * @return bit set for each bin that may contain SAMRecords in the target region. - */ - protected BitSet regionToBins(final int startPos, final int endPos) { - final int maxPos = 0x1FFFFFFF; - final int start = (startPos <= 0) ? 0 : (startPos-1) & maxPos; - final int end = (endPos <= 0) ? maxPos : (endPos-1) & maxPos; - if (start > end) { - return null; - } - int k; - final BitSet bitSet = new BitSet(MAX_BINS); - bitSet.set(0); - for (k = LEVEL_STARTS[1] + (start>>26); k <= LEVEL_STARTS[1] + (end>>26); ++k) bitSet.set(k); - for (k = LEVEL_STARTS[2] + (start>>23); k <= LEVEL_STARTS[2] + (end>>23); ++k) bitSet.set(k); - for (k = LEVEL_STARTS[3] + (start>>20); k <= LEVEL_STARTS[3] + (end>>20); ++k) bitSet.set(k); - for (k = LEVEL_STARTS[4] + (start>>17); k <= LEVEL_STARTS[4] + (end>>17); ++k) bitSet.set(k); - for (k = LEVEL_STARTS[5] + (start>>14); k <= LEVEL_STARTS[5] + (end>>14); ++k) bitSet.set(k); - return bitSet; - } - - private long getFileBlock(final long bgzfOffset) { - return ((bgzfOffset >> 16L) & 0xFFFFFFFFFFFFL); - } - - private void readBytes(MappedByteBuffer source, final byte[] target) { - source.get(target); - } - - private int readInteger(MappedByteBuffer source) { - return source.getInt(); - } - - private long readLong(MappedByteBuffer source) { - return source.getLong(); - } -} diff --git a/java/src/net/sf/samtools/BAMFileReader2.java b/java/src/net/sf/samtools/BAMFileReader2.java deleted file mode 100644 index 0509b8df6..000000000 --- a/java/src/net/sf/samtools/BAMFileReader2.java +++ /dev/null @@ -1,664 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package net.sf.samtools; - - -import net.sf.samtools.util.BinaryCodec; -import net.sf.samtools.util.BlockCompressedInputStream; -import net.sf.samtools.util.CloseableIterator; -import net.sf.samtools.util.StringLineReader; -import net.sf.samtools.SAMFileReader.ValidationStringency; - -import java.io.*; -import java.util.*; -import java.net.URL; - -/** - * Internal class for reading and querying BAM files. - */ -class BAMFileReader2 - extends SAMFileReader.ReaderImplementation { - // True if reading from a File rather than an InputStream - private boolean mIsSeekable = false; - // For converting bytes into other primitive types - private BinaryCodec mStream = null; - // Underlying compressed data stream. - private final BlockCompressedInputStream mCompressedInputStream; - private SAMFileReader mFileReader = null; - private SAMFileHeader mFileHeader = null; - // Populated if the file is seekable and an index exists - private BAMFileIndex2 mFileIndex = null; - private long mFirstRecordPointer = 0; - private CloseableIterator mCurrentIterator = null; - // If true, all SAMRecords are fully decoded as they are read. - private final boolean eagerDecode; - // For error-checking. - private ValidationStringency mValidationStringency; - - /** - * Prepare to read BAM from a stream (not seekable) - * @param stream source of bytes. - * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. - * @param validationStringency Controls how to handle invalidate reads or header lines. - */ - BAMFileReader2(final InputStream stream, final boolean eagerDecode, final ValidationStringency validationStringency) - throws IOException { - mIsSeekable = false; - mCompressedInputStream = new BlockCompressedInputStream(stream); - mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); - this.eagerDecode = eagerDecode; - this.mValidationStringency = validationStringency; - readHeader(null); - } - - /** - * Prepare to read BAM from a file (seekable) - * @param file source of bytes. - * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. - * @param validationStringency Controls how to handle invalidate reads or header lines. - */ - BAMFileReader2(final File file, final boolean eagerDecode, final ValidationStringency validationStringency) - throws IOException { - this(new BlockCompressedInputStream(file), eagerDecode, file.getAbsolutePath(), validationStringency); - } - - - BAMFileReader2(final URL url, final boolean eagerDecode, final ValidationStringency validationStringency) - throws IOException { - this(new BlockCompressedInputStream(url), eagerDecode, url.toString(), validationStringency); - } - - private BAMFileReader2(final BlockCompressedInputStream compressedInputStream, final boolean eagerDecode, - final String source, final ValidationStringency validationStringency) - throws IOException { - mIsSeekable = true; - mCompressedInputStream = compressedInputStream; - mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); - this.eagerDecode = eagerDecode; - this.mValidationStringency = validationStringency; - readHeader(source); - mFirstRecordPointer = mCompressedInputStream.getFilePointer(); - } - - /** - * Sets the reader reading this file. - * @param reader The source reader. - */ - void setReader(SAMFileReader reader) { - mFileReader = reader; - } - - void close() { - if (mStream != null) { - mStream.close(); - } - mStream = null; - mFileHeader = null; - mFileIndex = null; - } - - /** - * @return the file index, if one exists, else null. - */ - BAMFileIndex2 getFileIndex() { - return mFileIndex; - } - - void setFileIndex(final BAMFileIndex2 fileIndex) { - mFileIndex = fileIndex; - } - - SAMFileHeader getFileHeader() { - return mFileHeader; - } - - /** - * Set error-checking level for subsequent SAMRecord reads. - */ - void setValidationStringency(final SAMFileReader.ValidationStringency validationStringency) { - this.mValidationStringency = validationStringency; - } - - SAMFileReader.ValidationStringency getValidationStringency() { - return this.mValidationStringency; - } - - /** - * Prepare to iterate through the SAMRecords in file order. - * Only a single iterator on a BAM file can be extant at a time. If getIterator() or a query method has been called once, - * that iterator must be closed before getIterator() can be called again. - * A somewhat peculiar aspect of this method is that if the file is not seekable, a second call to - * getIterator() begins its iteration where the last one left off. That is the best that can be - * done in that situation. - */ - CloseableIterator getIterator() { - if (mStream == null) { - throw new IllegalStateException("File reader is closed"); - } - if (mCurrentIterator != null) { - throw new IllegalStateException("Iteration in progress"); - } - if (mIsSeekable) { - try { - mCompressedInputStream.seek(mFirstRecordPointer); - } catch (IOException exc) { - throw new RuntimeException(exc.getMessage(), exc); - } - } - mCurrentIterator = new BAMFileIterator(); - return mCurrentIterator; - } - - CloseableIterator getIterator(List chunks) { - if (mStream == null) { - throw new IllegalStateException("File reader is closed"); - } - if (mCurrentIterator != null) { - throw new IllegalStateException("Iteration in progress"); - } - if (mIsSeekable) { - try { - mCompressedInputStream.seek(mFirstRecordPointer); - } catch (IOException exc) { - throw new RuntimeException(exc.getMessage(), exc); - } - } - - // Create an iterator over the given chunk boundaries. - mCurrentIterator = new BAMFileIndexIterator(Chunk.toCoordinateArray(chunks)); - return mCurrentIterator; - } - - public List getOverlappingBins(final String sequence, final int start, final int end) { - List bins = Collections.emptyList(); - - final SAMFileHeader fileHeader = getFileHeader(); - int referenceIndex = fileHeader.getSequenceIndex(sequence); - if (referenceIndex != -1) { - final BAMFileIndex2 fileIndex = getFileIndex(); - bins = fileIndex.getBinsContaining(referenceIndex, start, end); - } - - return bins; - } - - public List getFilePointersBounding(final Bin bin) { - final BAMFileIndex2 fileIndex = getFileIndex(); - long[] filePointers = fileIndex.getFilePointersBounding(bin); - return (filePointers != null) ? Chunk.toChunkList(filePointers) : Collections.emptyList(); - } - - public Long getFilePointer() { - return mCompressedInputStream.getFilePointer(); - } - - /** - * Prepare to iterate through the SAMRecords that match the given interval. - * Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed - * before calling any of the methods that return an iterator. - * - * Note that an unmapped SAMRecord may still have a reference name and an alignment start for sorting - * purposes (typically this is the coordinate of its mate), and will be found by this method if the coordinate - * matches the specified interval. - * - * Note that this method is not necessarily efficient in terms of disk I/O. The index does not have perfect - * resolution, so some SAMRecords may be read and then discarded because they do not match the specified interval. - * - * @param sequence Reference sequence sought. - * @param start Desired SAMRecords must overlap or be contained in the interval specified by start and end. - * A value of zero implies the start of the reference sequence. - * @param end A value of zero implies the end of the reference sequence. - * @param contained If true, the alignments for the SAMRecords must be completely contained in the interval - * specified by start and end. If false, the SAMRecords need only overlap the interval. - * @return Iterator for the matching SAMRecords - */ - CloseableIterator query(final String sequence, final int start, final int end, final boolean contained) { - if (mStream == null) { - throw new IllegalStateException("File reader is closed"); - } - if (mCurrentIterator != null) { - throw new IllegalStateException("Iteration in progress"); - } - if (!mIsSeekable) { - throw new UnsupportedOperationException("Cannot query stream-based BAM file"); - } - if (mFileIndex == null) { - throw new IllegalStateException("No BAM file index is available"); - } - mCurrentIterator = createIndexIterator(sequence, start, end, contained? QueryType.CONTAINED: QueryType.OVERLAPPING); - return mCurrentIterator; - } - - /** - * Prepare to iterate through the SAMRecords with the given alignment start. - * Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed - * before calling any of the methods that return an iterator. - * - * Note that an unmapped SAMRecord may still have a reference name and an alignment start for sorting - * purposes (typically this is the coordinate of its mate), and will be found by this method if the coordinate - * matches the specified interval. - * - * Note that this method is not necessarily efficient in terms of disk I/O. The index does not have perfect - * resolution, so some SAMRecords may be read and then discarded because they do not match the specified interval. - * - * @param sequence Reference sequence sought. - * @param start Alignment start sought. - * @return Iterator for the matching SAMRecords. - */ - CloseableIterator queryAlignmentStart(final String sequence, final int start) { - if (mStream == null) { - throw new IllegalStateException("File reader is closed"); - } - if (mCurrentIterator != null) { - throw new IllegalStateException("Iteration in progress"); - } - if (!mIsSeekable) { - throw new UnsupportedOperationException("Cannot query stream-based BAM file"); - } - if (mFileIndex == null) { - throw new IllegalStateException("No BAM file index is available"); - } - mCurrentIterator = createIndexIterator(sequence, start, -1, QueryType.STARTING_AT); - return mCurrentIterator; - } - - public CloseableIterator queryUnmapped() { - if (mStream == null) { - throw new IllegalStateException("File reader is closed"); - } - if (mCurrentIterator != null) { - throw new IllegalStateException("Iteration in progress"); - } - if (!mIsSeekable) { - throw new UnsupportedOperationException("Cannot query stream-based BAM file"); - } - if (mFileIndex == null) { - throw new IllegalStateException("No BAM file index is available"); - } - try { - final long startOfLastLinearBin = mFileIndex.getStartOfLastLinearBin(); - if (startOfLastLinearBin != -1) { - mCompressedInputStream.seek(startOfLastLinearBin); - } else { - // No mapped reads in file, just start at the first read in file. - mCompressedInputStream.seek(mFirstRecordPointer); - } - mCurrentIterator = new BAMFileIndexUnmappedIterator(); - return mCurrentIterator; - } catch (IOException e) { - throw new RuntimeException("IOException seeking to unmapped reads", e); - } - } - - /** - * Reads the header from the file or stream - * @param source Note that this is used only for reporting errors. - */ - private void readHeader(final String source) - throws IOException { - - final byte[] buffer = new byte[4]; - mStream.readBytes(buffer); - if (!Arrays.equals(buffer, BAMFileConstants.BAM_MAGIC)) { - throw new IOException("Invalid BAM file header"); - } - - final int headerTextLength = mStream.readInt(); - final String textHeader = mStream.readString(headerTextLength); - final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); - headerCodec.setValidationStringency(mValidationStringency); - mFileHeader = headerCodec.decode(new StringLineReader(textHeader), - source); - - final int sequenceCount = mStream.readInt(); - if (mFileHeader.getSequenceDictionary().size() > 0) { - // It is allowed to have binary sequences but no text sequences, so only validate if both are present - if (sequenceCount != mFileHeader.getSequenceDictionary().size()) { - throw new SAMFormatException("Number of sequences in text header (" + - mFileHeader.getSequenceDictionary().size() + - ") != number of sequences in binary header (" + sequenceCount + ") for file " + source); - } - for (int i = 0; i < sequenceCount; i++) { - final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(source); - final SAMSequenceRecord sequenceRecord = mFileHeader.getSequence(i); - if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) { - throw new SAMFormatException("For sequence " + i + ", text and binary have different names in file " + - source); - } - if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) { - throw new SAMFormatException("For sequence " + i + ", text and binary have different lengths in file " + - source); - } - } - } else { - // If only binary sequences are present, copy them into mFileHeader - final List sequences = new ArrayList(sequenceCount); - for (int i = 0; i < sequenceCount; i++) { - sequences.add(readSequenceRecord(source)); - } - mFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences)); - } - } - - /** - * Reads a single binary sequence record from the file or stream - * @param source Note that this is used only for reporting errors. - */ - private SAMSequenceRecord readSequenceRecord(final String source) { - final int nameLength = mStream.readInt(); - if (nameLength <= 1) { - throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source); - } - final String sequenceName = mStream.readString(nameLength - 1); - // Skip the null terminator - mStream.readByte(); - final int sequenceLength = mStream.readInt(); - return new SAMSequenceRecord(sequenceName, sequenceLength); - } - - /** - * Iterator for non-indexed sequential iteration through all SAMRecords in file. - * Starting point of iteration is wherever current file position is when the iterator is constructed. - */ - private class BAMFileIterator implements CloseableIterator { - private SAMRecord mNextRecord = null; - private final BAMRecordCodec bamRecordCodec = new BAMRecordCodec(getFileHeader()); - private long samRecordIndex = 0; // Records at what position (counted in records) we are at in the file - - BAMFileIterator() { - this(true); - } - - /** - * @param advance Trick to enable subclass to do more setup before advancing - */ - BAMFileIterator(final boolean advance) { - this.bamRecordCodec.setInputStream(BAMFileReader2.this.mStream.getInputStream()); - - if (advance) { - advance(); - } - } - - public void close() { - if (this != mCurrentIterator) { - throw new IllegalStateException("Attempt to close non-current iterator"); - } - mCurrentIterator = null; - } - - public boolean hasNext() { - return (mNextRecord != null); - } - - public SAMRecord next() { - final SAMRecord result = mNextRecord; - advance(); - return result; - } - - public void remove() { - throw new UnsupportedOperationException("Not supported: remove"); - } - - void advance() { - try { - long startCoordinate = mCompressedInputStream.getFilePointer(); - mNextRecord = getNextRecord(); - long stopCoordinate = mCompressedInputStream.getFilePointer(); - - if (mNextRecord != null) { - ++this.samRecordIndex; - // Because some decoding is done lazily, the record needs to remember the validation stringency. - mNextRecord.setReader(mFileReader); - mNextRecord.setValidationStringency(mValidationStringency); - mNextRecord.setCoordinates(new Chunk(startCoordinate,stopCoordinate)); - - if (mValidationStringency != ValidationStringency.SILENT) { - final List validationErrors = mNextRecord.isValid(); - SAMUtils.processValidationErrors(validationErrors, - this.samRecordIndex, BAMFileReader2.this.getValidationStringency()); - } - } - if (eagerDecode && mNextRecord != null) { - mNextRecord.eagerDecode(); - } - } catch (IOException exc) { - throw new RuntimeException(exc.getMessage(), exc); - } - } - - /** - * Read the next record from the input stream. - */ - SAMRecord getNextRecord() throws IOException { - return bamRecordCodec.decode(); - } - - /** - * @return The record that will be return by the next call to next() - */ - protected SAMRecord peek() { - return mNextRecord; - } - } - - enum QueryType {CONTAINED, OVERLAPPING, STARTING_AT} - - /** - * Creates an iterator over indexed data in the specified range. - * @param sequence Sequence to which to constrain the data. - * @param start Starting position within the above sequence to which the data should be constrained. - * @param end Ending position within the above sequence to which the data should be constrained.s - * @param queryType Type of query. Useful for establishing the boundary rules. - * @return An iterator over the requested data. - */ - private CloseableIterator createIndexIterator(final String sequence, - final int start, - final int end, - final QueryType queryType) { - long[] filePointers = null; - - // Hit the index to determine the chunk boundaries for the required data. - final SAMFileHeader fileHeader = getFileHeader(); - int referenceIndex = fileHeader.getSequenceIndex(sequence); - if (referenceIndex != -1) { - final BAMFileIndex2 fileIndex = getFileIndex(); - filePointers = fileIndex.getFilePointersContaining(referenceIndex, start, end); - } - - // Create an iterator over the above chunk boundaries. - BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers); - - // Add some preprocessing filters for edge-case reads that don't fit into this - // query type. - return new BAMQueryFilteringIterator(iterator,sequence,start,end,queryType); - } - - private class BAMFileIndexIterator - extends BAMFileIterator { - - private long[] mFilePointers = null; - private int mFilePointerIndex = 0; - private long mFilePointerLimit = -1; - - BAMFileIndexIterator(final long[] filePointers) { - super(false); // delay advance() until after construction - mFilePointers = filePointers; - advance(); - } - - SAMRecord getNextRecord() - throws IOException { - while (true) { - // Advance to next file block if necessary - while (mCompressedInputStream.getFilePointer() >= mFilePointerLimit) { - if (mFilePointers == null || - mFilePointerIndex >= mFilePointers.length) { - return null; - } - final long startOffset = mFilePointers[mFilePointerIndex++]; - final long endOffset = mFilePointers[mFilePointerIndex++]; - mCompressedInputStream.seek(startOffset); - mFilePointerLimit = endOffset; - } - // Pull next record from stream - return super.getNextRecord(); - } - } - } - - /** - * A decorating iterator that filters out records that are outside the bounds of the - * given query parameters. - */ - private class BAMQueryFilteringIterator implements CloseableIterator { - /** - * The wrapped iterator. - */ - private final CloseableIterator wrappedIterator; - - /** - * The next record to be returned. Will be null if no such record exists. - */ - private SAMRecord nextRead; - - private final int mReferenceIndex; - private final int mRegionStart; - private final int mRegionEnd; - private final QueryType mQueryType; - - public BAMQueryFilteringIterator(final CloseableIterator iterator,final String sequence, final int start, final int end, final QueryType queryType) { - this.wrappedIterator = iterator; - final SAMFileHeader fileHeader = getFileHeader(); - mReferenceIndex = fileHeader.getSequenceIndex(sequence); - mRegionStart = start; - if (queryType == QueryType.STARTING_AT) { - mRegionEnd = mRegionStart; - } else { - mRegionEnd = (end <= 0) ? Integer.MAX_VALUE : end; - } - mQueryType = queryType; - nextRead = advance(); - } - - /** - * Returns true if a next element exists; false otherwise. - */ - public boolean hasNext() { - return nextRead != null; - } - - /** - * Gets the next record from the given iterator. - * @return The next SAM record in the iterator. - */ - public SAMRecord next() { - if(!hasNext()) - throw new NoSuchElementException("BAMQueryFilteringIterator: no next element available"); - final SAMRecord currentRead = nextRead; - nextRead = advance(); - return currentRead; - } - - /** - * Closes down the existing iterator. - */ - public void close() { - if (this != mCurrentIterator) { - throw new IllegalStateException("Attempt to close non-current iterator"); - } - mCurrentIterator = null; - } - - /** - * @throws UnsupportedOperationException always. - */ - public void remove() { - throw new UnsupportedOperationException("Not supported: remove"); - } - - SAMRecord advance() { - while (true) { - // Pull next record from stream - if(!wrappedIterator.hasNext()) - return null; - - final SAMRecord record = wrappedIterator.next(); - // If beyond the end of this reference sequence, end iteration - final int referenceIndex = record.getReferenceIndex(); - if (referenceIndex != mReferenceIndex) { - if (referenceIndex < 0 || - referenceIndex > mReferenceIndex) { - return null; - } - // If before this reference sequence, continue - continue; - } - if (mRegionStart == 0 && mRegionEnd == Integer.MAX_VALUE) { - // Quick exit to avoid expensive alignment end calculation - return record; - } - final int alignmentStart = record.getAlignmentStart(); - // If read is unmapped but has a coordinate, return it if the coordinate is within - // the query region, regardless of whether the mapped mate will be returned. - final int alignmentEnd; - if (mQueryType == QueryType.STARTING_AT) { - alignmentEnd = -1; - } else { - alignmentEnd = (record.getAlignmentEnd() != SAMRecord.NO_ALIGNMENT_START? - record.getAlignmentEnd(): alignmentStart); - } - - if (alignmentStart > mRegionEnd) { - // If scanned beyond target region, end iteration - return null; - } - // Filter for overlap with region - if (mQueryType == QueryType.CONTAINED) { - if (alignmentStart >= mRegionStart && alignmentEnd <= mRegionEnd) { - return record; - } - } else if (mQueryType == QueryType.OVERLAPPING) { - if (alignmentEnd >= mRegionStart && alignmentStart <= mRegionEnd) { - return record; - } - } else { - if (alignmentStart == mRegionStart) { - return record; - } - } - } - } - } - - private class BAMFileIndexUnmappedIterator extends BAMFileIterator { - private BAMFileIndexUnmappedIterator() { - while (this.hasNext() && peek().getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { - advance(); - } - } - } - -} diff --git a/java/src/net/sf/samtools/Bin.java b/java/src/net/sf/samtools/Bin.java deleted file mode 100644 index c3f1a6e50..000000000 --- a/java/src/net/sf/samtools/Bin.java +++ /dev/null @@ -1,69 +0,0 @@ -package net.sf.samtools; - -import java.util.List; - -/** - * An individual bin in a BAM file. - * - * @author mhanna - * @version 0.1 - */ -public class Bin implements Comparable { - /** - * The reference sequence associated with this bin. - */ - public final int referenceSequence; - - /** - * The number of this bin within the BAM file. - */ - public final int binNumber; - - public Bin(int referenceSequence, int binNumber) { - this.referenceSequence = referenceSequence; - this.binNumber = binNumber; - } - - /** - * See whether two bins are equal. If the ref seq and the bin number - * are equal, assume equality of the chunk list. - * @param other The other Bin to which to compare this. - * @return True if the two bins are equal. False otherwise. - */ - @Override - public boolean equals(Object other) { - if(other == null) return false; - if(!(other instanceof Bin)) return false; - - Bin otherBin = (Bin)other; - return this.referenceSequence == otherBin.referenceSequence && this.binNumber == otherBin.binNumber; - } - - /** - * Compute a unique hash code for the given reference sequence and bin number. - * @return A unique hash code. - */ - @Override - public int hashCode() { - return ((Integer)referenceSequence).hashCode() ^ ((Integer)binNumber).hashCode(); - } - - /** - * Compare two bins to see what ordering they should appear in. - * @param other Other bin to which this bin should be compared. - * @return -1 if this < other, 0 if this == other, 1 if this > other. - */ - @Override - public int compareTo(Object other) { - if(other == null) - throw new ClassCastException("Cannot compare to a null object"); - Bin otherBin = (Bin)other; - - // Check the reference sequences first. - if(this.referenceSequence != otherBin.referenceSequence) - return ((Integer)referenceSequence).compareTo(otherBin.referenceSequence); - - // Then check the bin ordering. - return ((Integer)binNumber).compareTo(otherBin.binNumber); - } -} diff --git a/java/src/net/sf/samtools/Chunk.java b/java/src/net/sf/samtools/Chunk.java deleted file mode 100644 index a9750ce28..000000000 --- a/java/src/net/sf/samtools/Chunk.java +++ /dev/null @@ -1,114 +0,0 @@ -package net.sf.samtools; - -import net.sf.picard.PicardException; - -import java.util.List; -import java.util.ArrayList; - -/** - * Represents a chunk stolen from the BAM file. Originally a private static inner class within - * BAMFileIndex; now breaking it out so that the sharding system can use it. - * - * @author mhanna - * @version 0.1 - */ -public class Chunk implements Cloneable,Comparable { - - private long mChunkStart; - private long mChunkEnd; - - public Chunk(final long start, final long end) { - mChunkStart = start; - mChunkEnd = end; - } - - protected Chunk clone() { - return new Chunk(mChunkStart,mChunkEnd); - } - - public long getChunkStart() { - return mChunkStart; - } - - public void setChunkStart(final long value) { - mChunkStart = value; - } - - public long getChunkEnd() { - return mChunkEnd; - } - - public void setChunkEnd(final long value) { - mChunkEnd = value; - } - - /** - * The list of chunks is often represented as an array of - * longs where every even-numbered index is a start coordinate - * and every odd-numbered index is a stop coordinate. Convert - * from that format back to a list of chunks. - * @param coordinateArray List of chunks to convert. - * @return A list of chunks. - */ - public static List toChunkList(long[] coordinateArray) { - if(coordinateArray.length % 2 != 0) - throw new PicardException("Data supplied does not appear to be in coordinate array format."); - - // TODO: possibly also check for monotonically increasing; this seems to be an implicit requirement of this format. - List chunkList = new ArrayList(); - for(int i = 0; i < coordinateArray.length; i += 2) - chunkList.add(new Chunk(coordinateArray[i],coordinateArray[i+1])); - - return chunkList; - } - - /** - * The list of chunks is often represented as an array of - * longs where every even-numbered index is a start coordinate - * and every odd-numbered index is a stop coordinate. - * @param chunks List of chunks to convert. - * @return A long array of the format described above. - */ - public static long[] toCoordinateArray(List chunks) { - long[] coordinateArray = new long[chunks.size()*2]; - int position = 0; - for(Chunk chunk: chunks) { - coordinateArray[position++] = chunk.getChunkStart(); - coordinateArray[position++] = chunk.getChunkEnd(); - } - return coordinateArray; - } - - public int compareTo(final Chunk chunk) { - int result = Long.signum(mChunkStart - chunk.mChunkStart); - if (result == 0) { - result = Long.signum(mChunkEnd - chunk.mChunkEnd); - } - return result; - } - - @Override - public boolean equals(final Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - final Chunk chunk = (Chunk) o; - - if (mChunkEnd != chunk.mChunkEnd) return false; - if (mChunkStart != chunk.mChunkStart) return false; - - return true; - } - - @Override - public int hashCode() { - int result = (int) (mChunkStart ^ (mChunkStart >>> 32)); - result = 31 * result + (int) (mChunkEnd ^ (mChunkEnd >>> 32)); - return result; - } - - @Override - public String toString() { - return String.format("%d:%d-%d:%d",mChunkStart >> 16,mChunkStart & 0xFFFF,mChunkEnd >> 16,mChunkEnd & 0xFFFF); - } -} diff --git a/java/src/net/sf/samtools/LinearIndex.java b/java/src/net/sf/samtools/LinearIndex.java deleted file mode 100644 index 37d454b58..000000000 --- a/java/src/net/sf/samtools/LinearIndex.java +++ /dev/null @@ -1,24 +0,0 @@ -package net.sf.samtools; - -/** - * The linear index associated with a given reference in a BAM index. - * - * @author mhanna - * @version 0.1 - */ -public class LinearIndex { - /** - * The reference sequence number for this linear index. - */ - public final int referenceSequence; - - /** - * The linear index entries within this bin. - */ - public final long[] indexEntries; - - public LinearIndex(final int referenceSequence, final long[] indexEntries) { - this.referenceSequence = referenceSequence; - this.indexEntries = indexEntries; - } -} diff --git a/java/src/net/sf/samtools/SAMFileReader2.java b/java/src/net/sf/samtools/SAMFileReader2.java deleted file mode 100644 index 40fb13c08..000000000 --- a/java/src/net/sf/samtools/SAMFileReader2.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package net.sf.samtools; - -import net.sf.samtools.util.CloseableIterator; - -import java.io.*; -import java.util.List; -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.lang.reflect.InvocationTargetException; - -import org.broadinstitute.sting.utils.JVMUtils; -import org.broadinstitute.sting.utils.StingException; - -/** - * Class for reading and querying SAM/BAM files. Delegates to appropriate concrete implementation. - */ -public class SAMFileReader2 extends SAMFileReader { - private final File sourceFile; - - /** - * Prepare to read a SAM or BAM file. If the given file is a BAM, and has a companion BAI index file - */ - public SAMFileReader2(final File file) { - this(file, null, false); - } - - /** - * Read a SAM or BAM file, possibly with an index file if present. - * If the given file is a BAM, and an index is present, indexed query will be allowed. - * - * @param file SAM or BAM. - * @param eagerDecode if true, decode SAM record entirely when reading it. - */ - public SAMFileReader2(final File file, final boolean eagerDecode) { - this(file,null,eagerDecode); - } - - /** - * Read a SAM or BAM file, possibly with an index file. If the given file is a BAM, and an index is present, - * indexed query will be allowed. - * - * @param file SAM or BAM. - * @param indexFile Location of index file, or null in order to use the default index file (if present). - * @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it. - */ - public SAMFileReader2(final File file, File indexFile, final boolean eagerDecode){ - super(file,indexFile,eagerDecode); - this.sourceFile = file; - close(); - - try { - BAMFileReader2 reader = new BAMFileReader2(file,eagerDecode,getDefaultValidationStringency()); - reader.setReader(this); - JVMUtils.setFieldValue(getField("mReader"),this,reader); - - if(indexFile != null || findIndexFileFromParent(file) != null) { - BAMFileIndex2 index = new BAMFileIndex2(indexFile != null ? indexFile : findIndexFileFromParent(file)); - reader.setFileIndex(index); - JVMUtils.setFieldValue(getField("mFileIndex"),this,index); - } - } - catch(IOException ex) { - throw new StingException("Unable to load BAM file: " + file,ex); - } - } - - /** - * Get the number of levels employed by this index. - * @return Number of levels in this index. - */ - public int getNumIndexLevels() { - final BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this); - if(fileIndex == null) - throw new SAMException("Unable to determine number of index levels; BAM file index is not present."); - return fileIndex.getNumIndexLevels(); - } - - /** - * Gets the level associated with the given bin number. - * @param bin The bin for which to determine the level. - * @return the level associated with the given bin number. - */ - public int getLevelForBin(final Bin bin) { - final BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this); - if(fileIndex == null) - throw new SAMException("Unable to determine number of index levels; BAM file index is not present."); - return fileIndex.getLevelForBinNumber(bin.binNumber); - } - - /** - * Gets the first locus that this bin can index into. - * @param bin The bin to test. - * @return The last position that the given bin can represent. - */ - public int getFirstLocusInBin(final Bin bin) { - final BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this); - if(fileIndex == null) - throw new SAMException("Unable to determine number of index levels; BAM file index is not present."); - return fileIndex.getFirstLocusInBin(bin); - } - - /** - * Gets the last locus that this bin can index into. - * @param bin The bin to test. - * @return The last position that the given bin can represent. - */ - public int getLastLocusInBin(final Bin bin) { - final BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this); - if(fileIndex == null) - throw new SAMException("Unable to determine number of index levels; BAM file index is not present."); - return fileIndex.getLastLocusInBin(bin); - } - - /** - * Iterate through the given chunks in the file. - * @param chunks List of chunks for which to retrieve data. - * @return An iterator over the given chunks. - */ - public CloseableIterator iterator(List chunks) { - // TODO: Add sanity checks so that we're not doing this against an unsupported BAM file. - BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this); - return reader.getIterator(chunks); - } - - public List getOverlappingBins(final String sequence, final int start, final int end) { - // TODO: Add sanity checks so that we're not doing this against an unsupported BAM file. - BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this); - return reader.getOverlappingBins(sequence,start,end); - } - - public List getFilePointersBounding(final Bin bin) { - // TODO: Add sanity checks so that we're not doing this against an unsupported BAM file. - BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this); - return reader.getFilePointersBounding(bin); - } - - public Chunk getCurrentPosition() { - // TODO: Add sanity checks so that we're not doing this against an unsupported BAM file. - BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this); - return new Chunk(reader.getFilePointer(),Long.MAX_VALUE); - } - - private Field getField(String fieldName) { - try { - return getClass().getSuperclass().getDeclaredField(fieldName); - } - catch(NoSuchFieldException ex) { - throw new StingException("Unable to load field: " + fieldName); - } - } - - private File findIndexFileFromParent(File bamFile) { - try { - Method method = getClass().getSuperclass().getDeclaredMethod("findIndexFile",File.class); - method.setAccessible(true); - return (File)method.invoke(this,bamFile); - } - catch(IllegalAccessException ex) { - throw new StingException("Unable to run method findIndexFile",ex); - } - catch(InvocationTargetException ex) { - throw new StingException("Unable to run method findIndexFile",ex); - } - catch(NoSuchMethodException ex) { - throw new StingException("Unable to run method findIndexFile",ex); - } - } -} diff --git a/java/src/net/sf/samtools/SAMRecord.java b/java/src/net/sf/samtools/SAMRecord.java deleted file mode 100644 index d21a6b7f0..000000000 --- a/java/src/net/sf/samtools/SAMRecord.java +++ /dev/null @@ -1,1558 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package net.sf.samtools; - - -import net.sf.samtools.util.StringUtil; - -import java.util.*; - -/** - * Java binding for a SAM file record. c.f. http://samtools.sourceforge.net/SAM1.pdf - * - * The presence of reference name/reference index and alignment start - * do not necessarily mean that a read is aligned. Those values may merely be set to force a SAMRecord - * to appear in a certain place in the sort order. The readUnmappedFlag must be checked to determine whether - * or not a read is mapped. Only if the readUnmappedFlag is false can the reference name/index and alignment start - * be interpretted as indicating an actual alignment position. - * - * Likewise, presence of mate reference name/index and mate alignment start do not necessarily mean that the - * mate is aligned. These may be set for an unaligned mate if the mate has been forced into a particular place - * in the sort order per the above paragraph. Only if the mateUnmappedFlag is false can the mate reference name/index - * and mate alignment start be interpretted as indicating the actual alignment position of the mate. - * - * Note also that there are a number of getters & setters that are linked, i.e. they present different representations - * of the same underlying data. In these cases there is typically a representation that is preferred because it - * ought to be faster than some other representation. The following are the preferred representations: - * - * getReadNameLength() is preferred to getReadName().length() - * get/setReadBases() is preferred to get/setReadString() - * get/setBaseQualities() is preferred to get/setBaseQualityString() - * get/setReferenceIndex() is preferred to get/setReferenceName() - * get/setMateReferenceIndex() is preferred to get/setMateReferenceName() - * getCigarLength() is preferred to getCigar().getNumElements() - * get/setCigar() is preferred to get/setCigarString() - * - * Note that setIndexingBin() need not be called when writing SAMRecords. It will be computed as necessary. It is only - * present as an optimization in the event that the value is already known and need not be computed. - * - * setHeader() need not be called when writing SAMRecords. It may be convenient to call it, however, because - * get/setReferenceIndex() and get/setMateReferenceIndex() must have access to the SAM header, either as an argument - * or previously passed to setHeader(). - * - * setHeader() is called by the SAM reading code, so the get/setReferenceIndex() and get/setMateReferenceIndex() - * methods will have access to the sequence dictionary. - * - * Some of the get() methods return values that are mutable, due to the limitations of Java. A caller should - * never change the value returned by a get() method. If you want to change the value of some attribute of a - * SAMRecord, create a new value object and call the appropriate set() method. - */ -public class SAMRecord implements Cloneable -{ - /** - * Alignment score for a good alignment, but where computing a Phred-score is not feasible. - */ - public static final int UNKNOWN_MAPPING_QUALITY = 255; - - /** - * Alignment score for an unaligned read. - */ - public static final int NO_MAPPING_QUALITY = 0; - - /** - * If a read has this reference name, it is unaligned, but not all unaligned reads have - * this reference name (see above). - */ - public static final String NO_ALIGNMENT_REFERENCE_NAME = "*"; - - /** - * If a read has this reference index, it is unaligned, but not all unaligned reads have - * this reference index (see above). - */ - public static final int NO_ALIGNMENT_REFERENCE_INDEX = -1; - - /** - * Cigar string for an unaligned read. - */ - public static final String NO_ALIGNMENT_CIGAR = "*"; - - /** - * If a read has reference name "*", it will have this value for position. - */ - public static final int NO_ALIGNMENT_START = 0; - - /** - * This should rarely be used, since a read with no sequence doesn't make much sense. - */ - public static final byte[] NULL_SEQUENCE = new byte[0]; - - public static final String NULL_SEQUENCE_STRING = "*"; - - /** - * This should rarely be used, since all reads should have quality scores. - */ - public static final byte[] NULL_QUALS = new byte[0]; - public static final String NULL_QUALS_STRING = "*"; - - /** - * abs(insertSize) must be <= this - */ - public static final int MAX_INSERT_SIZE = 1<<29; - - /** - * It is not necessary in general to use the flag constants, because there are getters - * & setters that handles these symbolically. - */ - private static final int READ_PAIRED_FLAG = 0x1; - private static final int PROPER_PAIR_FLAG = 0x2; - private static final int READ_UNMAPPED_FLAG = 0x4; - private static final int MATE_UNMAPPED_FLAG = 0x8; - private static final int READ_STRAND_FLAG = 0x10; - private static final int MATE_STRAND_FLAG = 0x20; - private static final int FIRST_OF_PAIR_FLAG = 0x40; - private static final int SECOND_OF_PAIR_FLAG = 0x80; - private static final int NOT_PRIMARY_ALIGNMENT_FLAG = 0x100; - private static final int READ_FAILS_VENDOR_QUALITY_CHECK_FLAG = 0x200; - private static final int DUPLICATE_READ_FLAG = 0x400; - - - private String mReadName = null; - private byte[] mReadBases = NULL_SEQUENCE; - private byte[] mBaseQualities = NULL_QUALS; - private String mReferenceName = NO_ALIGNMENT_REFERENCE_NAME; - private int mAlignmentStart = NO_ALIGNMENT_START; - private transient int mAlignmentEnd = NO_ALIGNMENT_START; - private int mMappingQuality = NO_MAPPING_QUALITY; - private String mCigarString = NO_ALIGNMENT_CIGAR; - private Cigar mCigar = null; - private List mAlignmentBlocks = null; - private int mFlags = 0; - private String mMateReferenceName = NO_ALIGNMENT_REFERENCE_NAME; - private int mMateAlignmentStart = 0; - private int mInferredInsertSize = 0; - private List mAttributes = null; - private Integer mReferenceIndex = null; - private Integer mMateReferenceIndex = null; - private Integer mIndexingBin = null; - - /** - * Some attributes (e.g. CIGAR) are not decoded immediately. Use this to decide how to validate when decoded. - */ - private SAMFileReader.ValidationStringency mValidationStringency = SAMFileReader.ValidationStringency.SILENT; - - private SAMFileReader mReader = null; - private SAMFileHeader mHeader = null; - - /** - * Where is this chunk located on the file system? - */ - private Chunk coordinates; - - public SAMRecord(final SAMFileHeader header) { - mHeader = header; - } - - public String getReadName() { - return mReadName; - } - - /** - * This method is preferred over getReadName().length(), because for BAMRecord - * it may be faster. - * @return length not including a null terminator. - */ - public int getReadNameLength() { - return mReadName.length(); - } - - public void setReadName(final String value) { - mReadName = value; - } - - /** - * @return read sequence as a string of ACGTN=. - */ - public String getReadString() { - final byte[] readBases = getReadBases(); - if (readBases.length == 0) { - return NULL_SEQUENCE_STRING; - } - return StringUtil.bytesToString(readBases); - } - - public void setReadString(final String value) { - if (NULL_SEQUENCE_STRING.equals(value)) { - mReadBases = NULL_SEQUENCE; - } else { - final byte[] bases = StringUtil.stringToBytes(value); - SAMUtils.normalizeBases(bases); - setReadBases(bases); - } - } - - - /** - * Do not modify the value returned by this method. If you want to change the bases, create a new - * byte[] and call setReadBases() or call setReadString(). - * @return read sequence as ASCII bytes ACGTN=. - */ - public byte[] getReadBases() { - return mReadBases; - } - - public void setReadBases(final byte[] value) { - mReadBases = value; - } - - /** - * This method is preferred over getReadBases().length, because for BAMRecord it may be faster. - * @return number of bases in the read. - */ - public int getReadLength() { - return getReadBases().length; - } - - /** - * @return Base qualities, encoded as a FASTQ string. - */ - public String getBaseQualityString() { - if (Arrays.equals(NULL_QUALS, getBaseQualities())) { - return NULL_QUALS_STRING; - } - return SAMUtils.phredToFastq(getBaseQualities()); - } - - public void setBaseQualityString(final String value) { - if (NULL_QUALS_STRING.equals(value)) { - setBaseQualities(NULL_QUALS); - } else { - setBaseQualities(SAMUtils.fastqToPhred(value)); - } - } - - /** - * Do not modify the value returned by this method. If you want to change the qualities, create a new - * byte[] and call setBaseQualities() or call setBaseQualityString(). - * @return Base qualities, as binary phred scores (not ASCII). - */ - public byte[] getBaseQualities() { - return mBaseQualities; - } - - public void setBaseQualities(final byte[] value) { - mBaseQualities = value; - } - - /** - * If the original base quality scores have been store in the "OQ" tag will return the numeric - * score as a byte[] - */ - public byte[] getOriginalBaseQualities() { - final String oqString = (String) getAttribute("OQ"); - if (oqString != null && oqString.length() > 0) { - return SAMUtils.fastqToPhred(oqString); - } - else { - return null; - } - } - - /** - * Sets the original base quality scores into the "OQ" tag as a String. Supplied value should be - * as phred-scaled numeric qualities. - */ - public void setOriginalBaseQualities(final byte[] oq) { - setAttribute("OQ", SAMUtils.phredToFastq(oq)); - } - - private static boolean hasReferenceName(final Integer referenceIndex, final String referenceName) { - return (referenceIndex != null && referenceIndex != NO_ALIGNMENT_REFERENCE_INDEX) || - !NO_ALIGNMENT_REFERENCE_NAME.equals(referenceName); - } - - /** - * @return true if this SAMRecord has a reference, either as a String or index (or both). - */ - private boolean hasReferenceName() { - return hasReferenceName(mReferenceIndex, mReferenceName); - } - - /** - * @return true if this SAMRecord has a mate reference, either as a String or index (or both). - */ - private boolean hasMateReferenceName() { - return hasReferenceName(mMateReferenceIndex, mMateReferenceName); - } - - /** - * @return Reference name, or null if record has no reference. - */ - public String getReferenceName() { - return mReferenceName; - } - - public void setReferenceName(final String value) { - mReferenceName = value.intern(); - mReferenceIndex = null; - } - - /** - * @return index of the reference sequence for this read in the sequence dictionary, or -1 - * if read has no reference sequence set, or if a String reference name is not found in the sequence index.. - */ - public Integer getReferenceIndex() { - if (mReferenceIndex == null) { - if (mReferenceName == null) { - mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX; - } else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mReferenceName)) { - mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX; - } else { - mReferenceIndex = mHeader.getSequenceIndex(mReferenceName); - } - } - return mReferenceIndex; - } - - /** - * @param referenceIndex Must either equal -1 (indicating no reference), or exist in the sequence dictionary - * in the header associated with this record. - */ - public void setReferenceIndex(final int referenceIndex) { - mReferenceIndex = referenceIndex; - if (mReferenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) { - mReferenceName = NO_ALIGNMENT_REFERENCE_NAME; - } else { - mReferenceName = mHeader.getSequence(referenceIndex).getSequenceName(); - } - } - - /** - * @return Mate reference name, or null if one is not assigned. - */ - public String getMateReferenceName() { - return mMateReferenceName; - } - - public void setMateReferenceName(final String mateReferenceName) { - this.mMateReferenceName = mateReferenceName.intern(); - mMateReferenceIndex = null; - } - - /** - * @return index of the reference sequence for this read's mate in the sequence dictionary, or -1 - * if mate has no reference sequence set. - */ - public Integer getMateReferenceIndex() { - if (mMateReferenceIndex == null) { - if (mMateReferenceName == null) { - mMateReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX; - } else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mMateReferenceName)){ - mMateReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX; - } else { - mMateReferenceIndex = mHeader.getSequenceIndex(mMateReferenceName); - } - } - return mMateReferenceIndex; - } - - /** - * @param referenceIndex Must either equal -1 (indicating no reference), or exist in the sequence dictionary - * in the header associated with this record. - */ - public void setMateReferenceIndex(final int referenceIndex) { - mMateReferenceIndex = referenceIndex; - if (mMateReferenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) { - mMateReferenceName = NO_ALIGNMENT_REFERENCE_NAME; - } else { - mMateReferenceName = mHeader.getSequence(referenceIndex).getSequenceName(); - } - } - - /** - * @return 1-based inclusive leftmost position of the clippped sequence, or 0 if there is no position. - */ - public int getAlignmentStart() { - return mAlignmentStart; - } - - /** - * @param value 1-based inclusive leftmost position of the clippped sequence, or 0 if there is no position. - */ - public void setAlignmentStart(final int value) { - mAlignmentStart = value; - // Clear cached alignment end - mAlignmentEnd = NO_ALIGNMENT_START; - // Change to alignmentStart could change indexing bin - setIndexingBin(null); - } - - /** - * @return 1-based inclusive rightmost position of the clippped sequence, or 0 read if unmapped. - */ - public int getAlignmentEnd() { - if (getReadUnmappedFlag()) { - return NO_ALIGNMENT_START; - } - else if (this.mAlignmentEnd == NO_ALIGNMENT_START) { - this.mAlignmentEnd = mAlignmentStart + getCigar().getReferenceLength() - 1; - } - - return this.mAlignmentEnd; - } - - /** - * @return the alignment start (1-based, inclusive) adjusted for clipped bases. For example if the read - * has an alignment start of 100 but the first 4 bases were clipped (hard or soft clipped) - * then this method will return 96. - * - * Invalid to call on an unmapped read. - */ - public int getUnclippedStart() { - int pos = getAlignmentStart(); - - for (final CigarElement cig : getCigar().getCigarElements()) { - final CigarOperator op = cig.getOperator(); - if (op == CigarOperator.SOFT_CLIP || op == CigarOperator.HARD_CLIP) { - pos -= cig.getLength(); - } - else { - break; - } - } - - return pos; - } - - /** - * @return the alignment end (1-based, inclusive) adjusted for clipped bases. For example if the read - * has an alignment end of 100 but the last 7 bases were clipped (hard or soft clipped) - * then this method will return 107. - * - * Invalid to call on an unmapped read. - */ - public int getUnclippedEnd() { - int pos = getAlignmentEnd(); - final List cigs = getCigar().getCigarElements(); - for (int i=cigs.size() - 1; i>=0; --i) { - final CigarElement cig = cigs.get(i); - final CigarOperator op = cig.getOperator(); - - if (op == CigarOperator.SOFT_CLIP || op == CigarOperator.HARD_CLIP) { - pos += cig.getLength(); - } - else { - break; - } - } - - return pos; - } - - /** - * Unsupported. This property is derived from alignment start and CIGAR. - */ - public void setAlignmentEnd(final int value) { - throw new UnsupportedOperationException("Not supported: setAlignmentEnd"); - } - - /** - * @return 1-based inclusive leftmost position of the clippped mate sequence, or 0 if there is no position. - */ - public int getMateAlignmentStart() { - return mMateAlignmentStart; - } - - public void setMateAlignmentStart(final int mateAlignmentStart) { - this.mMateAlignmentStart = mateAlignmentStart; - } - - /** - * @return insert size (difference btw 5' end of read & 5' end of mate), if possible, else 0. - * Negative if mate maps to lower position than read. - */ - public int getInferredInsertSize() { - return mInferredInsertSize; - } - - public void setInferredInsertSize(final int inferredInsertSize) { - this.mInferredInsertSize = inferredInsertSize; - } - - /** - * @return phred scaled mapping quality. 255 implies valid mapping but quality is hard to compute. - */ - public int getMappingQuality() { - return mMappingQuality; - } - - public void setMappingQuality(final int value) { - mMappingQuality = value; - } - - public String getCigarString() { - if (mCigarString == null && getCigar() != null) { - mCigarString = TextCigarCodec.getSingleton().encode(getCigar()); - } - return mCigarString; - } - - public void setCigarString(final String value) { - mCigarString = value; - mCigar = null; - mAlignmentBlocks = null; - // Clear cached alignment end - mAlignmentEnd = NO_ALIGNMENT_START; - // Change to cigar could change alignmentEnd, and thus indexing bin - setIndexingBin(null); - } - - /** - * Do not modify the value returned by this method. If you want to change the Cigar, create a new - * Cigar and call setCigar() or call setCigarString() - * @return Cigar object for the read, or null if there is none. - */ - public Cigar getCigar() { - if (mCigar == null && mCigarString != null) { - mCigar = TextCigarCodec.getSingleton().decode(mCigarString); - if (getValidationStringency() != SAMFileReader.ValidationStringency.SILENT && !this.getReadUnmappedFlag()) { - // Don't know line number, and don't want to force read name to be decoded. - SAMUtils.processValidationErrors(validateCigar(-1L), -1L, getValidationStringency()); - } - } - return mCigar; - } - - /** - * This method is preferred over getCigar().getNumElements(), because for BAMRecord it may be faster. - * @return number of cigar elements (number + operator) in the cigar string. - */ - public int getCigarLength() { - return getCigar().numCigarElements(); - } - - public void setCigar(final Cigar cigar) { - initializeCigar(cigar); - // Change to cigar could change alignmentEnd, and thus indexing bin - setIndexingBin(null); - } - - /** - * For setting the Cigar string when BAMRecord has decoded it. Use this rather than setCigar() - * so that indexing bin doesn't get clobbered. - */ - protected void initializeCigar(final Cigar cigar) { - this.mCigar = cigar; - mCigarString = null; - mAlignmentBlocks = null; - // Clear cached alignment end - mAlignmentEnd = NO_ALIGNMENT_START; - } - - /** - * Get the SAMReadGroupRecord for this SAMRecord. - * @return The SAMReadGroupRecord from the SAMFileHeader for this SAMRecord, or null if - * 1) this record has no RG tag, or 2) the header doesn't contain the read group with - * the given ID. - * @throws NullPointerException if this.getHeader() returns null. - * @throws ClassCastException if RG tag does not have a String value. - */ - public SAMReadGroupRecord getReadGroup() { - final String rgId = (String)getAttribute(SAMTagUtil.getSingleton().RG); - if (rgId == null) { - return null; - } - return getHeader().getReadGroup(rgId); - } - - /** - * It is preferrable to use the get*Flag() methods that handle the flag word symbolically. - */ - public int getFlags() { - return mFlags; - } - - public void setFlags(final int value) { - mFlags = value; - // Could imply change to readUnmapped flag, which could change indexing bin - setIndexingBin(null); - } - - /** - * the read is paired in sequencing, no matter whether it is mapped in a pair. - */ - public boolean getReadPairedFlag() { - return (mFlags & READ_PAIRED_FLAG) != 0; - } - - private void requireReadPaired() { - if (!getReadPairedFlag()) { - throw new IllegalStateException("Inappropriate call if not paired read"); - } - } - - /** - * the read is mapped in a proper pair (depends on the protocol, normally inferred during alignment). - */ - public boolean getProperPairFlag() { - requireReadPaired(); - return getProperPairFlagUnchecked(); - } - - private boolean getProperPairFlagUnchecked() { - return (mFlags & PROPER_PAIR_FLAG) != 0; - } - - /** - * the query sequence itself is unmapped. - */ - public boolean getReadUnmappedFlag() { - return (mFlags & READ_UNMAPPED_FLAG) != 0; - } - - /** - * the mate is unmapped. - */ - public boolean getMateUnmappedFlag() { - requireReadPaired(); - return getMateUnmappedFlagUnchecked(); - } - - private boolean getMateUnmappedFlagUnchecked() { - return (mFlags & MATE_UNMAPPED_FLAG) != 0; - } - - /** - * strand of the query (false for forward; true for reverse strand). - */ - public boolean getReadNegativeStrandFlag() { - return (mFlags & READ_STRAND_FLAG) != 0; - } - - /** - * strand of the mate (false for forward; true for reverse strand). - */ - public boolean getMateNegativeStrandFlag() { - requireReadPaired(); - return getMateNegativeStrandFlagUnchecked(); - } - - private boolean getMateNegativeStrandFlagUnchecked() { - return (mFlags & MATE_STRAND_FLAG) != 0; - } - - /** - * the read is the first read in a pair. - */ - public boolean getFirstOfPairFlag() { - requireReadPaired(); - return getFirstOfPairFlagUnchecked(); - } - - private boolean getFirstOfPairFlagUnchecked() { - return (mFlags & FIRST_OF_PAIR_FLAG) != 0; - } - - /** - * the read is the second read in a pair. - */ - public boolean getSecondOfPairFlag() { - requireReadPaired(); - return getSecondOfPairFlagUnchecked(); - } - - private boolean getSecondOfPairFlagUnchecked() { - return (mFlags & SECOND_OF_PAIR_FLAG) != 0; - } - - /** - * the alignment is not primary (a read having split hits may have multiple primary alignment records). - */ - public boolean getNotPrimaryAlignmentFlag() { - return (mFlags & NOT_PRIMARY_ALIGNMENT_FLAG) != 0; - } - - /** - * the read fails platform/vendor quality checks. - */ - public boolean getReadFailsVendorQualityCheckFlag() { - return (mFlags & READ_FAILS_VENDOR_QUALITY_CHECK_FLAG) != 0; - } - - /** - * the read is either a PCR duplicate or an optical duplicate. - */ - public boolean getDuplicateReadFlag() { - return (mFlags & DUPLICATE_READ_FLAG) != 0; - } - - /** - * the read is paired in sequencing, no matter whether it is mapped in a pair. - */ - public void setReadPairedFlag(final boolean flag) { - setFlag(flag, READ_PAIRED_FLAG); - } - - /** - * the read is mapped in a proper pair (depends on the protocol, normally inferred during alignment). - */ - public void setProperPairFlag(final boolean flag) { - setFlag(flag, PROPER_PAIR_FLAG); - } - - /** - * the query sequence itself is unmapped. - */ - public void setReadUmappedFlag(final boolean flag) { - setFlag(flag, READ_UNMAPPED_FLAG); - // Change to readUnmapped could change indexing bin - setIndexingBin(null); - } - - /** - * the mate is unmapped. - */ - public void setMateUnmappedFlag(final boolean flag) { - setFlag(flag, MATE_UNMAPPED_FLAG); - } - - /** - * strand of the query (false for forward; true for reverse strand). - */ - public void setReadNegativeStrandFlag(final boolean flag) { - setFlag(flag, READ_STRAND_FLAG); - } - - /** - * strand of the mate (false for forward; true for reverse strand). - */ - public void setMateNegativeStrandFlag(final boolean flag) { - setFlag(flag, MATE_STRAND_FLAG); - } - - /** - * the read is the first read in a pair. - */ - public void setFirstOfPairFlag(final boolean flag) { - setFlag(flag, FIRST_OF_PAIR_FLAG); - } - - /** - * the read is the second read in a pair. - */ - public void setSecondOfPairFlag(final boolean flag) { - setFlag(flag, SECOND_OF_PAIR_FLAG); - } - - /** - * the alignment is not primary (a read having split hits may have multiple primary alignment records). - */ - public void setNotPrimaryAlignmentFlag(final boolean flag) { - setFlag(flag, NOT_PRIMARY_ALIGNMENT_FLAG); - } - - /** - * the read fails platform/vendor quality checks. - */ - public void setReadFailsVendorQualityCheckFlag(final boolean flag) { - setFlag(flag, READ_FAILS_VENDOR_QUALITY_CHECK_FLAG); - } - - /** - * the read is either a PCR duplicate or an optical duplicate. - */ - public void setDuplicateReadFlag(final boolean flag) { - setFlag(flag, DUPLICATE_READ_FLAG); - } - - private void setFlag(final boolean flag, final int bit) { - if (flag) { - mFlags |= bit; - } else { - mFlags &= ~bit; - } - } - - public SAMFileReader.ValidationStringency getValidationStringency() { - return mValidationStringency; - } - - /** - * Control validation of lazily-decoded elements. - */ - public void setValidationStringency(final SAMFileReader.ValidationStringency validationStringency) { - this.mValidationStringency = validationStringency; - } - - /** - * Get the value for a SAM tag. - * WARNING: Some value types (e.g. byte[]) are mutable. It is dangerous to change one of these values in - * place, because some SAMRecord implementations keep track of when attributes have been changed. If you - * want to change an attribute value, call setAttribute() to replace the value. - * - * @param tag Two-character tag name. - * @return Appropriately typed tag value, or null if the requested tag is not present. - */ - public final Object getAttribute(final String tag) { - return getAttribute(SAMTagUtil.getSingleton().makeBinaryTag(tag)); - } - - /** - * Get the tag value and attempt to coerce it into the requested type. - * @param tag The requested tag. - * @return The value of a tag, converted into an Integer if possible. - * @throws RuntimeException If the value is not an integer type, or will not fit in an Integer. - */ - public final Integer getIntegerAttribute(final String tag) { - final Object val = getAttribute(tag); - if (val == null) return null; - if (val instanceof Integer) { - return (Integer)val; - } - if (!(val instanceof Number)) { - throw new RuntimeException("Value for tag " + tag + " is not Number: " + val.getClass()); - } - final long longVal = ((Number)val).longValue(); - if (longVal < Integer.MIN_VALUE || longVal > Integer.MAX_VALUE) { - throw new RuntimeException("Value for tag " + tag + " is not in Integer range: " + longVal); - } - return (int)longVal; - } - - /** - * Get the tag value and attempt to coerce it into the requested type. - * @param tag The requested tag. - * @return The value of a tag, converted into a Short if possible. - * @throws RuntimeException If the value is not an integer type, or will not fit in a Short. - */ - public final Short getShortAttribute(final String tag) { - final Object val = getAttribute(tag); - if (val == null) return null; - if (val instanceof Short) { - return (Short)val; - } - if (!(val instanceof Number)) { - throw new RuntimeException("Value for tag " + tag + " is not Number: " + val.getClass()); - } - final long longVal = ((Number)val).longValue(); - if (longVal < Short.MIN_VALUE || longVal > Short.MAX_VALUE) { - throw new RuntimeException("Value for tag " + tag + " is not in Short range: " + longVal); - } - return (short)longVal; - } - - /** - * Get the tag value and attempt to coerce it into the requested type. - * @param tag The requested tag. - * @return The value of a tag, converted into a Byte if possible. - * @throws RuntimeException If the value is not an integer type, or will not fit in a Byte. - */ - public final Byte getByteAttribute(final String tag) { - final Object val = getAttribute(tag); - if (val == null) return null; - if (val instanceof Byte) { - return (Byte)val; - } - if (!(val instanceof Number)) { - throw new RuntimeException("Value for tag " + tag + " is not Number: " + val.getClass()); - } - final long longVal = ((Number)val).longValue(); - if (longVal < Byte.MIN_VALUE || longVal > Byte.MAX_VALUE) { - throw new RuntimeException("Value for tag " + tag + " is not in Short range: " + longVal); - } - return (byte)longVal; - } - - public final String getStringAttribute(final String tag) { - final Object val = getAttribute(tag); - if (val == null) return null; - if (val instanceof String) { - return (String)val; - } - throw new SAMException("Value for tag " + tag + " is not a String: " + val.getClass()); - } - - public final Character getCharacterAttribute(final String tag) { - final Object val = getAttribute(tag); - if (val == null) return null; - if (val instanceof Character) { - return (Character)val; - } - throw new SAMException("Value for tag " + tag + " is not a Character: " + val.getClass()); - } - - public final Float getFloatAttribute(final String tag) { - final Object val = getAttribute(tag); - if (val == null) return null; - if (val instanceof Float) { - return (Float)val; - } - throw new SAMException("Value for tag " + tag + " is not a Float: " + val.getClass()); - } - - public final byte[] getByteArrayAttribute(final String tag) { - final Object val = getAttribute(tag); - if (val == null) return null; - if (val instanceof byte[]) { - return (byte[])val; - } - throw new SAMException("Value for tag " + tag + " is not a byte[]: " + val.getClass()); - } - - protected Object getAttribute(final short tag) { - if (mAttributes == null) { - return null; - } - for (final SAMBinaryTagAndValue tagAndValue : mAttributes) { - if (tagAndValue.tag == tag) { - return tagAndValue.value; - } - } - return null; - } - - /** - * Set a named attribute onto the SAMRecord. Passing a null value causes the attribute to be cleared. - * @param tag two-character tag name. See http://samtools.sourceforge.net/SAM1.pdf for standard and user-defined tags. - * @param value Supported types are String, Char, Integer, Float, byte[]. - * If value == null, tag is cleared. - * - * Byte and Short are allowed but discouraged. If written to a SAM file, these will be converted to Integer, - * whereas if written to BAM, getAttribute() will return as Byte or Short, respectively. - * - * Long with value between 0 and MAX_UINT is allowed for BAM but discouraged. Attempting to write such a value - * to SAM will cause an exception to be thrown. - */ - final public void setAttribute(final String tag, final Object value) { - setAttribute(SAMTagUtil.getSingleton().makeBinaryTag(tag), value); - } - - protected void setAttribute(final short tag, final Object value) { - if (value != null && - !(value instanceof Byte || value instanceof Short || value instanceof Integer || - value instanceof String || value instanceof Character || value instanceof Float || - value instanceof byte[])) { - throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " + - SAMTagUtil.getSingleton().makeStringTag(tag)); - } - if (mAttributes == null) { - mAttributes = new ArrayList(); - } - int i; - for (i = 0; i < mAttributes.size(); ++i) { - if (mAttributes.get(i).tag == tag) { - break; - } - } - if (i < mAttributes.size()) { - if (value != null) { - mAttributes.set(i, new SAMBinaryTagAndValue(tag, value)); - } else { - mAttributes.remove(i); - } - } else if (value != null) { - mAttributes.add(new SAMBinaryTagAndValue(tag, value)); - } - } - - /** - * Removes all attributes. - */ - public void clearAttributes() { - mAttributes.clear(); - } - - /** - * Replace any existing attributes with the given list. Does not copy the list - * but installs it directly. - */ - protected void setAttributes(final List attributes) { - mAttributes = attributes; - } - /** - * @return List of all tags on this record. Returns null if there are no tags. - */ - protected List getBinaryAttributes() { - if (mAttributes == null || mAttributes.isEmpty()) { - return Collections.emptyList(); - } - return Collections.unmodifiableList(mAttributes); - } - - /** - * Tag name and value of an attribute, for getAttributes() method. - */ - public static class SAMTagAndValue { - public final String tag; - public final Object value; - - public SAMTagAndValue(final String tag, final Object value) { - this.tag = tag; - this.value = value; - } - } - - /** - * @return list of {tag, value} tuples - */ - public final List getAttributes() { - final List binaryAttributes = getBinaryAttributes(); - final List ret = new ArrayList(binaryAttributes.size()); - for (final SAMBinaryTagAndValue tagAndValue : binaryAttributes) { - ret.add(new SAMTagAndValue(SAMTagUtil.getSingleton().makeStringTag(tagAndValue.tag), - tagAndValue.value)); - } - return ret; - } - - Integer getIndexingBin() { - return mIndexingBin; - } - - /** - * Used internally when writing BAMRecords. - * @param mIndexingBin c.f. http://samtools.sourceforge.net/SAM1.pdf - */ - void setIndexingBin(final Integer mIndexingBin) { - this.mIndexingBin = mIndexingBin; - } - - /** - * Does not change state of this. - * @return indexing bin based on alignment start & end. - */ - int computeIndexingBin() { - // reg2bin has zero-based, half-open API - final int alignmentStart = getAlignmentStart()-1; - int alignmentEnd = getAlignmentEnd(); - if (alignmentEnd <= 0) { - // If alignment end cannot be determined (e.g. because this read is not really aligned), - // then treat this as a one base alignment for indexing purposes. - alignmentEnd = alignmentStart + 1; - } - return SAMUtils.reg2bin(alignmentStart, alignmentEnd); - } - - public SAMFileHeader getHeader() { - return mHeader; - } - - /** - * Setting header into SAMRecord facilitates conversion btw reference sequence names and indices - * @param header contains sequence dictionary for this SAMRecord - */ - public void setHeader(final SAMFileHeader header) { - this.mHeader = header; - } - - /** - * If this record has a valid binary representation of the variable-length portion of a binary record stored, - * return that byte array, otherwise return null. This will never be true for SAMRecords. It will be true - * for BAMRecords that have not been eagerDecoded(), and for which none of the data in the variable-length - * portion has been changed. - */ - public byte[] getVariableBinaryRepresentation() { - return null; - } - - /** - * Depending on the concrete implementation, the binary file size of attributes may be known without - * computing them all. - * @return binary file size of attribute, if known, else -1 - */ - public int getAttributesBinarySize() { - return -1; - } - - public String format() { - final StringBuilder buffer = new StringBuilder(); - addField(buffer, getReadName(), null, null); - addField(buffer, getFlags(), null, null); - addField(buffer, getReferenceName(), null, "*"); - addField(buffer, getAlignmentStart(), 0, "*"); - addField(buffer, getMappingQuality(), 0, "0"); - addField(buffer, getCigarString(), null, "*"); - addField(buffer, getMateReferenceName(), null, "*"); - addField(buffer, getMateAlignmentStart(), 0, "*"); - addField(buffer, getInferredInsertSize(), 0, "*"); - addField(buffer, getReadString(), null, "*"); - addField(buffer, getBaseQualityString(), null, "*"); - if (mAttributes != null) { - for (final SAMBinaryTagAndValue entry : getBinaryAttributes()) { - addField(buffer, formatTagValue(entry.tag, entry.value)); - } - } - return buffer.toString(); - } - - private void addField(final StringBuilder buffer, final Object value, final Object defaultValue, final String defaultString) { - if (safeEquals(value, defaultValue)) { - addField(buffer, defaultString); - } else if (value == null) { - addField(buffer, ""); - } else { - addField(buffer, value.toString()); - } - } - - private void addField(final StringBuilder buffer, final String field) { - if (buffer.length() > 0) { - buffer.append('\t'); - } - buffer.append(field); - } - - private String formatTagValue(final short tag, final Object value) { - final String tagString = SAMTagUtil.getSingleton().makeStringTag(tag); - if (value == null || value instanceof String) { - return tagString + ":Z:" + value; - } else if (value instanceof Integer || value instanceof Long || - value instanceof Short || value instanceof Byte) { - return tagString + ":i:" + value; - } else if (value instanceof Character) { - return tagString + ":A:" + value; - } else if (value instanceof Float) { - return tagString + ":f:" + value; - } else if (value instanceof byte[]) { - return tagString + ":H:" + StringUtil.bytesToHexString((byte[]) value); - } else { - throw new RuntimeException("Unexpected value type for tag " + tagString + - ": " + value + " of class " + value.getClass().getName()); - } - } - - private boolean safeEquals(final Object o1, final Object o2) { - if (o1 == o2) { - return true; - } else if (o1 == null || o2 == null) { - return false; - } else { - return o1.equals(o2); - } - } - - /** - * Force all lazily-initialized data members to be initialized. If a subclass overrides this method, - * typically it should also call super method. - */ - protected void eagerDecode() { - getCigar(); - getCigarString(); - } - - /** - * Returns blocks of the read sequence that have been aligned directly to the - * reference sequence. Note that clipped portions of the read and inserted and - * deleted bases (vs. the reference) are not represented in the alignment blocks. - */ - public List getAlignmentBlocks() { - if (this.mAlignmentBlocks != null) return this.mAlignmentBlocks; - - final Cigar cigar = getCigar(); - if (cigar == null) return Collections.emptyList(); - - - final List alignmentBlocks = new ArrayList(); - int readBase = 1; - int refBase = getAlignmentStart(); - - for (final CigarElement e : cigar.getCigarElements()) { - switch (e.getOperator()) { - case H : break; // ignore hard clips - case P : break; // ignore pads - case S : readBase += e.getLength(); break; // soft clip read bases - case N : refBase += e.getLength(); break; // reference skip - case D : refBase += e.getLength(); break; - case I : readBase += e.getLength(); break; - case M : - case EQ : - case X : - final int length = e.getLength(); - alignmentBlocks.add(new AlignmentBlock(readBase, refBase, length)); - readBase += length; - refBase += length; - break; - default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator()); - } - } - this.mAlignmentBlocks = Collections.unmodifiableList(alignmentBlocks); - - return this.mAlignmentBlocks; - } - - /** - * Run all validations of CIGAR. These include validation that the CIGAR makes sense independent of - * placement, plus validation that CIGAR + placement yields all bases with M operator within the range of the reference. - * @param recordNumber For error reporting. -1 if not known. - * @return List of errors, or null if no errors. - */ - public List validateCigar(final long recordNumber) { - List ret = null; - - if (getValidationStringency() != SAMFileReader.ValidationStringency.SILENT && !this.getReadUnmappedFlag()) { - // Don't know line number, and don't want to force read name to be decoded. - ret = getCigar().isValid(getReadName(), recordNumber); - if (getReferenceIndex() != NO_ALIGNMENT_REFERENCE_INDEX) { - final SAMSequenceRecord sequence = getHeader().getSequence(getReferenceIndex()); - final int referenceSequenceLength = sequence.getSequenceLength(); - for (final AlignmentBlock alignmentBlock : getAlignmentBlocks()) { - if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE, - "CIGAR M operator maps off end of reference", getReadName(), recordNumber)); - break; - } - } - } - } - return ret; - } - - @Override - public boolean equals(final Object o) { - if (this == o) return true; - if (!(o instanceof SAMRecord)) return false; - - final SAMRecord samRecord = (SAMRecord) o; - - // First check all the elements that do not require decoding - if (mAlignmentStart != samRecord.mAlignmentStart) return false; - if (mFlags != samRecord.mFlags) return false; - if (mInferredInsertSize != samRecord.mInferredInsertSize) return false; - if (mMappingQuality != samRecord.mMappingQuality) return false; - if (mMateAlignmentStart != samRecord.mMateAlignmentStart) return false; - if (mIndexingBin != null ? !mIndexingBin.equals(samRecord.mIndexingBin) : samRecord.mIndexingBin != null) - return false; - if (mMateReferenceIndex != null ? !mMateReferenceIndex.equals(samRecord.mMateReferenceIndex) : samRecord.mMateReferenceIndex != null) - return false; - if (mReferenceIndex != null ? !mReferenceIndex.equals(samRecord.mReferenceIndex) : samRecord.mReferenceIndex != null) - return false; - - eagerDecode(); - samRecord.eagerDecode(); - - if (mAttributes != null ? !mAttributes.equals(samRecord.mAttributes) : samRecord.mAttributes != null) - return false; - if (!Arrays.equals(mBaseQualities, samRecord.mBaseQualities)) return false; - if (mCigar != null ? !mCigar.equals(samRecord.mCigar) : samRecord.mCigar != null) - return false; - if (mMateReferenceName != null ? !mMateReferenceName.equals(samRecord.mMateReferenceName) : samRecord.mMateReferenceName != null) - return false; - if (!Arrays.equals(mReadBases, samRecord.mReadBases)) return false; - if (mReadName != null ? !mReadName.equals(samRecord.mReadName) : samRecord.mReadName != null) return false; - if (mReferenceName != null ? !mReferenceName.equals(samRecord.mReferenceName) : samRecord.mReferenceName != null) - return false; - - return true; - } - - @Override - public int hashCode() { - eagerDecode(); - int result = mReadName != null ? mReadName.hashCode() : 0; - result = 31 * result + (mReadBases != null ? Arrays.hashCode(mReadBases) : 0); - result = 31 * result + (mBaseQualities != null ? Arrays.hashCode(mBaseQualities) : 0); - result = 31 * result + (mReferenceName != null ? mReferenceName.hashCode() : 0); - result = 31 * result + mAlignmentStart; - result = 31 * result + mMappingQuality; - result = 31 * result + (mCigarString != null ? mCigarString.hashCode() : 0); - result = 31 * result + mFlags; - result = 31 * result + (mMateReferenceName != null ? mMateReferenceName.hashCode() : 0); - result = 31 * result + mMateAlignmentStart; - result = 31 * result + mInferredInsertSize; - result = 31 * result + (mAttributes != null ? mAttributes.hashCode() : 0); - result = 31 * result + (mReferenceIndex != null ? mReferenceIndex.hashCode() : 0); - result = 31 * result + (mMateReferenceIndex != null ? mMateReferenceIndex.hashCode() : 0); - result = 31 * result + (mIndexingBin != null ? mIndexingBin.hashCode() : 0); - return result; - } - - /** - * Perform various validations of SAMRecord. - * Note that this method deliberately returns null rather than Collections.emptyList() if there - * are no validation errors, because callers tend to assume that if a non-null list is returned, it is modifiable. - * @return null if valid. If invalid, returns a list of error messages. - */ - public List isValid() { - // ret is only instantiate if there are errors to report, in order to reduce GC in the typical case - // in which everything is valid. It's ugly, but more efficient. - ArrayList ret = null; - if (!getReadPairedFlag()) { - if (getProperPairFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_PROPER_PAIR, "Proper pair flag should not be set for unpaired read.", getReadName())); - } - if (getMateUnmappedFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED, "Mate unmapped flag should not be set for unpaired read.", getReadName())); - } - if (getMateNegativeStrandFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_MATE_NEG_STRAND, "Mate negative strand flag should not be set for unpaired read.", getReadName())); - } - if (getFirstOfPairFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_FIRST_OF_PAIR, "First of pair flag should not be set for unpaired read.", getReadName())); - } - if (getSecondOfPairFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_SECOND_OF_PAIR, "First of pair flag should not be set for unpaired read.", getReadName())); - } - if (getMateReferenceIndex() != NO_ALIGNMENT_REFERENCE_INDEX) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_MATE_REF_INDEX, "MRNM should not be set for unpaired read.", getReadName())); - } - } else { - final List errors = isValidReferenceIndexAndPosition(mMateReferenceIndex, mMateReferenceName, - getMateAlignmentStart(), true); - if (errors != null) { - if (ret == null) ret = new ArrayList(); - ret.addAll(errors); - } - if (!hasMateReferenceName() && !getMateUnmappedFlag()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED, "Mapped mate should have mate reference name", getReadName())); - } -/* - TODO: PIC-97 This validation should be enabled, but probably at this point there are too many - BAM files that have the proper pair flag set when read or mate is unmapped. - if (getMateUnmappedFlag() && getProperPairFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_PROPER_PAIR, "Proper pair flag should not be set for unpaired read.", getReadName())); - } -*/ - } - if (getInferredInsertSize() > MAX_INSERT_SIZE || getInferredInsertSize() < -MAX_INSERT_SIZE) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_INSERT_SIZE, "Insert size out of range", getReadName())); - } - if (getReadUnmappedFlag()) { - if (getNotPrimaryAlignmentFlag()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_NOT_PRIM_ALIGNMENT, "Not primary alignment flag should not be set for unmapped read.", getReadName())); - } - if (getMappingQuality() != 0) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_MAPPING_QUALITY, "MAPQ must should be 0 for unmapped read.", getReadName())); - } - if (getCigarLength() != 0) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "CIGAR should have zero elements for unmapped read.", getReadName())); - } -/* - TODO: PIC-97 This validation should be enabled, but probably at this point there are too many - BAM files that have the proper pair flag set when read or mate is unmapped. - if (getProperPairFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_PROPER_PAIR, "Proper pair flag should not be set for unmapped read.", getReadName())); - } -*/ - } else { - if (getMappingQuality() >= 256) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_MAPPING_QUALITY, "MAPQ should be < 256.", getReadName())); - } - if (getCigarLength() == 0) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "CIGAR should have > zero elements for mapped read.", getReadName())); - } - if (getHeader().getSequenceDictionary().size() == 0) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.MISSING_SEQUENCE_DICTIONARY, "Empty sequence dictionary.", getReadName())); - } - if (!hasReferenceName()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_READ_UNMAPPED, "Mapped read should have valid reference name", getReadName())); - } -/* - Oops! We know this is broken in older BAM files, so this having this validation will cause all sorts of - problems! - if (getIndexingBin() != null && getIndexingBin() != computeIndexingBin()) { - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_INDEXING_BIN, - "Indexing bin (" + getIndexingBin() + ") does not agree with computed value (" + computeIndexingBin() + ")", - getReadName())); - - } -*/ - } - // Validate the RG ID is found in header - final String rgId = (String)getAttribute(SAMTagUtil.getSingleton().RG); - if (rgId != null && getHeader().getReadGroup(rgId) == null) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.READ_GROUP_NOT_FOUND, - "RG ID on SAMRecord not found in header: " + rgId, getReadName())); - } - final List errors = isValidReferenceIndexAndPosition(mReferenceIndex, mReferenceName, getAlignmentStart(), false); - if (errors != null) { - if (ret == null) ret = new ArrayList(); - ret.addAll(errors); - } - if (this.getReadLength() == 0) { - String cq = (String)getAttribute(SAMTagUtil.getSingleton().CQ); - String cs = (String)getAttribute(SAMTagUtil.getSingleton().CS); - if (cq == null || cq.length() == 0 || cs == null || cs.length() == 0) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.EMPTY_READ, - "Zero-length read without CS or CQ tag", getReadName())); - } else if (!getReadUnmappedFlag()) { - boolean hasIndel = false; - for (CigarElement cigarElement : getCigar().getCigarElements()) { - if (cigarElement.getOperator() == CigarOperator.DELETION || - cigarElement.getOperator() == CigarOperator.INSERTION) { - hasIndel = true; - break; - } - } - if (!hasIndel) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.EMPTY_READ, - "Colorspace read with zero-length bases but no indel", getReadName())); - } - } - } - if (this.getReadLength() != getBaseQualities().length && !Arrays.equals(getBaseQualities(), NULL_QUALS)) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_QUALS_LENGTH, - "Read length does not match quals length", getReadName())); - } - if (ret == null || ret.size() == 0) { - return null; - } - return ret; - } - - /** - * Gets the reader that read this SAM file. - * @return - */ - public SAMFileReader getReader() { - return mReader; - } - - /** - * Sets the reader that read this SAM file. Protected access only. - * @param reader Reader which retrieved this file. - */ - protected void setReader(SAMFileReader reader) { - mReader = reader; - } - - /** - * Gets the position of this record, stored on the filesystem. - * @return Chunk indicating the physical position. - */ - public Chunk getCoordinates() { - return coordinates; - } - - /** - * Internal function: sets the coordin - * @param coordinates Where this file is actually sitting on the disk. - */ - protected void setCoordinates(final Chunk coordinates) { - this.coordinates = coordinates; - } - - private List isValidReferenceIndexAndPosition(final Integer referenceIndex, final String referenceName, - final int alignmentStart, final boolean isMate) { - final boolean hasReference = hasReferenceName(referenceIndex, referenceName); - - // ret is only instantiate if there are errors to report, in order to reduce GC in the typical case - // in which everything is valid. It's ugly, but more efficient. - ArrayList ret = null; - if (!hasReference) { - if (alignmentStart != 0) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_ALIGNMENT_START, buildMessage("Alignment start should be 0 because reference name = *.", isMate), getReadName())); - } - } else { - if (alignmentStart == 0) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_ALIGNMENT_START, buildMessage("Alignment start should != 0 because reference name != *.", isMate), getReadName())); - } - - if (getHeader().getSequenceDictionary().size() > 0) { - final SAMSequenceRecord sequence = - (referenceIndex != null? getHeader().getSequence(referenceIndex): getHeader().getSequence(referenceName)); - if (sequence == null) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_REFERENCE_INDEX, buildMessage("Reference sequence not found in sequence dictionary.", isMate), getReadName())); - } else { - if (alignmentStart > sequence.getSequenceLength()) { - if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_ALIGNMENT_START, buildMessage("Alignment start (" + alignmentStart + ") must be <= reference sequence length (" + - sequence.getSequenceLength() + ") on reference " + sequence.getSequenceName(), isMate), getReadName())); - } - } - } - } - return ret; - } - - private String buildMessage(final String baseMessage, final boolean isMate) { - return isMate ? "Mate " + baseMessage : baseMessage; - } - - /** - * Note that this does a shallow copy of everything, except for the attribute list, for which a copy of the list - * is made, but the attributes themselves are copied by reference. This should be safe because callers should - * never modify a mutable value returned by any of the get() methods anyway. - */ - @Override - public Object clone() throws CloneNotSupportedException { - final SAMRecord newRecord = (SAMRecord)super.clone(); - if (mAttributes != null) { - newRecord.mAttributes = (ArrayList)((ArrayList)mAttributes).clone(); - } - return newRecord; - } - - /** Simple toString() that gives a little bit of useful info about the read. */ - @Override - public String toString() { - StringBuilder builder = new StringBuilder(64); - builder.append(getReadName()); - if (getReadPairedFlag()) { - if (getFirstOfPairFlag()) { - builder.append(" 1/2"); - } - else { - builder.append(" 2/2"); - } - } - - builder.append(" "); - builder.append(String.valueOf(getReadLength())); - builder.append("b"); - - if (getReadUnmappedFlag()) { - builder.append(" unmapped read."); - } - else { - builder.append(" aligned read."); - } - - return builder.toString(); - } -} - diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1198.jar b/settings/repository/edu.mit.broad/picard-private-parts-1333-sharding.jar similarity index 60% rename from settings/repository/edu.mit.broad/picard-private-parts-1198.jar rename to settings/repository/edu.mit.broad/picard-private-parts-1333-sharding.jar index b46954c67..9ce577e80 100644 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-1198.jar and b/settings/repository/edu.mit.broad/picard-private-parts-1333-sharding.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1198.xml b/settings/repository/edu.mit.broad/picard-private-parts-1333-sharding.xml similarity index 55% rename from settings/repository/edu.mit.broad/picard-private-parts-1198.xml rename to settings/repository/edu.mit.broad/picard-private-parts-1333-sharding.xml index dfd1759d8..3d87c2fa8 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-1198.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-1333-sharding.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf/picard-1.12.256.jar b/settings/repository/net.sf/picard-1.12.256.jar deleted file mode 100644 index d56e5ddb6..000000000 Binary files a/settings/repository/net.sf/picard-1.12.256.jar and /dev/null differ diff --git a/settings/repository/net.sf/picard-1.12.256.xml b/settings/repository/net.sf/picard-1.12.256.xml deleted file mode 100644 index 9df3e9c08..000000000 --- a/settings/repository/net.sf/picard-1.12.256.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.16.359-sharding.jar b/settings/repository/net.sf/picard-1.16.359-sharding.jar new file mode 100644 index 000000000..27d2339d5 Binary files /dev/null and b/settings/repository/net.sf/picard-1.16.359-sharding.jar differ diff --git a/settings/repository/net.sf/picard-1.16.359-sharding.xml b/settings/repository/net.sf/picard-1.16.359-sharding.xml new file mode 100644 index 000000000..3b51956c7 --- /dev/null +++ b/settings/repository/net.sf/picard-1.16.359-sharding.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.12.256.xml b/settings/repository/net.sf/sam-1.12.256.xml deleted file mode 100644 index 7ef6d82d7..000000000 --- a/settings/repository/net.sf/sam-1.12.256.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.12.256.jar b/settings/repository/net.sf/sam-1.16.359-sharding.jar similarity index 59% rename from settings/repository/net.sf/sam-1.12.256.jar rename to settings/repository/net.sf/sam-1.16.359-sharding.jar index 7516ead34..cd5dbd0f2 100644 Binary files a/settings/repository/net.sf/sam-1.12.256.jar and b/settings/repository/net.sf/sam-1.16.359-sharding.jar differ diff --git a/settings/repository/net.sf/sam-1.16.359-sharding.xml b/settings/repository/net.sf/sam-1.16.359-sharding.xml new file mode 100644 index 000000000..e71427e9c --- /dev/null +++ b/settings/repository/net.sf/sam-1.16.359-sharding.xml @@ -0,0 +1,3 @@ + + +