From 57b8c9a53c1c54f240d728145fb1e971d0af1eb7 Mon Sep 17 00:00:00 2001 From: hanna Date: Mon, 8 Feb 2010 23:59:38 +0000 Subject: [PATCH] Supporting infrastructure for merging SAM files. Not yet integrated into the datasource. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2810 348d0f76-0448-11de-a6fe-93d51630548a --- .../sam/ComparableSamRecordIterator.java | 85 +++ .../picard/sam/MergingSamRecordIterator.java | 233 +++++++++ java/src/net/sf/samtools/BAMFileIndex2.java | 3 +- java/src/net/sf/samtools/SAMFileReader2.java | 484 ++---------------- 4 files changed, 368 insertions(+), 437 deletions(-) create mode 100644 java/src/net/sf/picard/sam/ComparableSamRecordIterator.java create mode 100644 java/src/net/sf/picard/sam/MergingSamRecordIterator.java diff --git a/java/src/net/sf/picard/sam/ComparableSamRecordIterator.java b/java/src/net/sf/picard/sam/ComparableSamRecordIterator.java new file mode 100644 index 000000000..efe9a304c --- /dev/null +++ b/java/src/net/sf/picard/sam/ComparableSamRecordIterator.java @@ -0,0 +1,85 @@ +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package net.sf.picard.sam; + +import net.sf.picard.util.PeekableIterator; + +import java.util.Comparator; +import java.util.Iterator; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileReader; + +/** + * Iterator for SAM records that implements comparable to enable sorting of iterators. + * The comparison is performed by comparing the next record in the iterator to the next + * record in another iterator and returning the ordering between those SAM records. + */ +class ComparableSamRecordIterator extends PeekableIterator implements Comparable { + private final Comparator comparator; + + /** + * Constructs a wrapping iterator around the given iterator that will be able + * to compare itself to other ComparableSamRecordIterators using the given comparator. + * + * @param iterator the wrapped iterator. + * @param comparator the Comparator to use to provide ordering fo SAMRecords + */ + public ComparableSamRecordIterator(final Iterator iterator, final Comparator comparator) { + super(iterator); + this.comparator = comparator; + } + + /** + * Compares this iterator to another comparable iterator based on the next record + * available in each iterator. If the two comparable iterators have different + * comparator types internally an exception is thrown. + * + * @param that another iterator to compare to + * @return a negative, 0 or positive number as described in the Comparator interface + */ + public int compareTo(final ComparableSamRecordIterator that) { + if (this.comparator.getClass() != that.comparator.getClass()) { + throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + + "have different orderings internally"); + } + + final SAMRecord record = this.peek(); + final SAMRecord record2 = that.peek(); + return comparator.compare(record, record2); + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + return compareTo((ComparableSamRecordIterator)o) == 0; + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException("ComparableSamRecordIterator should not be hashed because it can change value"); + } +} diff --git a/java/src/net/sf/picard/sam/MergingSamRecordIterator.java b/java/src/net/sf/picard/sam/MergingSamRecordIterator.java new file mode 100644 index 000000000..4599bed23 --- /dev/null +++ b/java/src/net/sf/picard/sam/MergingSamRecordIterator.java @@ -0,0 +1,233 @@ +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package net.sf.picard.sam; + +import net.sf.picard.PicardException; + +import java.util.*; +import java.lang.reflect.Constructor; + +import net.sf.samtools.*; + +/** + * Provides an iterator interface for merging multiple underlying iterators into a single + * iterable stream. The underlying iterators/files must all have the same sort order unless + * the requested output format is unsorted, in which case any combination is valid. + */ +public class MergingSamRecordIterator implements Iterator { + private final PriorityQueue pq; + private final SamFileHeaderMerger samHeaderMerger; + private final SAMFileHeader.SortOrder sortOrder; + + /** + * Maps iterators back to the readers from which they are derived. + */ + private final Map,SAMFileReader> iteratorToSourceMap = new HashMap,SAMFileReader>(); + + /** + * Constructs a new merging iterator with the same set of readers and sort order as + * provided by the header merger parameter. + * @param headerMerger The merged header and contents of readers. + * @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order. + */ + public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) { + this(headerMerger,createWholeFileIterators(headerMerger.getReaders()),forcePresorted); + } + + /** + * Constructs a new merging iterator with a given merged header and a subset of readers. + * @param headerMerger The merged header and contents of readers. + * @param readerToIteratorMap A mapping of reader to iterator. + * @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order. + */ + public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final Map> readerToIteratorMap, final boolean forcePresorted) { + this.samHeaderMerger = headerMerger; + this.sortOrder = headerMerger.getMergedHeader().getSortOrder(); + final SAMRecordComparator comparator = getComparator(); + + final Collection readers = headerMerger.getReaders(); + this.pq = new PriorityQueue(readers.size()); + + for(final SAMFileReader reader: readerToIteratorMap.keySet()) { + if (!forcePresorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted && + reader.getFileHeader().getSortOrder() != this.sortOrder){ + throw new PicardException("Files are not compatible with sort order"); + } + + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(readerToIteratorMap.get(reader),comparator); + addIfNotEmpty(iterator); + iteratorToSourceMap.put(iterator,reader); + } + } + + /** + * For each reader, derive an iterator that can walk the entire file and associate that back to + * @param readers The readers from which to derive iterators. + * @return A map of reader to its associated iterator. + */ + private static Map> createWholeFileIterators(Collection readers) { + Map> readerToIteratorMap = new HashMap>(); + for(final SAMFileReader reader: readers) + readerToIteratorMap.put(reader,reader.iterator()); + return readerToIteratorMap; + } + + /** Returns true if any of the underlying iterators has more records, otherwise false. */ + public boolean hasNext() { + return !this.pq.isEmpty(); + } + + /** Returns the next record from the top most iterator during merging. */ + public SAMRecord next() { + final ComparableSamRecordIterator iterator = this.pq.poll(); + final SAMRecord record = iterator.next(); + addIfNotEmpty(iterator); + record.setHeader(this.samHeaderMerger.getMergedHeader()); + + // Fix the read group if needs be + if (this.samHeaderMerger.hasReadGroupCollisions()) { + final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID); + if (oldGroupId != null ) { + final String newGroupId = this.samHeaderMerger.getReadGroupId(iteratorToSourceMap.get(iterator), oldGroupId); + record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId); + } + } + + // Fix the program group if needs be + if (this.samHeaderMerger.hasProgramGroupCollisions()) { + final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID); + if (oldGroupId != null ) { + final String newGroupId = this.samHeaderMerger.getProgramGroupId(iteratorToSourceMap.get(iterator), oldGroupId); + record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId); + } + } + + // Fix up the sequence indexes if needs be + if (this.samHeaderMerger.hasMergedSequenceDictionary()) { + if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { + record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iteratorToSourceMap.get(iterator),record.getReferenceIndex())); + } + + if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { + record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iteratorToSourceMap.get(iterator), record.getMateReferenceIndex())); + } + } + + return record; + } + + /** + * Adds iterator to priority queue. If the iterator has more records it is added + * otherwise it is closed and not added. + */ + private void addIfNotEmpty(final ComparableSamRecordIterator iterator) { + if (iterator.hasNext()) { + pq.offer(iterator); + } + else { + iterator.close(); + } + } + + /** Unsupported operation. */ + public void remove() { + throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()"); + } + + /** + * Get the right comparator for a given sort order (coordinate, alphabetic). In the + * case of "unsorted" it will return a comparator that gives an arbitrary but reflexive + * ordering. + */ + private SAMRecordComparator getComparator() { + // For unsorted build a fake comparator that compares based on object ID + if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) { + return new SAMRecordComparator() { + public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) { + return System.identityHashCode(lhs) - System.identityHashCode(rhs); + } + + public int compare(final SAMRecord lhs, final SAMRecord rhs) { + return fileOrderCompare(lhs, rhs); + } + }; + } + if (samHeaderMerger.hasMergedSequenceDictionary() && sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) { + return new MergedSequenceDictionaryCoordinateOrderComparator(); + } + + // Otherwise try and figure out what kind of comparator to return and build it + final Class type = this.sortOrder.getComparator(); + + try { + final Constructor ctor = type.getConstructor(); + return ctor.newInstance(); + } + catch (Exception e) { + throw new PicardException("Could not instantiate a comparator for sort order: " + this.sortOrder, e); + } + } + + /** Returns the merged header that the merging iterator is working from. */ + public SAMFileHeader getMergedHeader() { + return this.samHeaderMerger.getMergedHeader(); + } + + /** + * Ugh. Basically does a regular coordinate compare, but looks up the sequence indices in the merged + * sequence dictionary. I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids + * more copy & paste. + */ + private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator { + + public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) { + final int referenceIndex1 = getReferenceIndex(samRecord1); + final int referenceIndex2 = getReferenceIndex(samRecord2); + if (referenceIndex1 != referenceIndex2) { + if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { + return 1; + } else if (referenceIndex2 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { + return -1; + } else { + return referenceIndex1 - referenceIndex2; + } + } + if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { + // Both are unmapped. + return 0; + } + return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart(); + } + + private int getReferenceIndex(final SAMRecord samRecord) { + if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { + return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getReferenceIndex()); + } + if (samRecord.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { + return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getMateReferenceIndex()); + } + return SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; + } + } +} diff --git a/java/src/net/sf/samtools/BAMFileIndex2.java b/java/src/net/sf/samtools/BAMFileIndex2.java index 39d18df78..8ec3c9283 100644 --- a/java/src/net/sf/samtools/BAMFileIndex2.java +++ b/java/src/net/sf/samtools/BAMFileIndex2.java @@ -34,7 +34,7 @@ import java.util.*; /** * Class for reading BAM file indexes. */ -public class BAMFileIndex2 +public class BAMFileIndex2 extends BAMFileIndex { private static final int MAX_BINS = 37450; // =(8^6-1)/7+1 private static final int BAM_LIDX_SHIFT = 14; @@ -55,6 +55,7 @@ public class BAMFileIndex2 protected final SortedMap referenceToLinearIndices = new TreeMap(); protected BAMFileIndex2(final File file) { + super(file); loadIndex(file); } diff --git a/java/src/net/sf/samtools/SAMFileReader2.java b/java/src/net/sf/samtools/SAMFileReader2.java index ab01449e8..7aa5a0c1a 100644 --- a/java/src/net/sf/samtools/SAMFileReader2.java +++ b/java/src/net/sf/samtools/SAMFileReader2.java @@ -23,52 +23,22 @@ */ package net.sf.samtools; - -import net.sf.samtools.util.BlockCompressedInputStream; import net.sf.samtools.util.CloseableIterator; -import net.sf.samtools.util.IOUtil; -import net.sf.samtools.util.RuntimeIOException; -import net.sf.samtools.SAMFileReader.ReaderImplementation; -import net.sf.samtools.SAMFileReader.ValidationStringency; import net.sf.picard.PicardException; import java.io.*; -import java.util.zip.GZIPInputStream; import java.util.List; -import java.net.URL; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.lang.reflect.InvocationTargetException; +import org.broadinstitute.sting.utils.JVMUtils; +import org.broadinstitute.sting.utils.StingException; /** * Class for reading and querying SAM/BAM files. Delegates to appropriate concrete implementation. */ -public class SAMFileReader2 implements Iterable { - - private static ValidationStringency defaultValidationStringency = ValidationStringency.DEFAULT_STRINGENCY; - - public static ValidationStringency getDefaultValidationStringency() { - return defaultValidationStringency; - } - - /** - * Set validation stringency for all subsequently-created SAMFileReaders. This is the only way to - * change the validation stringency for SAM header. - */ - public static void setDefaultValidationStringency(final ValidationStringency defaultValidationStringency) { - SAMFileReader2.defaultValidationStringency = defaultValidationStringency; - } - - private boolean mIsBinary = false; - private BAMFileIndex2 mFileIndex = null; - private ReaderImplementation mReader = null; - private File samFile = null; - - /** - * Prepare to read a SAM or BAM file. Indexed lookup not allowed because reading from InputStream. - */ - public SAMFileReader2(final InputStream stream) { - this(stream, false); - } - +public class SAMFileReader2 extends SAMFileReader { /** * Prepare to read a SAM or BAM file. If the given file is a BAM, and has a companion BAI index file * that is named according to the convention, it will be found and opened, and indexed query will be allowed. @@ -77,28 +47,6 @@ public class SAMFileReader2 implements Iterable { this(file, null, false); } - /** - * Prepare to read a SAM or BAM file. If the given file is a BAM, and an index is present, indexed query - * will be allowed. - * - * @param file SAM or BAM to read. - * @param indexFile Index file that is companion to BAM, or null if no index file, or if index file - * should be found automatically. - */ - public SAMFileReader2(final File file, final File indexFile) { - this(file, indexFile, false); - } - - /** - * Read a SAM or BAM file. Indexed lookup not allowed because reading from InputStream. - * - * @param stream input SAM or BAM. - * @param eagerDecode if true, decode SAM record entirely when reading it. - */ - public SAMFileReader2(final InputStream stream, final boolean eagerDecode) { - init(stream, eagerDecode, defaultValidationStringency); - } - /** * Read a SAM or BAM file, possibly with an index file if present. * If the given file is a BAM, and an index is present, indexed query will be allowed. @@ -107,7 +55,7 @@ public class SAMFileReader2 implements Iterable { * @param eagerDecode if true, decode SAM record entirely when reading it. */ public SAMFileReader2(final File file, final boolean eagerDecode) { - init(file, null, eagerDecode, defaultValidationStringency); + this(file,null,eagerDecode); } /** @@ -119,41 +67,20 @@ public class SAMFileReader2 implements Iterable { * @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it. */ public SAMFileReader2(final File file, final File indexFile, final boolean eagerDecode){ - init(file, indexFile, eagerDecode, defaultValidationStringency); - } + super(file,indexFile,eagerDecode); + close(); - /** - * Read a BAM file by http - * indexed query will be allowed. - * - * @param url BAM. - * @param indexFile Location of index file, or null in order to use the default index file (if present). - * @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it. - */ - public SAMFileReader2(final URL url, final File indexFile, final boolean eagerDecode) { - init(url, indexFile, eagerDecode, defaultValidationStringency); - } + try { + BAMFileReader2 reader = new BAMFileReader2(file,eagerDecode,getDefaultValidationStringency()); + BAMFileIndex2 index = new BAMFileIndex2(indexFile != null ? indexFile : findIndexFileFromParent(file)); + reader.setFileIndex(index); - public void close() { - if (mReader != null) { - mReader.close(); + JVMUtils.setFieldValue(getField("mReader"),this,reader); + JVMUtils.setFieldValue(getField("mFileIndex"),this,index); + } + catch(IOException ex) { + throw new StingException("Unable to load BAM file: " + file,ex); } - mReader = null; - mFileIndex = null; - } - - /** - * @return True if this is a BAM reader. - */ - public boolean isBinary() { - return mIsBinary; - } - - /** - * @return true if ths is a BAM file, and has an index - */ - public boolean hasIndex() { - return (mFileIndex != null); } /** @@ -161,9 +88,10 @@ public class SAMFileReader2 implements Iterable { * @return Number of levels in this index. */ public int getNumIndexLevels() { - if(mFileIndex == null) + BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this); + if(fileIndex == null) throw new SAMException("Unable to determine number of index levels; BAM file index is not present."); - return mFileIndex.getNumIndexLevels(); + return fileIndex.getNumIndexLevels(); } /** @@ -172,34 +100,10 @@ public class SAMFileReader2 implements Iterable { * @return the level associated with the given bin number. */ public int getLevelForBin(final Bin bin) { - if(mFileIndex == null) - throw new SAMException("Unable to determine level of bin number; BAM file index is not present."); - return mFileIndex.getLevelForBinNumber(bin.binNumber); - } - - public SAMFileHeader getFileHeader() { - return mReader.getFileHeader(); - } - - /** - * Control validation of SAMRecords as they are read from file. - * In order to control validation stringency for SAM Header, call SAMFileReader.setDefaultValidationStringency - * before constructing a SAMFileReader. - */ - public void setValidationStringency(final ValidationStringency validationStringency) { - mReader.setValidationStringency(validationStringency); - } - - /** - * Iterate through file in order. For a SAMFileReader constructed from an InputStream, and for any SAM file, - * a 2nd iteration starts where the 1st one left off. For a BAM constructed from a File, each new iteration - * starts at the first record. - *

- * Only a single open iterator on a SAM or BAM file may be extant at any one time. If you want to start - * a second iteration, the first one must be closed first. - */ - public CloseableIterator iterator() { - return mReader.getIterator(); + BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this); + if(fileIndex == null) + throw new SAMException("Unable to determine number of index levels; BAM file index is not present."); + return fileIndex.getLevelForBinNumber(bin.binNumber); } /** @@ -209,338 +113,46 @@ public class SAMFileReader2 implements Iterable { */ public CloseableIterator iterator(List chunks) { // TODO: Add sanity checks so that we're not doing this against an unsupported BAM file. - if(!(mReader instanceof BAMFileReader2)) - throw new PicardException("This call cannot be performed without a backing BAMFileReader2"); - return ((BAMFileReader2)mReader).getIterator(chunks); + BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this); + return reader.getIterator(chunks); } public List getOverlappingBins(final String sequence, final int start, final int end) { // TODO: Add sanity checks so that we're not doing this against an unsupported BAM file. - if(!(mReader instanceof BAMFileReader2)) - throw new PicardException("This call cannot be performed without a backing BAMFileReader2"); - return ((BAMFileReader2)mReader).getOverlappingBins(sequence,start,end); + BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this); + return reader.getOverlappingBins(sequence,start,end); } public List getFilePointersBounding(final Bin bin) { - if(!(mReader instanceof BAMFileReader2)) - throw new PicardException("This call cannot be performed without a backing BAMFileReader2"); - return ((BAMFileReader2)mReader).getFilePointersBounding(bin); + // TODO: Add sanity checks so that we're not doing this against an unsupported BAM file. + BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this); + return reader.getFilePointersBounding(bin); } - - /** - * Iterate over records that match the given interval. Only valid to call this if hasIndex() == true. - *

- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start - * a second iteration, the first one must be closed first. You can use a second SAMFileReader to iterate - * in parallel over the same underlying file. - *

- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read - * and then discarded because they do not match the interval of interest. - *

- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that - * is in the query region. - * - * @param sequence Reference sequence of interest. - * @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence. - * @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence. - * @param contained If true, each SAMRecord returned is will have its alignment completely contained in the - * interval of interest. If false, the alignment of the returned SAMRecords need only overlap the interval of interest. - * @return Iterator over the SAMRecords matching the interval. - */ - public CloseableIterator query(final String sequence, final int start, final int end, final boolean contained) { - return mReader.query(sequence, start, end, contained); - } - - /** - * Iterate over records that overlap the given interval. Only valid to call this if hasIndex() == true. - *

- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start - * a second iteration, the first one must be closed first. - *

- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read - * and then discarded because they do not match the interval of interest. - *

- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that - * is in the query region. - * - * @param sequence Reference sequence of interest. - * @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence. - * @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence. - * @return Iterator over the SAMRecords overlapping the interval. - */ - public CloseableIterator queryOverlapping(final String sequence, final int start, final int end) { - return query(sequence, start, end, false); - } - - /** - * Iterate over records that are contained in the given interval. Only valid to call this if hasIndex() == true. - *

- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start - * a second iteration, the first one must be closed first. - *

- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read - * and then discarded because they do not match the interval of interest. - *

- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that - * is in the query region. - * - * @param sequence Reference sequence of interest. - * @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence. - * @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence. - * @return Iterator over the SAMRecords contained in the interval. - */ - public CloseableIterator queryContained(final String sequence, final int start, final int end) { - return query(sequence, start, end, true); - } - - public CloseableIterator queryUnmapped() { - return mReader.queryUnmapped(); - } - - /** - * Iterate over records that map to the given sequence and start at the given position. Only valid to call this if hasIndex() == true. - *

- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start - * a second iteration, the first one must be closed first. - *

- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read - * and then discarded because they do not match the interval of interest. - *

- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that - * matches the arguments. - * - * @param sequence Reference sequence of interest. - * @param start Alignment start of interest. - * @return Iterator over the SAMRecords with the given alignment start. - */ - public CloseableIterator queryAlignmentStart(final String sequence, final int start) { - return mReader.queryAlignmentStart(sequence, start); - } - - /** - * Fetch the mate for the given read. Only valid to call this if hasIndex() == true. - * This will work whether the mate has a coordinate or not, so long as the given read has correct - * mate information. This method iterates over the SAM file, so there may not be an unclosed - * iterator on the SAM file when this method is called. - * - * @param rec Record for which mate is sought. Must be a paired read. - * @return rec's mate, or null if it cannot be found. - */ - public SAMRecord queryMate(final SAMRecord rec) { - if (!rec.getReadPairedFlag()) { - throw new IllegalArgumentException("queryMate called for unpaired read."); - } - if (rec.getFirstOfPairFlag() == rec.getSecondOfPairFlag()) { - throw new IllegalArgumentException("SAMRecord must be either first and second of pair, but not both."); - } - final boolean firstOfPair = rec.getFirstOfPairFlag(); - final CloseableIterator it; - if (rec.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { - it = queryUnmapped(); - } else { - it = queryAlignmentStart(rec.getMateReferenceName(), rec.getMateAlignmentStart()); - } + private Field getField(String fieldName) { try { - SAMRecord mateRec = null; - while (it.hasNext()) { - final SAMRecord next = it.next(); - if (!next.getReadPairedFlag()) { - if (rec.getReadName().equals(next.getReadName())) { - throw new SAMFormatException("Paired and unpaired reads with same name: " + rec.getReadName()); - } - continue; - } - if (firstOfPair) { - if (next.getFirstOfPairFlag()) continue; - } else { - if (next.getSecondOfPairFlag()) continue; - } - if (rec.getReadName().equals(next.getReadName())) { - if (mateRec != null) { - throw new SAMFormatException("Multiple SAMRecord with read name " + rec.getReadName() + - " for " + (firstOfPair ? "second" : "first") + " end."); - } - mateRec = next; - } - } - return mateRec; - } finally { - it.close(); + return getClass().getSuperclass().getDeclaredField(fieldName); + } + catch(NoSuchFieldException ex) { + throw new StingException("Unable to load field: " + fieldName); } } - private void init(final InputStream stream, final boolean eagerDecode, final ValidationStringency validationStringency) { - + private File findIndexFileFromParent(File bamFile) { try { - final BufferedInputStream bufferedStream = IOUtil.toBufferedStream(stream); - if (isBAMFile(bufferedStream)) { - mIsBinary = true; - mReader = new BAMFileReader2(bufferedStream, eagerDecode, validationStringency); - } else if (isGzippedSAMFile(bufferedStream)) { - mIsBinary = false; - mReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency); - } else if (isSAMFile(bufferedStream)) { - mIsBinary = false; - mReader = new SAMTextReader(bufferedStream, validationStringency); - } else { - throw new SAMFormatException("Unrecognized file format"); - } - setValidationStringency(validationStringency); - } catch (IOException e) { - throw new RuntimeIOException(e); + Method method = getClass().getSuperclass().getDeclaredMethod("findIndexFile",File.class); + method.setAccessible(true); + return (File)method.invoke(this,bamFile); } - } - - /** - * @param url - * @param indexFile - * @param eagerDecode - */ - private void init(final URL url, final File indexFile, final boolean eagerDecode, final ValidationStringency validationStringency) { - - try { - // Its too expensive to examine the remote file to determine type. - // Rely on file extension. - if (url.toString().toLowerCase().endsWith(".bam")) { - mIsBinary = true; - final BAMFileReader2 reader = new BAMFileReader2(url, eagerDecode, validationStringency); - mReader = reader; - if (indexFile != null) { - mFileIndex = new BAMFileIndex2(indexFile); - reader.setFileIndex(mFileIndex); - } - } else { - throw new SAMFormatException("Unrecognized file format: " + url); - } - setValidationStringency(validationStringency); + catch(IllegalAccessException ex) { + throw new StingException("Unable to run method findIndexFile",ex); } - catch (IOException e) { - throw new RuntimeIOException(e); + catch(InvocationTargetException ex) { + throw new StingException("Unable to run method findIndexFile",ex); + } + catch(NoSuchMethodException ex) { + throw new StingException("Unable to run method findIndexFile",ex); } } - - - private void init(final File file, File indexFile, final boolean eagerDecode, final ValidationStringency validationStringency) { - this.samFile = file; - - try { - final BufferedInputStream bufferedStream = new BufferedInputStream(new FileInputStream(file)); - if (isBAMFile(bufferedStream)) { - bufferedStream.close(); - mIsBinary = true; - final BAMFileReader2 reader = new BAMFileReader2(file, eagerDecode, validationStringency); - mReader = reader; - if (indexFile == null) { - indexFile = findIndexFile(file); - } - if (indexFile != null) { - mFileIndex = new BAMFileIndex2(indexFile); - reader.setFileIndex(mFileIndex); - if (indexFile.lastModified() < file.lastModified()) { - System.err.println("WARNING: BAM index file " + indexFile.getAbsolutePath() + - " is older than BAM " + file.getAbsolutePath()); - } - } - } else if (isGzippedSAMFile(bufferedStream)) { - mIsBinary = false; - mReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency); - } else if (isSAMFile(bufferedStream)) { - if (indexFile != null) { - bufferedStream.close(); - throw new RuntimeException("Cannot use index file with textual SAM file"); - } - mIsBinary = false; - mReader = new SAMTextReader(bufferedStream, file, validationStringency); - } else { - bufferedStream.close(); - throw new SAMFormatException("Unrecognized file format"); - } - setValidationStringency(validationStringency); - } - catch (IOException e) { - throw new RuntimeIOException(e); - } - } - - - /** - * Look for BAM index file according to standard naming convention. - * - * @param dataFile BAM file name. - * @return Index file name, or null if not found. - */ - private File findIndexFile(final File dataFile) { - // If input is foo.bam, look for foo.bai - final String bamExtension = ".bam"; - File indexFile; - final String fileName = dataFile.getName(); - if (fileName.endsWith(bamExtension)) { - final String bai = fileName.substring(0, fileName.length() - bamExtension.length()) + ".bai"; - indexFile = new File(dataFile.getParent(), bai); - if (indexFile.exists()) { - return indexFile; - } - } - - // If foo.bai doesn't exist look for foo.bam.bai - indexFile = new File(dataFile.getParent(), dataFile.getName() + ".bai"); - if (indexFile.exists()) { - return indexFile; - } else { - return null; - } - } - - /** - * @param stream stream.markSupported() must be true - * @return true if this looks like a BAM file. - */ - private boolean isBAMFile(final InputStream stream) - throws IOException { - return BlockCompressedInputStream.isValidFile(stream); - } - - /** - * Attempts to check whether the file is a gzipped sam file. Returns true if it - * is and false otherwise. - */ - private boolean isGzippedSAMFile(final BufferedInputStream stream) { - if (!stream.markSupported()) { - throw new IllegalArgumentException("Cannot test a stream that doesn't support marking."); - } - stream.mark(8000); - - try { - final GZIPInputStream gunzip = new GZIPInputStream(stream); - final int ch = gunzip.read(); - return true; - } - catch (IOException ioe) { - return false; - } - finally { - try { - stream.reset(); - } - catch (IOException ioe) { - throw new IllegalStateException("Could not reset stream."); - } - } - } - - private boolean isSAMFile(final InputStream stream) { - // For now, assume every non-binary file is a SAM text file. - return true; - } - - @Override - public String toString() { - if (this.samFile == null) { - return getClass().getSimpleName() + "{initialized with stream}"; - } else { - return getClass().getSimpleName() + "{" + this.samFile.getAbsolutePath() + "}"; - } - } }