Supporting infrastructure for merging SAM files. Not yet integrated into the datasource.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2810 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fc810a1800
commit
57b8c9a53c
|
|
@ -0,0 +1,85 @@
|
||||||
|
/*
|
||||||
|
* The MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package net.sf.picard.sam;
|
||||||
|
|
||||||
|
import net.sf.picard.util.PeekableIterator;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterator for SAM records that implements comparable to enable sorting of iterators.
|
||||||
|
* The comparison is performed by comparing the next record in the iterator to the next
|
||||||
|
* record in another iterator and returning the ordering between those SAM records.
|
||||||
|
*/
|
||||||
|
class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator> {
|
||||||
|
private final Comparator<SAMRecord> comparator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a wrapping iterator around the given iterator that will be able
|
||||||
|
* to compare itself to other ComparableSamRecordIterators using the given comparator.
|
||||||
|
*
|
||||||
|
* @param iterator the wrapped iterator.
|
||||||
|
* @param comparator the Comparator to use to provide ordering fo SAMRecords
|
||||||
|
*/
|
||||||
|
public ComparableSamRecordIterator(final Iterator<SAMRecord> iterator, final Comparator<SAMRecord> comparator) {
|
||||||
|
super(iterator);
|
||||||
|
this.comparator = comparator;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares this iterator to another comparable iterator based on the next record
|
||||||
|
* available in each iterator. If the two comparable iterators have different
|
||||||
|
* comparator types internally an exception is thrown.
|
||||||
|
*
|
||||||
|
* @param that another iterator to compare to
|
||||||
|
* @return a negative, 0 or positive number as described in the Comparator interface
|
||||||
|
*/
|
||||||
|
public int compareTo(final ComparableSamRecordIterator that) {
|
||||||
|
if (this.comparator.getClass() != that.comparator.getClass()) {
|
||||||
|
throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " +
|
||||||
|
"have different orderings internally");
|
||||||
|
}
|
||||||
|
|
||||||
|
final SAMRecord record = this.peek();
|
||||||
|
final SAMRecord record2 = that.peek();
|
||||||
|
return comparator.compare(record, record2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(final Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
|
||||||
|
return compareTo((ComparableSamRecordIterator)o) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
throw new UnsupportedOperationException("ComparableSamRecordIterator should not be hashed because it can change value");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,233 @@
|
||||||
|
/*
|
||||||
|
* The MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package net.sf.picard.sam;
|
||||||
|
|
||||||
|
import net.sf.picard.PicardException;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.lang.reflect.Constructor;
|
||||||
|
|
||||||
|
import net.sf.samtools.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides an iterator interface for merging multiple underlying iterators into a single
|
||||||
|
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||||
|
* the requested output format is unsorted, in which case any combination is valid.
|
||||||
|
*/
|
||||||
|
public class MergingSamRecordIterator implements Iterator<SAMRecord> {
|
||||||
|
private final PriorityQueue<ComparableSamRecordIterator> pq;
|
||||||
|
private final SamFileHeaderMerger samHeaderMerger;
|
||||||
|
private final SAMFileHeader.SortOrder sortOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maps iterators back to the readers from which they are derived.
|
||||||
|
*/
|
||||||
|
private final Map<Iterator<SAMRecord>,SAMFileReader> iteratorToSourceMap = new HashMap<Iterator<SAMRecord>,SAMFileReader>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||||
|
* provided by the header merger parameter.
|
||||||
|
* @param headerMerger The merged header and contents of readers.
|
||||||
|
* @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order.
|
||||||
|
*/
|
||||||
|
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) {
|
||||||
|
this(headerMerger,createWholeFileIterators(headerMerger.getReaders()),forcePresorted);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a new merging iterator with a given merged header and a subset of readers.
|
||||||
|
* @param headerMerger The merged header and contents of readers.
|
||||||
|
* @param readerToIteratorMap A mapping of reader to iterator.
|
||||||
|
* @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order.
|
||||||
|
*/
|
||||||
|
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final Map<SAMFileReader,Iterator<SAMRecord>> readerToIteratorMap, final boolean forcePresorted) {
|
||||||
|
this.samHeaderMerger = headerMerger;
|
||||||
|
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
|
||||||
|
final SAMRecordComparator comparator = getComparator();
|
||||||
|
|
||||||
|
final Collection<SAMFileReader> readers = headerMerger.getReaders();
|
||||||
|
this.pq = new PriorityQueue<ComparableSamRecordIterator>(readers.size());
|
||||||
|
|
||||||
|
for(final SAMFileReader reader: readerToIteratorMap.keySet()) {
|
||||||
|
if (!forcePresorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted &&
|
||||||
|
reader.getFileHeader().getSortOrder() != this.sortOrder){
|
||||||
|
throw new PicardException("Files are not compatible with sort order");
|
||||||
|
}
|
||||||
|
|
||||||
|
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(readerToIteratorMap.get(reader),comparator);
|
||||||
|
addIfNotEmpty(iterator);
|
||||||
|
iteratorToSourceMap.put(iterator,reader);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For each reader, derive an iterator that can walk the entire file and associate that back to
|
||||||
|
* @param readers The readers from which to derive iterators.
|
||||||
|
* @return A map of reader to its associated iterator.
|
||||||
|
*/
|
||||||
|
private static Map<SAMFileReader,Iterator<SAMRecord>> createWholeFileIterators(Collection<SAMFileReader> readers) {
|
||||||
|
Map<SAMFileReader,Iterator<SAMRecord>> readerToIteratorMap = new HashMap<SAMFileReader,Iterator<SAMRecord>>();
|
||||||
|
for(final SAMFileReader reader: readers)
|
||||||
|
readerToIteratorMap.put(reader,reader.iterator());
|
||||||
|
return readerToIteratorMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns true if any of the underlying iterators has more records, otherwise false. */
|
||||||
|
public boolean hasNext() {
|
||||||
|
return !this.pq.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the next record from the top most iterator during merging. */
|
||||||
|
public SAMRecord next() {
|
||||||
|
final ComparableSamRecordIterator iterator = this.pq.poll();
|
||||||
|
final SAMRecord record = iterator.next();
|
||||||
|
addIfNotEmpty(iterator);
|
||||||
|
record.setHeader(this.samHeaderMerger.getMergedHeader());
|
||||||
|
|
||||||
|
// Fix the read group if needs be
|
||||||
|
if (this.samHeaderMerger.hasReadGroupCollisions()) {
|
||||||
|
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID);
|
||||||
|
if (oldGroupId != null ) {
|
||||||
|
final String newGroupId = this.samHeaderMerger.getReadGroupId(iteratorToSourceMap.get(iterator), oldGroupId);
|
||||||
|
record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fix the program group if needs be
|
||||||
|
if (this.samHeaderMerger.hasProgramGroupCollisions()) {
|
||||||
|
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID);
|
||||||
|
if (oldGroupId != null ) {
|
||||||
|
final String newGroupId = this.samHeaderMerger.getProgramGroupId(iteratorToSourceMap.get(iterator), oldGroupId);
|
||||||
|
record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fix up the sequence indexes if needs be
|
||||||
|
if (this.samHeaderMerger.hasMergedSequenceDictionary()) {
|
||||||
|
if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||||
|
record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iteratorToSourceMap.get(iterator),record.getReferenceIndex()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||||
|
record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iteratorToSourceMap.get(iterator), record.getMateReferenceIndex()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds iterator to priority queue. If the iterator has more records it is added
|
||||||
|
* otherwise it is closed and not added.
|
||||||
|
*/
|
||||||
|
private void addIfNotEmpty(final ComparableSamRecordIterator iterator) {
|
||||||
|
if (iterator.hasNext()) {
|
||||||
|
pq.offer(iterator);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
iterator.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Unsupported operation. */
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the right comparator for a given sort order (coordinate, alphabetic). In the
|
||||||
|
* case of "unsorted" it will return a comparator that gives an arbitrary but reflexive
|
||||||
|
* ordering.
|
||||||
|
*/
|
||||||
|
private SAMRecordComparator getComparator() {
|
||||||
|
// For unsorted build a fake comparator that compares based on object ID
|
||||||
|
if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
|
||||||
|
return new SAMRecordComparator() {
|
||||||
|
public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||||
|
return System.identityHashCode(lhs) - System.identityHashCode(rhs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||||
|
return fileOrderCompare(lhs, rhs);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (samHeaderMerger.hasMergedSequenceDictionary() && sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) {
|
||||||
|
return new MergedSequenceDictionaryCoordinateOrderComparator();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise try and figure out what kind of comparator to return and build it
|
||||||
|
final Class<? extends SAMRecordComparator> type = this.sortOrder.getComparator();
|
||||||
|
|
||||||
|
try {
|
||||||
|
final Constructor<? extends SAMRecordComparator> ctor = type.getConstructor();
|
||||||
|
return ctor.newInstance();
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new PicardException("Could not instantiate a comparator for sort order: " + this.sortOrder, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the merged header that the merging iterator is working from. */
|
||||||
|
public SAMFileHeader getMergedHeader() {
|
||||||
|
return this.samHeaderMerger.getMergedHeader();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ugh. Basically does a regular coordinate compare, but looks up the sequence indices in the merged
|
||||||
|
* sequence dictionary. I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids
|
||||||
|
* more copy & paste.
|
||||||
|
*/
|
||||||
|
private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator {
|
||||||
|
|
||||||
|
public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
|
||||||
|
final int referenceIndex1 = getReferenceIndex(samRecord1);
|
||||||
|
final int referenceIndex2 = getReferenceIndex(samRecord2);
|
||||||
|
if (referenceIndex1 != referenceIndex2) {
|
||||||
|
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||||
|
return 1;
|
||||||
|
} else if (referenceIndex2 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return referenceIndex1 - referenceIndex2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||||
|
// Both are unmapped.
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getReferenceIndex(final SAMRecord samRecord) {
|
||||||
|
if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||||
|
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getReferenceIndex());
|
||||||
|
}
|
||||||
|
if (samRecord.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||||
|
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getMateReferenceIndex());
|
||||||
|
}
|
||||||
|
return SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -34,7 +34,7 @@ import java.util.*;
|
||||||
/**
|
/**
|
||||||
* Class for reading BAM file indexes.
|
* Class for reading BAM file indexes.
|
||||||
*/
|
*/
|
||||||
public class BAMFileIndex2
|
public class BAMFileIndex2 extends BAMFileIndex
|
||||||
{
|
{
|
||||||
private static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
private static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
||||||
private static final int BAM_LIDX_SHIFT = 14;
|
private static final int BAM_LIDX_SHIFT = 14;
|
||||||
|
|
@ -55,6 +55,7 @@ public class BAMFileIndex2
|
||||||
protected final SortedMap<Integer,LinearIndex> referenceToLinearIndices = new TreeMap<Integer,LinearIndex>();
|
protected final SortedMap<Integer,LinearIndex> referenceToLinearIndices = new TreeMap<Integer,LinearIndex>();
|
||||||
|
|
||||||
protected BAMFileIndex2(final File file) {
|
protected BAMFileIndex2(final File file) {
|
||||||
|
super(file);
|
||||||
loadIndex(file);
|
loadIndex(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,52 +23,22 @@
|
||||||
*/
|
*/
|
||||||
package net.sf.samtools;
|
package net.sf.samtools;
|
||||||
|
|
||||||
|
|
||||||
import net.sf.samtools.util.BlockCompressedInputStream;
|
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import net.sf.samtools.util.IOUtil;
|
|
||||||
import net.sf.samtools.util.RuntimeIOException;
|
|
||||||
import net.sf.samtools.SAMFileReader.ReaderImplementation;
|
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
|
||||||
import net.sf.picard.PicardException;
|
import net.sf.picard.PicardException;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.zip.GZIPInputStream;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.net.URL;
|
import java.lang.reflect.Field;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.JVMUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class for reading and querying SAM/BAM files. Delegates to appropriate concrete implementation.
|
* Class for reading and querying SAM/BAM files. Delegates to appropriate concrete implementation.
|
||||||
*/
|
*/
|
||||||
public class SAMFileReader2 implements Iterable<SAMRecord> {
|
public class SAMFileReader2 extends SAMFileReader {
|
||||||
|
|
||||||
private static ValidationStringency defaultValidationStringency = ValidationStringency.DEFAULT_STRINGENCY;
|
|
||||||
|
|
||||||
public static ValidationStringency getDefaultValidationStringency() {
|
|
||||||
return defaultValidationStringency;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Set validation stringency for all subsequently-created SAMFileReaders. This is the only way to
|
|
||||||
* change the validation stringency for SAM header.
|
|
||||||
*/
|
|
||||||
public static void setDefaultValidationStringency(final ValidationStringency defaultValidationStringency) {
|
|
||||||
SAMFileReader2.defaultValidationStringency = defaultValidationStringency;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean mIsBinary = false;
|
|
||||||
private BAMFileIndex2 mFileIndex = null;
|
|
||||||
private ReaderImplementation mReader = null;
|
|
||||||
private File samFile = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Prepare to read a SAM or BAM file. Indexed lookup not allowed because reading from InputStream.
|
|
||||||
*/
|
|
||||||
public SAMFileReader2(final InputStream stream) {
|
|
||||||
this(stream, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prepare to read a SAM or BAM file. If the given file is a BAM, and has a companion BAI index file
|
* Prepare to read a SAM or BAM file. If the given file is a BAM, and has a companion BAI index file
|
||||||
* that is named according to the convention, it will be found and opened, and indexed query will be allowed.
|
* that is named according to the convention, it will be found and opened, and indexed query will be allowed.
|
||||||
|
|
@ -77,28 +47,6 @@ public class SAMFileReader2 implements Iterable<SAMRecord> {
|
||||||
this(file, null, false);
|
this(file, null, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Prepare to read a SAM or BAM file. If the given file is a BAM, and an index is present, indexed query
|
|
||||||
* will be allowed.
|
|
||||||
*
|
|
||||||
* @param file SAM or BAM to read.
|
|
||||||
* @param indexFile Index file that is companion to BAM, or null if no index file, or if index file
|
|
||||||
* should be found automatically.
|
|
||||||
*/
|
|
||||||
public SAMFileReader2(final File file, final File indexFile) {
|
|
||||||
this(file, indexFile, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Read a SAM or BAM file. Indexed lookup not allowed because reading from InputStream.
|
|
||||||
*
|
|
||||||
* @param stream input SAM or BAM.
|
|
||||||
* @param eagerDecode if true, decode SAM record entirely when reading it.
|
|
||||||
*/
|
|
||||||
public SAMFileReader2(final InputStream stream, final boolean eagerDecode) {
|
|
||||||
init(stream, eagerDecode, defaultValidationStringency);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read a SAM or BAM file, possibly with an index file if present.
|
* Read a SAM or BAM file, possibly with an index file if present.
|
||||||
* If the given file is a BAM, and an index is present, indexed query will be allowed.
|
* If the given file is a BAM, and an index is present, indexed query will be allowed.
|
||||||
|
|
@ -107,7 +55,7 @@ public class SAMFileReader2 implements Iterable<SAMRecord> {
|
||||||
* @param eagerDecode if true, decode SAM record entirely when reading it.
|
* @param eagerDecode if true, decode SAM record entirely when reading it.
|
||||||
*/
|
*/
|
||||||
public SAMFileReader2(final File file, final boolean eagerDecode) {
|
public SAMFileReader2(final File file, final boolean eagerDecode) {
|
||||||
init(file, null, eagerDecode, defaultValidationStringency);
|
this(file,null,eagerDecode);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -119,41 +67,20 @@ public class SAMFileReader2 implements Iterable<SAMRecord> {
|
||||||
* @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it.
|
* @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it.
|
||||||
*/
|
*/
|
||||||
public SAMFileReader2(final File file, final File indexFile, final boolean eagerDecode){
|
public SAMFileReader2(final File file, final File indexFile, final boolean eagerDecode){
|
||||||
init(file, indexFile, eagerDecode, defaultValidationStringency);
|
super(file,indexFile,eagerDecode);
|
||||||
}
|
close();
|
||||||
|
|
||||||
/**
|
try {
|
||||||
* Read a BAM file by http
|
BAMFileReader2 reader = new BAMFileReader2(file,eagerDecode,getDefaultValidationStringency());
|
||||||
* indexed query will be allowed.
|
BAMFileIndex2 index = new BAMFileIndex2(indexFile != null ? indexFile : findIndexFileFromParent(file));
|
||||||
*
|
reader.setFileIndex(index);
|
||||||
* @param url BAM.
|
|
||||||
* @param indexFile Location of index file, or null in order to use the default index file (if present).
|
|
||||||
* @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it.
|
|
||||||
*/
|
|
||||||
public SAMFileReader2(final URL url, final File indexFile, final boolean eagerDecode) {
|
|
||||||
init(url, indexFile, eagerDecode, defaultValidationStringency);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
JVMUtils.setFieldValue(getField("mReader"),this,reader);
|
||||||
if (mReader != null) {
|
JVMUtils.setFieldValue(getField("mFileIndex"),this,index);
|
||||||
mReader.close();
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new StingException("Unable to load BAM file: " + file,ex);
|
||||||
}
|
}
|
||||||
mReader = null;
|
|
||||||
mFileIndex = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return True if this is a BAM reader.
|
|
||||||
*/
|
|
||||||
public boolean isBinary() {
|
|
||||||
return mIsBinary;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return true if ths is a BAM file, and has an index
|
|
||||||
*/
|
|
||||||
public boolean hasIndex() {
|
|
||||||
return (mFileIndex != null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -161,9 +88,10 @@ public class SAMFileReader2 implements Iterable<SAMRecord> {
|
||||||
* @return Number of levels in this index.
|
* @return Number of levels in this index.
|
||||||
*/
|
*/
|
||||||
public int getNumIndexLevels() {
|
public int getNumIndexLevels() {
|
||||||
if(mFileIndex == null)
|
BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this);
|
||||||
|
if(fileIndex == null)
|
||||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||||
return mFileIndex.getNumIndexLevels();
|
return fileIndex.getNumIndexLevels();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -172,34 +100,10 @@ public class SAMFileReader2 implements Iterable<SAMRecord> {
|
||||||
* @return the level associated with the given bin number.
|
* @return the level associated with the given bin number.
|
||||||
*/
|
*/
|
||||||
public int getLevelForBin(final Bin bin) {
|
public int getLevelForBin(final Bin bin) {
|
||||||
if(mFileIndex == null)
|
BAMFileIndex2 fileIndex = (BAMFileIndex2)JVMUtils.getFieldValue(getField("mFileIndex"),this);
|
||||||
throw new SAMException("Unable to determine level of bin number; BAM file index is not present.");
|
if(fileIndex == null)
|
||||||
return mFileIndex.getLevelForBinNumber(bin.binNumber);
|
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||||
}
|
return fileIndex.getLevelForBinNumber(bin.binNumber);
|
||||||
|
|
||||||
public SAMFileHeader getFileHeader() {
|
|
||||||
return mReader.getFileHeader();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Control validation of SAMRecords as they are read from file.
|
|
||||||
* In order to control validation stringency for SAM Header, call SAMFileReader.setDefaultValidationStringency
|
|
||||||
* before constructing a SAMFileReader.
|
|
||||||
*/
|
|
||||||
public void setValidationStringency(final ValidationStringency validationStringency) {
|
|
||||||
mReader.setValidationStringency(validationStringency);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Iterate through file in order. For a SAMFileReader constructed from an InputStream, and for any SAM file,
|
|
||||||
* a 2nd iteration starts where the 1st one left off. For a BAM constructed from a File, each new iteration
|
|
||||||
* starts at the first record.
|
|
||||||
* <p/>
|
|
||||||
* Only a single open iterator on a SAM or BAM file may be extant at any one time. If you want to start
|
|
||||||
* a second iteration, the first one must be closed first.
|
|
||||||
*/
|
|
||||||
public CloseableIterator<SAMRecord> iterator() {
|
|
||||||
return mReader.getIterator();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -209,338 +113,46 @@ public class SAMFileReader2 implements Iterable<SAMRecord> {
|
||||||
*/
|
*/
|
||||||
public CloseableIterator<SAMRecord> iterator(List<Chunk> chunks) {
|
public CloseableIterator<SAMRecord> iterator(List<Chunk> chunks) {
|
||||||
// TODO: Add sanity checks so that we're not doing this against an unsupported BAM file.
|
// TODO: Add sanity checks so that we're not doing this against an unsupported BAM file.
|
||||||
if(!(mReader instanceof BAMFileReader2))
|
BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this);
|
||||||
throw new PicardException("This call cannot be performed without a backing BAMFileReader2");
|
return reader.getIterator(chunks);
|
||||||
return ((BAMFileReader2)mReader).getIterator(chunks);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Bin> getOverlappingBins(final String sequence, final int start, final int end) {
|
public List<Bin> getOverlappingBins(final String sequence, final int start, final int end) {
|
||||||
// TODO: Add sanity checks so that we're not doing this against an unsupported BAM file.
|
// TODO: Add sanity checks so that we're not doing this against an unsupported BAM file.
|
||||||
if(!(mReader instanceof BAMFileReader2))
|
BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this);
|
||||||
throw new PicardException("This call cannot be performed without a backing BAMFileReader2");
|
return reader.getOverlappingBins(sequence,start,end);
|
||||||
return ((BAMFileReader2)mReader).getOverlappingBins(sequence,start,end);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Chunk> getFilePointersBounding(final Bin bin) {
|
public List<Chunk> getFilePointersBounding(final Bin bin) {
|
||||||
if(!(mReader instanceof BAMFileReader2))
|
// TODO: Add sanity checks so that we're not doing this against an unsupported BAM file.
|
||||||
throw new PicardException("This call cannot be performed without a backing BAMFileReader2");
|
BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this);
|
||||||
return ((BAMFileReader2)mReader).getFilePointersBounding(bin);
|
return reader.getFilePointersBounding(bin);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Field getField(String fieldName) {
|
||||||
/**
|
|
||||||
* Iterate over records that match the given interval. Only valid to call this if hasIndex() == true.
|
|
||||||
* <p/>
|
|
||||||
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
|
|
||||||
* a second iteration, the first one must be closed first. You can use a second SAMFileReader to iterate
|
|
||||||
* in parallel over the same underlying file.
|
|
||||||
* <p/>
|
|
||||||
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
|
|
||||||
* and then discarded because they do not match the interval of interest.
|
|
||||||
* <p/>
|
|
||||||
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
|
|
||||||
* is in the query region.
|
|
||||||
*
|
|
||||||
* @param sequence Reference sequence of interest.
|
|
||||||
* @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
|
|
||||||
* @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
|
|
||||||
* @param contained If true, each SAMRecord returned is will have its alignment completely contained in the
|
|
||||||
* interval of interest. If false, the alignment of the returned SAMRecords need only overlap the interval of interest.
|
|
||||||
* @return Iterator over the SAMRecords matching the interval.
|
|
||||||
*/
|
|
||||||
public CloseableIterator<SAMRecord> query(final String sequence, final int start, final int end, final boolean contained) {
|
|
||||||
return mReader.query(sequence, start, end, contained);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Iterate over records that overlap the given interval. Only valid to call this if hasIndex() == true.
|
|
||||||
* <p/>
|
|
||||||
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
|
|
||||||
* a second iteration, the first one must be closed first.
|
|
||||||
* <p/>
|
|
||||||
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
|
|
||||||
* and then discarded because they do not match the interval of interest.
|
|
||||||
* <p/>
|
|
||||||
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
|
|
||||||
* is in the query region.
|
|
||||||
*
|
|
||||||
* @param sequence Reference sequence of interest.
|
|
||||||
* @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
|
|
||||||
* @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
|
|
||||||
* @return Iterator over the SAMRecords overlapping the interval.
|
|
||||||
*/
|
|
||||||
public CloseableIterator<SAMRecord> queryOverlapping(final String sequence, final int start, final int end) {
|
|
||||||
return query(sequence, start, end, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Iterate over records that are contained in the given interval. Only valid to call this if hasIndex() == true.
|
|
||||||
* <p/>
|
|
||||||
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
|
|
||||||
* a second iteration, the first one must be closed first.
|
|
||||||
* <p/>
|
|
||||||
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
|
|
||||||
* and then discarded because they do not match the interval of interest.
|
|
||||||
* <p/>
|
|
||||||
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
|
|
||||||
* is in the query region.
|
|
||||||
*
|
|
||||||
* @param sequence Reference sequence of interest.
|
|
||||||
* @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
|
|
||||||
* @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
|
|
||||||
* @return Iterator over the SAMRecords contained in the interval.
|
|
||||||
*/
|
|
||||||
public CloseableIterator<SAMRecord> queryContained(final String sequence, final int start, final int end) {
|
|
||||||
return query(sequence, start, end, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
public CloseableIterator<SAMRecord> queryUnmapped() {
|
|
||||||
return mReader.queryUnmapped();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Iterate over records that map to the given sequence and start at the given position. Only valid to call this if hasIndex() == true.
|
|
||||||
* <p/>
|
|
||||||
* Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
|
|
||||||
* a second iteration, the first one must be closed first.
|
|
||||||
* <p/>
|
|
||||||
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
|
|
||||||
* and then discarded because they do not match the interval of interest.
|
|
||||||
* <p/>
|
|
||||||
* Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
|
|
||||||
* matches the arguments.
|
|
||||||
*
|
|
||||||
* @param sequence Reference sequence of interest.
|
|
||||||
* @param start Alignment start of interest.
|
|
||||||
* @return Iterator over the SAMRecords with the given alignment start.
|
|
||||||
*/
|
|
||||||
public CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence, final int start) {
|
|
||||||
return mReader.queryAlignmentStart(sequence, start);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fetch the mate for the given read. Only valid to call this if hasIndex() == true.
|
|
||||||
* This will work whether the mate has a coordinate or not, so long as the given read has correct
|
|
||||||
* mate information. This method iterates over the SAM file, so there may not be an unclosed
|
|
||||||
* iterator on the SAM file when this method is called.
|
|
||||||
*
|
|
||||||
* @param rec Record for which mate is sought. Must be a paired read.
|
|
||||||
* @return rec's mate, or null if it cannot be found.
|
|
||||||
*/
|
|
||||||
public SAMRecord queryMate(final SAMRecord rec) {
|
|
||||||
if (!rec.getReadPairedFlag()) {
|
|
||||||
throw new IllegalArgumentException("queryMate called for unpaired read.");
|
|
||||||
}
|
|
||||||
if (rec.getFirstOfPairFlag() == rec.getSecondOfPairFlag()) {
|
|
||||||
throw new IllegalArgumentException("SAMRecord must be either first and second of pair, but not both.");
|
|
||||||
}
|
|
||||||
final boolean firstOfPair = rec.getFirstOfPairFlag();
|
|
||||||
final CloseableIterator<SAMRecord> it;
|
|
||||||
if (rec.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
|
||||||
it = queryUnmapped();
|
|
||||||
} else {
|
|
||||||
it = queryAlignmentStart(rec.getMateReferenceName(), rec.getMateAlignmentStart());
|
|
||||||
}
|
|
||||||
try {
|
try {
|
||||||
SAMRecord mateRec = null;
|
return getClass().getSuperclass().getDeclaredField(fieldName);
|
||||||
while (it.hasNext()) {
|
}
|
||||||
final SAMRecord next = it.next();
|
catch(NoSuchFieldException ex) {
|
||||||
if (!next.getReadPairedFlag()) {
|
throw new StingException("Unable to load field: " + fieldName);
|
||||||
if (rec.getReadName().equals(next.getReadName())) {
|
|
||||||
throw new SAMFormatException("Paired and unpaired reads with same name: " + rec.getReadName());
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (firstOfPair) {
|
|
||||||
if (next.getFirstOfPairFlag()) continue;
|
|
||||||
} else {
|
|
||||||
if (next.getSecondOfPairFlag()) continue;
|
|
||||||
}
|
|
||||||
if (rec.getReadName().equals(next.getReadName())) {
|
|
||||||
if (mateRec != null) {
|
|
||||||
throw new SAMFormatException("Multiple SAMRecord with read name " + rec.getReadName() +
|
|
||||||
" for " + (firstOfPair ? "second" : "first") + " end.");
|
|
||||||
}
|
|
||||||
mateRec = next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return mateRec;
|
|
||||||
} finally {
|
|
||||||
it.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void init(final InputStream stream, final boolean eagerDecode, final ValidationStringency validationStringency) {
|
private File findIndexFileFromParent(File bamFile) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final BufferedInputStream bufferedStream = IOUtil.toBufferedStream(stream);
|
Method method = getClass().getSuperclass().getDeclaredMethod("findIndexFile",File.class);
|
||||||
if (isBAMFile(bufferedStream)) {
|
method.setAccessible(true);
|
||||||
mIsBinary = true;
|
return (File)method.invoke(this,bamFile);
|
||||||
mReader = new BAMFileReader2(bufferedStream, eagerDecode, validationStringency);
|
|
||||||
} else if (isGzippedSAMFile(bufferedStream)) {
|
|
||||||
mIsBinary = false;
|
|
||||||
mReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency);
|
|
||||||
} else if (isSAMFile(bufferedStream)) {
|
|
||||||
mIsBinary = false;
|
|
||||||
mReader = new SAMTextReader(bufferedStream, validationStringency);
|
|
||||||
} else {
|
|
||||||
throw new SAMFormatException("Unrecognized file format");
|
|
||||||
}
|
|
||||||
setValidationStringency(validationStringency);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeIOException(e);
|
|
||||||
}
|
}
|
||||||
}
|
catch(IllegalAccessException ex) {
|
||||||
|
throw new StingException("Unable to run method findIndexFile",ex);
|
||||||
/**
|
|
||||||
* @param url
|
|
||||||
* @param indexFile
|
|
||||||
* @param eagerDecode
|
|
||||||
*/
|
|
||||||
private void init(final URL url, final File indexFile, final boolean eagerDecode, final ValidationStringency validationStringency) {
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Its too expensive to examine the remote file to determine type.
|
|
||||||
// Rely on file extension.
|
|
||||||
if (url.toString().toLowerCase().endsWith(".bam")) {
|
|
||||||
mIsBinary = true;
|
|
||||||
final BAMFileReader2 reader = new BAMFileReader2(url, eagerDecode, validationStringency);
|
|
||||||
mReader = reader;
|
|
||||||
if (indexFile != null) {
|
|
||||||
mFileIndex = new BAMFileIndex2(indexFile);
|
|
||||||
reader.setFileIndex(mFileIndex);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw new SAMFormatException("Unrecognized file format: " + url);
|
|
||||||
}
|
|
||||||
setValidationStringency(validationStringency);
|
|
||||||
}
|
}
|
||||||
catch (IOException e) {
|
catch(InvocationTargetException ex) {
|
||||||
throw new RuntimeIOException(e);
|
throw new StingException("Unable to run method findIndexFile",ex);
|
||||||
|
}
|
||||||
|
catch(NoSuchMethodException ex) {
|
||||||
|
throw new StingException("Unable to run method findIndexFile",ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void init(final File file, File indexFile, final boolean eagerDecode, final ValidationStringency validationStringency) {
|
|
||||||
this.samFile = file;
|
|
||||||
|
|
||||||
try {
|
|
||||||
final BufferedInputStream bufferedStream = new BufferedInputStream(new FileInputStream(file));
|
|
||||||
if (isBAMFile(bufferedStream)) {
|
|
||||||
bufferedStream.close();
|
|
||||||
mIsBinary = true;
|
|
||||||
final BAMFileReader2 reader = new BAMFileReader2(file, eagerDecode, validationStringency);
|
|
||||||
mReader = reader;
|
|
||||||
if (indexFile == null) {
|
|
||||||
indexFile = findIndexFile(file);
|
|
||||||
}
|
|
||||||
if (indexFile != null) {
|
|
||||||
mFileIndex = new BAMFileIndex2(indexFile);
|
|
||||||
reader.setFileIndex(mFileIndex);
|
|
||||||
if (indexFile.lastModified() < file.lastModified()) {
|
|
||||||
System.err.println("WARNING: BAM index file " + indexFile.getAbsolutePath() +
|
|
||||||
" is older than BAM " + file.getAbsolutePath());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (isGzippedSAMFile(bufferedStream)) {
|
|
||||||
mIsBinary = false;
|
|
||||||
mReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency);
|
|
||||||
} else if (isSAMFile(bufferedStream)) {
|
|
||||||
if (indexFile != null) {
|
|
||||||
bufferedStream.close();
|
|
||||||
throw new RuntimeException("Cannot use index file with textual SAM file");
|
|
||||||
}
|
|
||||||
mIsBinary = false;
|
|
||||||
mReader = new SAMTextReader(bufferedStream, file, validationStringency);
|
|
||||||
} else {
|
|
||||||
bufferedStream.close();
|
|
||||||
throw new SAMFormatException("Unrecognized file format");
|
|
||||||
}
|
|
||||||
setValidationStringency(validationStringency);
|
|
||||||
}
|
|
||||||
catch (IOException e) {
|
|
||||||
throw new RuntimeIOException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Look for BAM index file according to standard naming convention.
|
|
||||||
*
|
|
||||||
* @param dataFile BAM file name.
|
|
||||||
* @return Index file name, or null if not found.
|
|
||||||
*/
|
|
||||||
private File findIndexFile(final File dataFile) {
|
|
||||||
// If input is foo.bam, look for foo.bai
|
|
||||||
final String bamExtension = ".bam";
|
|
||||||
File indexFile;
|
|
||||||
final String fileName = dataFile.getName();
|
|
||||||
if (fileName.endsWith(bamExtension)) {
|
|
||||||
final String bai = fileName.substring(0, fileName.length() - bamExtension.length()) + ".bai";
|
|
||||||
indexFile = new File(dataFile.getParent(), bai);
|
|
||||||
if (indexFile.exists()) {
|
|
||||||
return indexFile;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If foo.bai doesn't exist look for foo.bam.bai
|
|
||||||
indexFile = new File(dataFile.getParent(), dataFile.getName() + ".bai");
|
|
||||||
if (indexFile.exists()) {
|
|
||||||
return indexFile;
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param stream stream.markSupported() must be true
|
|
||||||
* @return true if this looks like a BAM file.
|
|
||||||
*/
|
|
||||||
private boolean isBAMFile(final InputStream stream)
|
|
||||||
throws IOException {
|
|
||||||
return BlockCompressedInputStream.isValidFile(stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Attempts to check whether the file is a gzipped sam file. Returns true if it
|
|
||||||
* is and false otherwise.
|
|
||||||
*/
|
|
||||||
private boolean isGzippedSAMFile(final BufferedInputStream stream) {
|
|
||||||
if (!stream.markSupported()) {
|
|
||||||
throw new IllegalArgumentException("Cannot test a stream that doesn't support marking.");
|
|
||||||
}
|
|
||||||
stream.mark(8000);
|
|
||||||
|
|
||||||
try {
|
|
||||||
final GZIPInputStream gunzip = new GZIPInputStream(stream);
|
|
||||||
final int ch = gunzip.read();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
catch (IOException ioe) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
try {
|
|
||||||
stream.reset();
|
|
||||||
}
|
|
||||||
catch (IOException ioe) {
|
|
||||||
throw new IllegalStateException("Could not reset stream.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isSAMFile(final InputStream stream) {
|
|
||||||
// For now, assume every non-binary file is a SAM text file.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
if (this.samFile == null) {
|
|
||||||
return getClass().getSimpleName() + "{initialized with stream}";
|
|
||||||
} else {
|
|
||||||
return getClass().getSimpleName() + "{" + this.samFile.getAbsolutePath() + "}";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue