gatk-3.8/java/lib/edu/mit/broad/sam/SAMFileReader.java

214 lines
7.1 KiB
Java

/*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2008 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever.
* Neither the Broad Institute nor MIT can be responsible for its use, misuse,
* or functionality.
*/
package edu.mit.broad.sam;
import edu.mit.broad.sam.util.CloseableIterator;
import edu.mit.broad.sam.util.RuntimeIOException;
import edu.mit.broad.sam.util.BlockCompressedInputStream;
import java.io.*;
/**
 * Class for reading and querying SAM/BAM files.
 *
 * <p>The input format is detected at construction time: input accepted by
 * {@code BlockCompressedInputStream.isValidFile} is read as BAM, anything else is
 * currently assumed to be textual SAM. An index can only be attached when a BAM
 * input is opened from a {@link File}.
 *
 * <p>{@link #close()} drops the underlying reader, so {@link #getFileHeader()},
 * {@link #iterator()}, {@link #query} and {@link #setValidationStringency} must not
 * be called after it.
 */
public class SAMFileReader implements Iterable<SAMRecord>
{
    /** True when the input was detected as BAM, false for textual SAM. */
    private boolean mIsBinary = false;
    /** Index attached to the BAM reader, or null when none is in use. */
    private BAMFileIndex mFileIndex = null;
    /** Format-specific reader that all public operations delegate to; null after close(). */
    private ReaderImplementation mReader = null;

    public enum ValidationStringency {
        STRICT,   // Do the right thing, throw an exception if something looks wrong
        LENIENT,  // Emit warnings but keep going if possible
        SILENT;   // Like LENIENT, only don't emit warning messages

        // NOTE(review): public and non-final, so any caller can reassign it; left
        // unchanged here because external code may rely on being able to set it.
        public static ValidationStringency DEFAULT_STRINGENCY = STRICT;
    }

    /**
     * Internal interface for SAM/BAM file reader implementations.
     * Implemented as an abstract class to enforce better access control.
     */
    static abstract class ReaderImplementation {
        abstract SAMFileHeader getFileHeader();
        abstract CloseableIterator<SAMRecord> getIterator();
        abstract CloseableIterator<SAMRecord> query(String sequence, int start, int end, boolean contained);
        abstract void close();
        // Selects how format problems are reported; see ValidationStringency.
        abstract void setValidationStringency(final ValidationStringency validationStringency);
    }

    /**
     * Read a SAM or BAM stream without eager decoding.
     * @param stream input SAM or BAM
     */
    public SAMFileReader(final InputStream stream) {
        this(stream, false);
    }

    /**
     * Read a SAM or BAM file without eager decoding, using the default index file if present.
     * @param file where to read from
     */
    public SAMFileReader(final File file) {
        this(file, null, false);
    }

    /**
     * Read a SAM or BAM file without eager decoding.
     * @param file where to read from
     * @param indexFile location of index file, or null in order to use the default index file (if present)
     */
    public SAMFileReader(final File file, final File indexFile) {
        this(file, indexFile, false);
    }

    /**
     * Read a SAM or BAM file
     * @param stream input SAM or BAM
     * @param eagerDecode if true, decode SAM record entirely when reading it
     */
    public SAMFileReader(final InputStream stream, final boolean eagerDecode) {
        init(stream, eagerDecode);
    }

    /**
     * Read a SAM or BAM file, possibly with an index file if present
     * @param file where to read from
     * @param eagerDecode if true, decode SAM record entirely when reading it
     */
    public SAMFileReader(final File file, final boolean eagerDecode) {
        init(file, null, eagerDecode);
    }

    /**
     * Read a SAM or BAM file, possibly with an index file
     * @param file where to read from
     * @param indexFile location of index file, or null in order to use the default index file (if present)
     * @param eagerDecode if true, decode SAM record entirely when reading it
     */
    public SAMFileReader(final File file, final File indexFile, final boolean eagerDecode){
        init(file, indexFile, eagerDecode);
    }

    /**
     * Closes the underlying reader and the index, if any. Safe to call more than once.
     * The index is closed and both references are dropped even if closing the
     * reader throws.
     */
    public void close() {
        releaseResources();
    }

    /**
     * @return true if the input was detected as BAM, false if it is treated as textual SAM
     */
    public boolean isBinary() {
        return mIsBinary;
    }

    /**
     * @return true if an index is currently attached to this reader
     */
    public boolean hasIndex() {
        return (mFileIndex != null);
    }

    /**
     * @return the header as reported by the underlying reader
     */
    public SAMFileHeader getFileHeader() {
        return mReader.getFileHeader();
    }

    /**
     * Forwards the requested stringency to the underlying reader.
     * @param validationStringency how format problems should be reported
     */
    public void setValidationStringency(final ValidationStringency validationStringency) {
        mReader.setValidationStringency(validationStringency);
    }

    /**
     * @return an iterator over the records, obtained from the underlying reader
     */
    public CloseableIterator<SAMRecord> iterator() {
        return mReader.getIterator();
    }

    /**
     * Delegates a region query to the underlying reader.
     * @param sequence name of the sequence to query (presumably a reference sequence name; confirm against the reader implementations)
     * @param start start of the queried interval
     * @param end end of the queried interval
     * @param contained true as passed by {@link #queryContained}, false as passed by {@link #queryOverlapping}
     * @return iterator over the matching records
     */
    public CloseableIterator<SAMRecord> query(final String sequence, final int start, final int end, final boolean contained) {
        return mReader.query(sequence, start, end, contained);
    }

    /**
     * Shorthand for {@code query(sequence, start, end, false)}.
     */
    public CloseableIterator<SAMRecord> queryOverlapping(final String sequence, final int start, final int end) {
        return query(sequence, start, end, false);
    }

    /**
     * Shorthand for {@code query(sequence, start, end, true)}.
     */
    public CloseableIterator<SAMRecord> queryContained(final String sequence, final int start, final int end) {
        return query(sequence, start, end, true);
    }

    /**
     * Detects the format of a stream and creates the matching reader.
     * The stream belongs to the caller and is not closed here on failure.
     * Note that eagerDecode is only forwarded to the BAM reader.
     */
    private void init(final InputStream stream, final boolean eagerDecode) {
        try {
            final BufferedInputStream bufferedStream = toBufferedStream(stream);
            if (isBAMFile(bufferedStream)) {
                mIsBinary = true;
                mReader = new BAMFileReader(bufferedStream, eagerDecode);
            } else if (isSAMFile(bufferedStream)) {
                mIsBinary = false;
                mReader = new SAMTextReader(bufferedStream);
            } else {
                throw new SAMFormatException("Unrecognized file format");
            }
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Detects the format of a file and creates the matching reader, attaching an
     * index for BAM input when one is given or found next to the file.
     *
     * <p>Everything opened here is released again if initialization fails, because
     * the constructor is about to throw and the caller will have nothing to close.
     *
     * @throws RuntimeIOException if the file cannot be opened or probed
     * @throws RuntimeException if an index file is supplied for a textual SAM file
     */
    private void init(final File file, File indexFile, final boolean eagerDecode) {
        BufferedInputStream bufferedStream = null;
        boolean initialized = false;
        try {
            bufferedStream = new BufferedInputStream(new FileInputStream(file));
            if (isBAMFile(bufferedStream)) {
                // The BAM reader is constructed from the File rather than from this
                // stream, so the probe stream is finished with.
                bufferedStream.close();
                bufferedStream = null;
                mIsBinary = true;
                final BAMFileReader reader = new BAMFileReader(file, eagerDecode);
                mReader = reader;
                if (indexFile == null) {
                    indexFile = findIndexFile(file);
                }
                if (indexFile != null) {
                    mFileIndex = new BAMFileIndex(indexFile);
                    reader.setFileIndex(mFileIndex);
                }
            } else if (isSAMFile(bufferedStream)) {
                if (indexFile != null) {
                    throw new RuntimeException("Cannot use index file with textual SAM file");
                }
                mIsBinary = false;
                // The stream is handed to the text reader, which keeps using it.
                mReader = new SAMTextReader(bufferedStream, file);
            } else {
                throw new SAMFormatException("Unrecognized file format");
            }
            initialized = true;
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        } finally {
            if (!initialized) {
                releaseAfterFailedInit(bufferedStream);
            }
        }
    }

    /**
     * Best-effort cleanup for a failed init: closes the probe stream (if still
     * open) and any reader/index already created. Secondary failures are ignored
     * so that the exception that aborted initialization is the one the caller sees.
     */
    private void releaseAfterFailedInit(final InputStream stream) {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // Already failing; the original exception is more informative.
            }
        }
        try {
            releaseResources();
        } catch (RuntimeException ignored) {
            // Already failing; the original exception is more informative.
        }
    }

    /**
     * Closes the reader and then the index, and clears both fields. The fields
     * are cleared first so the object is left in the closed state even if one of
     * the close calls throws.
     */
    private void releaseResources() {
        final ReaderImplementation reader = mReader;
        final BAMFileIndex index = mFileIndex;
        mReader = null;
        mFileIndex = null;
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (index != null) {
                index.close();
            }
        }
    }

    /**
     * Looks for the default index file: the data file's name with ".bai" appended,
     * in the same directory.
     * @return the index file if it exists, otherwise null
     */
    private File findIndexFile(final File dataFile) {
        final File indexFile =
            new File(dataFile.getParent(), dataFile.getName() + ".bai");
        if (indexFile.exists()) {
            return indexFile;
        } else {
            return null;
        }
    }

    /**
     * @return whatever BlockCompressedInputStream.isValidFile reports for the stream
     */
    private boolean isBAMFile(final InputStream stream)
        throws IOException {
        return BlockCompressedInputStream.isValidFile(stream);
    }

    private boolean isSAMFile(final InputStream stream) {
        // For now, assume every non-binary file is a SAM text file.
        return true;
    }

    /**
     * @return the stream itself if it is already buffered, otherwise a new buffered wrapper around it
     */
    private BufferedInputStream toBufferedStream(final InputStream stream) {
        if (stream instanceof BufferedInputStream) {
            return (BufferedInputStream) stream;
        } else {
            return new BufferedInputStream(stream);
        }
    }
}