A bit of cleanup in preparation for Picard patch.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2286 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-12-08 16:09:04 +00:00
parent d3b78338da
commit 9e2f831206
4 changed files with 63 additions and 39 deletions

View File

@ -12,32 +12,27 @@ import java.io.File;
import java.io.FileNotFoundException;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Apr 14, 2009
* Time: 10:02:10 AM
*
* Reads a fasta index file (.fai).
* Reads a fasta index file (.fai), as generated by `samtools faidx`.
*/
public class FastaSequenceIndex implements Iterable {
// Use a linked hash map to preserve the ordering of the contigs.
private Map<String,FastaSequenceIndexEntry> sequenceEntries =
new LinkedHashMap<String,FastaSequenceIndexEntry>();
/**
* Store the entries. Use a LinkedHashMap for consistent iteration in insertion order.
*/
private Map<String,FastaSequenceIndexEntry> sequenceEntries = new LinkedHashMap<String,FastaSequenceIndexEntry>();
/**
 * Build a sequence index from the specified file.
 * The file must exist and pass the IoUtil readability assertion before it is parsed.
 * @param indexFile File to open.
 * @throws PicardException if file is of invalid format.
 * @throws FileNotFoundException if the index file cannot be found.
 */
public FastaSequenceIndex( File indexFile ) throws FileNotFoundException {
    // Report the offending path in the message; the format string needs a %s
    // specifier, otherwise String.format silently ignores the path argument.
    if(!indexFile.exists())
        throw new FileNotFoundException(String.format("Fasta index file is missing: %s",indexFile.getAbsolutePath()));
    IoUtil.assertFileIsReadable(indexFile);
    // Parsing caches the entries internally for later lookup and iteration.
    parseIndexFile(indexFile);
}
/**
* Parse the contents of an index file, caching the results internally.
* @param indexFile File to parse.
@ -92,6 +87,10 @@ public class FastaSequenceIndex implements Iterable {
return sequenceEntries.get(contigName);
}
/**
 * Provides sequential access to every entry held by this fasta index.
 * Entries are taken from the values of the internal entry map.
 * @return an iterator over all fasta index entries.
 */
public Iterator<FastaSequenceIndexEntry> iterator() {
    final Iterator<FastaSequenceIndexEntry> entryIterator = sequenceEntries.values().iterator();
    return entryIterator;
}
@ -105,6 +104,9 @@ public class FastaSequenceIndex implements Iterable {
}
}
/**
* Hold an individual entry in a fasta sequence index file.
*/
class FastaSequenceIndexEntry {
private String contig;
private long location;
@ -112,11 +114,19 @@ class FastaSequenceIndexEntry {
private int basesPerLine;
private int bytesPerLine;
/**
* Create a new entry with the given parameters.
* @param contig Contig this entry represents.
* @param location Location (byte coordinate) in the fasta file.
* @param size The number of bases in the contig.
* @param basesPerLine How many bases are on each line.
* @param bytesPerLine How many bytes are on each line (includes newline characters).
*/
public FastaSequenceIndexEntry( String contig,
long location,
long size,
int basesPerLine,
int bytesPerLine ) {
long location,
long size,
int basesPerLine,
int bytesPerLine ) {
this.contig = contig;
this.location = location;
this.size = size;
@ -165,6 +175,10 @@ class FastaSequenceIndexEntry {
return bytesPerLine;
}
/**
* For debugging. Emit the contents of each contig line.
* @return A string representation of the contig line.
*/
public String toString() {
return String.format("contig %s; location %d; size %d; basesPerLine %d; bytesPerLine %d", contig,
location,

View File

@ -17,34 +17,46 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Iterator;
import java.util.Scanner;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Apr 14, 2009
* Time: 2:14:26 PM
*
* A fasta file driven by an index for fast, concurrent lookups. Supports two interfaces:
* the ReferenceSequenceFile for old-style, stateful lookups and a direct getter.
*/
public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
/**
* Stores the main fasta file.
*/
private final File file;
private FileInputStream in;
/**
* The interface facilitating direct access to the fasta.
*/
private FileChannel channel;
private SAMSequenceDictionary sequenceDictionary = null;
/**
* A representation of the sequence dictionary, stored alongside the fasta in a .dict file.
*/
private SAMSequenceDictionary sequenceDictionary = null;
/**
* A representation of the sequence index, stored alongside the fasta in a .fasta.fai file.
*/
private FastaSequenceIndex index;
/**
* An iterator into the fasta index, for traversing iteratively across the fasta.
*/
private Iterator<FastaSequenceIndexEntry> indexIterator;
/**
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
* @param file The file to open.
* @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
*/
public IndexedFastaSequenceFile(File file) throws FileNotFoundException {
this.file = file;
in = new FileInputStream(file);
FileInputStream in = new FileInputStream(file);
channel = in.getChannel();
loadDictionary(file);
@ -55,7 +67,6 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
/**
* Loads a dictionary, if available.
* @param fastaFile File to check for a match.
* TODO: This code is copied directly from FastaSequenceFile / FastaSequenceFile2. Bring it into a shared utility.
*/
private void loadDictionary( File fastaFile ) {
// Try to locate the dictionary
@ -207,11 +218,18 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
return getSequence( indexIterator.next().getContig() );
}
/**
 * Restart iteration over the index by replacing the current
 * index iterator with a fresh one obtained from the index.
 */
@Override
public void reset() {
    this.indexIterator = this.index.iterator();
}
/**
 * Describe this object for debugging purposes.
 * @return the absolute path of the underlying fasta file.
 */
public String toString() {
    final String absolutePath = file.getAbsolutePath();
    return absolutePath;
}

View File

@ -12,11 +12,7 @@ import java.io.FileNotFoundException;
import java.util.Iterator;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Apr 14, 2009
* Time: 10:34:15 AM
* To change this template use File | Settings | File Templates.
* Test the fasta sequence index reader.
*/
public class FastaSequenceIndexTest extends BaseTest {
// our basic human 18 fai

View File

@ -16,11 +16,7 @@ import net.sf.picard.PicardException;
import net.sf.samtools.util.StringUtil;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Apr 14, 2009
* Time: 2:37:29 PM
* To change this template use File | Settings | File Templates.
* Test the indexed fasta sequence file reader.
*/
public class IndexedFastaSequenceFileTest extends BaseTest {
private static String sequenceFileName;