A bit of cleanup in preparation for Picard patch.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2286 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d3b78338da
commit
9e2f831206
|
|
@ -12,32 +12,27 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: hanna
|
||||
* Date: Apr 14, 2009
|
||||
* Time: 10:02:10 AM
|
||||
*
|
||||
* Reads a fasta index file (.fai).
|
||||
* Reads a fasta index file (.fai), as generated by `samtools faidx`.
|
||||
*/
|
||||
public class FastaSequenceIndex implements Iterable {
|
||||
// Use a linked hash map to preserve the ordering of the contigs.
|
||||
private Map<String,FastaSequenceIndexEntry> sequenceEntries =
|
||||
new LinkedHashMap<String,FastaSequenceIndexEntry>();
|
||||
/**
|
||||
* Store the entries. Use a LinkedHashMap for consistent iteration in insertion order.
|
||||
*/
|
||||
private Map<String,FastaSequenceIndexEntry> sequenceEntries = new LinkedHashMap<String,FastaSequenceIndexEntry>();
|
||||
|
||||
/**
|
||||
* Build a sequence index from the specified file.
|
||||
* @param indexFile File to open.
|
||||
* @throws PicardException if file is of invalid format.
|
||||
* @throws FileNotFoundException if the index file cannot be found.
|
||||
*/
|
||||
public FastaSequenceIndex( File indexFile ) throws FileNotFoundException {
|
||||
if(!indexFile.exists())
|
||||
throw new FileNotFoundException(String.format("Fasta index file is missing",indexFile.getAbsolutePath()));
|
||||
throw new FileNotFoundException(String.format("Fasta index file is missing: %s",indexFile.getAbsolutePath()));
|
||||
|
||||
IoUtil.assertFileIsReadable(indexFile);
|
||||
parseIndexFile(indexFile);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse the contents of an index file, caching the results internally.
|
||||
* @param indexFile File to parse.
|
||||
|
|
@ -92,6 +87,10 @@ public class FastaSequenceIndex implements Iterable {
|
|||
return sequenceEntries.get(contigName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an iterator which can iterate through all entries in a fasta index.
|
||||
* @return iterator over all fasta index entries.
|
||||
*/
|
||||
public Iterator<FastaSequenceIndexEntry> iterator() {
|
||||
return sequenceEntries.values().iterator();
|
||||
}
|
||||
|
|
@ -105,6 +104,9 @@ public class FastaSequenceIndex implements Iterable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Hold an individual entry in a fasta sequence index file.
|
||||
*/
|
||||
class FastaSequenceIndexEntry {
|
||||
private String contig;
|
||||
private long location;
|
||||
|
|
@ -112,11 +114,19 @@ class FastaSequenceIndexEntry {
|
|||
private int basesPerLine;
|
||||
private int bytesPerLine;
|
||||
|
||||
/**
|
||||
* Create a new entry with the given parameters.
|
||||
* @param contig Contig this entry represents.
|
||||
* @param location Location (byte coordinate) in the fasta file.
|
||||
* @param size The number of bases in the contig.
|
||||
* @param basesPerLine How many bases are on each line.
|
||||
* @param bytesPerLine How many bytes are on each line (includes newline characters).
|
||||
*/
|
||||
public FastaSequenceIndexEntry( String contig,
|
||||
long location,
|
||||
long size,
|
||||
int basesPerLine,
|
||||
int bytesPerLine ) {
|
||||
long location,
|
||||
long size,
|
||||
int basesPerLine,
|
||||
int bytesPerLine ) {
|
||||
this.contig = contig;
|
||||
this.location = location;
|
||||
this.size = size;
|
||||
|
|
@ -165,6 +175,10 @@ class FastaSequenceIndexEntry {
|
|||
return bytesPerLine;
|
||||
}
|
||||
|
||||
/**
|
||||
* For debugging. Emit the contents of each contig line.
|
||||
* @return A string representation of the contig line.
|
||||
*/
|
||||
public String toString() {
|
||||
return String.format("contig %s; location %d; size %d; basesPerLine %d; bytesPerLine %d", contig,
|
||||
location,
|
||||
|
|
|
|||
|
|
@ -17,34 +17,46 @@ import java.io.FileNotFoundException;
|
|||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.util.Iterator;
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: hanna
|
||||
* Date: Apr 14, 2009
|
||||
* Time: 2:14:26 PM
|
||||
*
|
||||
* A fasta file driven by an index for fast, concurrent lookups. Supports two interfaces:
|
||||
* the ReferenceSequenceFile for old-style, stateful lookups and a direct getter.
|
||||
*/
|
||||
public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
||||
/**
|
||||
* Stores the main fasta file.
|
||||
*/
|
||||
private final File file;
|
||||
private FileInputStream in;
|
||||
|
||||
/**
|
||||
* The interface facilitating direct access to the fasta.
|
||||
*/
|
||||
private FileChannel channel;
|
||||
|
||||
private SAMSequenceDictionary sequenceDictionary = null;
|
||||
/**
|
||||
* A representation of the sequence dictionary, stored alongside the fasta in a .dict file.
|
||||
*/
|
||||
private SAMSequenceDictionary sequenceDictionary = null;
|
||||
|
||||
/**
|
||||
* A representation of the sequence index, stored alongside the fasta in a .fasta.fai file.
|
||||
*/
|
||||
private FastaSequenceIndex index;
|
||||
|
||||
/**
|
||||
* An iterator into the fasta index, for traversing iteratively across the fasta.
|
||||
*/
|
||||
private Iterator<FastaSequenceIndexEntry> indexIterator;
|
||||
|
||||
/**
|
||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
||||
* @param file The file to open.
|
||||
* @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
|
||||
*/
|
||||
public IndexedFastaSequenceFile(File file) throws FileNotFoundException {
|
||||
this.file = file;
|
||||
in = new FileInputStream(file);
|
||||
FileInputStream in = new FileInputStream(file);
|
||||
channel = in.getChannel();
|
||||
|
||||
loadDictionary(file);
|
||||
|
|
@ -55,7 +67,6 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
|||
/**
|
||||
* Loads a dictionary, if available.
|
||||
* @param fastaFile File to check for a match.
|
||||
* TODO: This code is copied directly from FastaSequenceFile / FastaSequenceFile2. Bring it into a shared utility.
|
||||
*/
|
||||
private void loadDictionary( File fastaFile ) {
|
||||
// Try and locate the dictionary
|
||||
|
|
@ -207,11 +218,18 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
|||
return getSequence( indexIterator.next().getContig() );
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset the iterator over the index.
|
||||
*/
|
||||
@Override
|
||||
public void reset() {
|
||||
indexIterator = index.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple toString implementation for debugging.
|
||||
* @return String representation of the file.
|
||||
*/
|
||||
public String toString() {
|
||||
return this.file.getAbsolutePath();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,11 +12,7 @@ import java.io.FileNotFoundException;
|
|||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: hanna
|
||||
* Date: Apr 14, 2009
|
||||
* Time: 10:34:15 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
* Test the fasta sequence index reader.
|
||||
*/
|
||||
public class FastaSequenceIndexTest extends BaseTest {
|
||||
// our basic human 18 fai
|
||||
|
|
|
|||
|
|
@ -16,11 +16,7 @@ import net.sf.picard.PicardException;
|
|||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: hanna
|
||||
* Date: Apr 14, 2009
|
||||
* Time: 2:37:29 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
* Test the indexed fasta sequence file reader.
|
||||
*/
|
||||
public class IndexedFastaSequenceFileTest extends BaseTest {
|
||||
private static String sequenceFileName;
|
||||
|
|
|
|||
Loading…
Reference in New Issue