A bit of cleanup in preparation for Picard patch.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2286 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d3b78338da
commit
9e2f831206
|
|
@ -12,32 +12,27 @@ import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Reads a fasta index file (.fai), as generated by `samtools faidx`.
|
||||||
* User: hanna
|
|
||||||
* Date: Apr 14, 2009
|
|
||||||
* Time: 10:02:10 AM
|
|
||||||
*
|
|
||||||
* Reads a fasta index file (.fai).
|
|
||||||
*/
|
*/
|
||||||
public class FastaSequenceIndex implements Iterable {
|
public class FastaSequenceIndex implements Iterable {
|
||||||
// Use a linked hash map to preserve the ordering of the contigs.
|
/**
|
||||||
private Map<String,FastaSequenceIndexEntry> sequenceEntries =
|
* Store the entries. Use a LinkedHashMap for consistent iteration in insertion order.
|
||||||
new LinkedHashMap<String,FastaSequenceIndexEntry>();
|
*/
|
||||||
|
private Map<String,FastaSequenceIndexEntry> sequenceEntries = new LinkedHashMap<String,FastaSequenceIndexEntry>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build a sequence index from the specified file.
|
* Build a sequence index from the specified file.
|
||||||
* @param indexFile File to open.
|
* @param indexFile File to open.
|
||||||
* @throws PicardException if file is of invalid format.
|
* @throws FileNotFoundException if the index file cannot be found.
|
||||||
*/
|
*/
|
||||||
public FastaSequenceIndex( File indexFile ) throws FileNotFoundException {
|
public FastaSequenceIndex( File indexFile ) throws FileNotFoundException {
|
||||||
if(!indexFile.exists())
|
if(!indexFile.exists())
|
||||||
throw new FileNotFoundException(String.format("Fasta index file is missing",indexFile.getAbsolutePath()));
|
throw new FileNotFoundException(String.format("Fasta index file is missing: %s",indexFile.getAbsolutePath()));
|
||||||
|
|
||||||
IoUtil.assertFileIsReadable(indexFile);
|
IoUtil.assertFileIsReadable(indexFile);
|
||||||
parseIndexFile(indexFile);
|
parseIndexFile(indexFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse the contents of an index file, caching the results internally.
|
* Parse the contents of an index file, caching the results internally.
|
||||||
* @param indexFile File to parse.
|
* @param indexFile File to parse.
|
||||||
|
|
@ -92,6 +87,10 @@ public class FastaSequenceIndex implements Iterable {
|
||||||
return sequenceEntries.get(contigName);
|
return sequenceEntries.get(contigName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an iterator which can iterate through all entries in a fasta index.
|
||||||
|
* @return iterator over all fasta index entries.
|
||||||
|
*/
|
||||||
public Iterator<FastaSequenceIndexEntry> iterator() {
|
public Iterator<FastaSequenceIndexEntry> iterator() {
|
||||||
return sequenceEntries.values().iterator();
|
return sequenceEntries.values().iterator();
|
||||||
}
|
}
|
||||||
|
|
@ -105,6 +104,9 @@ public class FastaSequenceIndex implements Iterable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hold an individual entry in a fasta sequence index file.
|
||||||
|
*/
|
||||||
class FastaSequenceIndexEntry {
|
class FastaSequenceIndexEntry {
|
||||||
private String contig;
|
private String contig;
|
||||||
private long location;
|
private long location;
|
||||||
|
|
@ -112,11 +114,19 @@ class FastaSequenceIndexEntry {
|
||||||
private int basesPerLine;
|
private int basesPerLine;
|
||||||
private int bytesPerLine;
|
private int bytesPerLine;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new entry with the given parameters.
|
||||||
|
* @param contig Contig this entry represents.
|
||||||
|
* @param location Location (byte coordinate) in the fasta file.
|
||||||
|
* @param size The number of bases in the contig.
|
||||||
|
* @param basesPerLine How many bases are on each line.
|
||||||
|
* @param bytesPerLine How many bytes are on each line (includes newline characters).
|
||||||
|
*/
|
||||||
public FastaSequenceIndexEntry( String contig,
|
public FastaSequenceIndexEntry( String contig,
|
||||||
long location,
|
long location,
|
||||||
long size,
|
long size,
|
||||||
int basesPerLine,
|
int basesPerLine,
|
||||||
int bytesPerLine ) {
|
int bytesPerLine ) {
|
||||||
this.contig = contig;
|
this.contig = contig;
|
||||||
this.location = location;
|
this.location = location;
|
||||||
this.size = size;
|
this.size = size;
|
||||||
|
|
@ -165,6 +175,10 @@ class FastaSequenceIndexEntry {
|
||||||
return bytesPerLine;
|
return bytesPerLine;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For debugging. Emit the contents of each contig line.
|
||||||
|
* @return A string representation of the contig line.
|
||||||
|
*/
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("contig %s; location %d; size %d; basesPerLine %d; bytesPerLine %d", contig,
|
return String.format("contig %s; location %d; size %d; basesPerLine %d; bytesPerLine %d", contig,
|
||||||
location,
|
location,
|
||||||
|
|
|
||||||
|
|
@ -17,34 +17,46 @@ import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.nio.charset.CharacterCodingException;
|
|
||||||
import java.nio.charset.Charset;
|
|
||||||
import java.nio.charset.CharsetDecoder;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Scanner;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: hanna
|
|
||||||
* Date: Apr 14, 2009
|
|
||||||
* Time: 2:14:26 PM
|
|
||||||
*
|
|
||||||
* A fasta file driven by an index for fast, concurrent lookups. Supports two interfaces:
|
* A fasta file driven by an index for fast, concurrent lookups. Supports two interfaces:
|
||||||
* the ReferenceSequenceFile for old-style, stateful lookups and a direct getter.
|
* the ReferenceSequenceFile for old-style, stateful lookups and a direct getter.
|
||||||
*/
|
*/
|
||||||
public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
||||||
|
/**
|
||||||
|
* Stores the main fasta file.
|
||||||
|
*/
|
||||||
private final File file;
|
private final File file;
|
||||||
private FileInputStream in;
|
|
||||||
|
/**
|
||||||
|
* The interface facilitating direct access to the fasta.
|
||||||
|
*/
|
||||||
private FileChannel channel;
|
private FileChannel channel;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A representation of the sequence dictionary, stored alongside the fasta in a .dict file.
|
||||||
|
*/
|
||||||
private SAMSequenceDictionary sequenceDictionary = null;
|
private SAMSequenceDictionary sequenceDictionary = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A representation of the sequence index, stored alongside the fasta in a .fasta.fai file.
|
||||||
|
*/
|
||||||
private FastaSequenceIndex index;
|
private FastaSequenceIndex index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An iterator over the fasta index, used to step through the fasta one contig at a time.
|
||||||
|
*/
|
||||||
private Iterator<FastaSequenceIndexEntry> indexIterator;
|
private Iterator<FastaSequenceIndexEntry> indexIterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
||||||
|
* @param file The file to open.
|
||||||
|
* @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
|
||||||
|
*/
|
||||||
public IndexedFastaSequenceFile(File file) throws FileNotFoundException {
|
public IndexedFastaSequenceFile(File file) throws FileNotFoundException {
|
||||||
this.file = file;
|
this.file = file;
|
||||||
in = new FileInputStream(file);
|
FileInputStream in = new FileInputStream(file);
|
||||||
channel = in.getChannel();
|
channel = in.getChannel();
|
||||||
|
|
||||||
loadDictionary(file);
|
loadDictionary(file);
|
||||||
|
|
@ -55,7 +67,6 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
||||||
/**
|
/**
|
||||||
* Loads a dictionary, if available.
|
* Loads a dictionary, if available.
|
||||||
* @param fastaFile File to check for a match.
|
* @param fastaFile File to check for a match.
|
||||||
* TODO: This code is copied directly from FastaSequenceFile / FastaSequenceFile2. Bring it into a shared utility.
|
|
||||||
*/
|
*/
|
||||||
private void loadDictionary( File fastaFile ) {
|
private void loadDictionary( File fastaFile ) {
|
||||||
// Try to locate the dictionary
|
// Try to locate the dictionary
|
||||||
|
|
@ -207,11 +218,18 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
||||||
return getSequence( indexIterator.next().getContig() );
|
return getSequence( indexIterator.next().getContig() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reset the iterator over the index.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void reset() {
|
public void reset() {
|
||||||
indexIterator = index.iterator();
|
indexIterator = index.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple toString implementation for debugging.
|
||||||
|
* @return String representation of the file.
|
||||||
|
*/
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return this.file.getAbsolutePath();
|
return this.file.getAbsolutePath();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,7 @@ import java.io.FileNotFoundException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Test the fasta sequence index reader.
|
||||||
* User: hanna
|
|
||||||
* Date: Apr 14, 2009
|
|
||||||
* Time: 10:34:15 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
public class FastaSequenceIndexTest extends BaseTest {
|
public class FastaSequenceIndexTest extends BaseTest {
|
||||||
// our basic human 18 fai
|
// our basic human 18 fai
|
||||||
|
|
|
||||||
|
|
@ -16,11 +16,7 @@ import net.sf.picard.PicardException;
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Test the indexed fasta sequence file reader.
|
||||||
* User: hanna
|
|
||||||
* Date: Apr 14, 2009
|
|
||||||
* Time: 2:37:29 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
public class IndexedFastaSequenceFileTest extends BaseTest {
|
public class IndexedFastaSequenceFileTest extends BaseTest {
|
||||||
private static String sequenceFileName;
|
private static String sequenceFileName;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue