From 9e2f831206365a3480fe6b9528bbd53162319ac0 Mon Sep 17 00:00:00 2001 From: hanna Date: Tue, 8 Dec 2009 16:09:04 +0000 Subject: [PATCH] A bit of cleanup in preparation for Picard patch. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2286 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/fasta/FastaSequenceIndex.java | 46 ++++++++++++------- .../utils/fasta/IndexedFastaSequenceFile.java | 44 ++++++++++++------ .../utils/fasta/FastaSequenceIndexTest.java | 6 +-- .../fasta/IndexedFastaSequenceFileTest.java | 6 +-- 4 files changed, 63 insertions(+), 39 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceIndex.java b/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceIndex.java index 916317945..a65dfdabf 100755 --- a/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceIndex.java +++ b/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceIndex.java @@ -12,32 +12,27 @@ import java.io.File; import java.io.FileNotFoundException; /** - * Created by IntelliJ IDEA. - * User: hanna - * Date: Apr 14, 2009 - * Time: 10:02:10 AM - * - * Reads a fasta index file (.fai). + * Reads a fasta index file (.fai), as generated by `samtools faidx`. */ public class FastaSequenceIndex implements Iterable { - // Use a linked hash map to preserve the ordering of the contigs. - private Map sequenceEntries = - new LinkedHashMap(); + /** + * Store the entries. Use a LinkedHashMap for consistent iteration in insertion order. + */ + private Map sequenceEntries = new LinkedHashMap(); /** * Build a sequence index from the specified file. * @param indexFile File to open. - * @throws PicardException if file is of invalid format. + * @throws FileNotFoundException if the index file cannot be found. */ public FastaSequenceIndex( File indexFile ) throws FileNotFoundException { if(!indexFile.exists()) - throw new FileNotFoundException(String.format("Fasta index file is missing",indexFile.getAbsolutePath())); + throw new FileNotFoundException(String.format("Fasta index file is missing: %s",indexFile.getAbsolutePath())); IoUtil.assertFileIsReadable(indexFile); parseIndexFile(indexFile); } - /** * Parse the contents of an index file, caching the results internally. * @param indexFile File to parse. @@ -92,6 +87,10 @@ public class FastaSequenceIndex implements Iterable { return sequenceEntries.get(contigName); } + /** + * Creates an iterator which can iterate through all entries in a fasta index. + * @return iterator over all fasta index entries. + */ public Iterator iterator() { return sequenceEntries.values().iterator(); } @@ -105,6 +104,9 @@ public class FastaSequenceIndex implements Iterable { } } +/** + * Hold an individual entry in a fasta sequence index file. + */ class FastaSequenceIndexEntry { private String contig; private long location; @@ -112,11 +114,19 @@ class FastaSequenceIndexEntry { private int basesPerLine; private int bytesPerLine; + /** + * Create a new entry with the given parameters. + * @param contig Contig this entry represents. + * @param location Location (byte coordinate) in the fasta file. + * @param size The number of bases in the contig. + * @param basesPerLine How many bases are on each line. + * @param bytesPerLine How many bytes are on each line (includes newline characters). + */ public FastaSequenceIndexEntry( String contig, - long location, - long size, - int basesPerLine, - int bytesPerLine ) { + long location, + long size, + int basesPerLine, + int bytesPerLine ) { this.contig = contig; this.location = location; this.size = size; @@ -165,6 +175,10 @@ class FastaSequenceIndexEntry { return bytesPerLine; } + /** + * For debugging. Emit the contents of each contig line. + * @return A string representation of the contig line. + */ public String toString() { return String.format("contig %s; location %d; size %d; basesPerLine %d; bytesPerLine %d", contig, location, diff --git a/java/src/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFile.java b/java/src/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFile.java index 2262f12b2..ef9ad6a12 100755 --- a/java/src/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFile.java +++ b/java/src/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFile.java @@ -17,34 +17,46 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.nio.charset.CharacterCodingException; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; import java.util.Iterator; -import java.util.Scanner; /** - * Created by IntelliJ IDEA. - * User: hanna - * Date: Apr 14, 2009 - * Time: 2:14:26 PM - * * A fasta file driven by an index for fast, concurrent lookups. Supports two interfaces: * the ReferenceSequenceFile for old-style, stateful lookups and a direct getter. */ public class IndexedFastaSequenceFile implements ReferenceSequenceFile { + /** + * Stores the main fasta file. + */ private final File file; - private FileInputStream in; + + /** + * The interface facilitating direct access to the fasta. + */ private FileChannel channel; - private SAMSequenceDictionary sequenceDictionary = null; + /** + * A representation of the sequence dictionary, stored alongside the fasta in a .dict file. + */ + private SAMSequenceDictionary sequenceDictionary = null; + /** + * A representation of the sequence index, stored alongside the fasta in a .fasta.fai file. + */ private FastaSequenceIndex index; + + /** + * An iterator into the fasta index, for traversing iteratively across the fasta. + */ private Iterator indexIterator; + /** + * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. + * @param file The file to open. + * @throws FileNotFoundException If the fasta or any of its supporting files cannot be found. + */ public IndexedFastaSequenceFile(File file) throws FileNotFoundException { this.file = file; - in = new FileInputStream(file); + FileInputStream in = new FileInputStream(file); channel = in.getChannel(); loadDictionary(file); @@ -55,7 +67,6 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile { /** * Loads a dictionary, if available. * @param fastaFile File to check for a match. - * TODO: This code is copied directly from FastaSequenceFile / FastaSequenceFile2. Bring it into a shared utility. */ private void loadDictionary( File fastaFile ) { // Try and locate the dictionary @@ -207,11 +218,18 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile { return getSequence( indexIterator.next().getContig() ); } + /** + * Reset the iterator over the index. + */ @Override public void reset() { indexIterator = index.iterator(); } + /** + * A simple toString implementation for debugging. + * @return String representation of the file. + */ public String toString() { return this.file.getAbsolutePath(); } diff --git a/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexTest.java b/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexTest.java index 50ddd6037..78704da6a 100755 --- a/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexTest.java +++ b/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexTest.java @@ -12,11 +12,7 @@ import java.io.FileNotFoundException; import java.util.Iterator; /** - * Created by IntelliJ IDEA. - * User: hanna - * Date: Apr 14, 2009 - * Time: 10:34:15 AM - * To change this template use File | Settings | File Templates. + * Test the fasta sequence index reader. */ public class FastaSequenceIndexTest extends BaseTest { // our basic human 18 fai diff --git a/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java b/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java index 2a05b307b..012353e10 100755 --- a/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java +++ b/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java @@ -16,11 +16,7 @@ import net.sf.picard.PicardException; import net.sf.samtools.util.StringUtil; /** - * Created by IntelliJ IDEA. - * User: hanna - * Date: Apr 14, 2009 - * Time: 2:37:29 PM - * To change this template use File | Settings | File Templates. + * Test the indexed fasta sequence file reader. */ public class IndexedFastaSequenceFileTest extends BaseTest { private static String sequenceFileName;