diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReferenceIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReferenceIterator.java index a3ee99b2f..59fc02093 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReferenceIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReferenceIterator.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.iterators; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.util.Iterator; diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java deleted file mode 100755 index 58d40dfa1..000000000 --- a/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java +++ /dev/null @@ -1,213 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.picard.reference.ReferenceSequence; -import net.sf.samtools.util.RuntimeIOException; -import net.sf.samtools.util.StringUtil; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; - -import java.util.Iterator; -import java.util.NoSuchElementException; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: Feb 24, 2009 - * Time: 10:45:01 AM - * To change this template use File | Settings | File Templates. - */ -public class ReferenceIterator implements Iterator { - - // enable debugging output? - private final boolean DEBUG = false; - - // The reference sequence file generator - private FastaSequenceFile2 refFile; - - private ReferenceSequence currentContig = null; - //private ReferenceSequence nextContig = null; - private long offset = -1; - - public ReferenceIterator(FastaSequenceFile2 refFile) { - this.refFile = refFile; - } - - /** our log, which we want to capture anything from this class */ - private static Logger logger = Logger.getLogger(ReferenceIterator.class); - - // -------------------------------------------------------------------------------------------------------------- - // - // Accessing data - // - // -------------------------------------------------------------------------------------------------------------- - public byte getBaseAsByte() { - return currentContig.getBases()[(int) offset]; - } - - public String getBaseAsString() { - assert offset > -1 : currentContig.getName() + " index is " + offset; - //assert offset < currentContig.getBases().(); - - return StringUtil.bytesToString(currentContig.getBases(), (int) offset, 1); - } - - public char getBaseAsChar() { - return getBaseAsString().charAt(0); - } - - public ReferenceSequence getCurrentContig() { - return currentContig; - } - - public long getPosition() { - return offset + 1; - } - - public GenomeLoc getLocation() { - return GenomeLocParser.createGenomeLoc(getCurrentContig().getName(), getPosition()); - } - - // -------------------------------------------------------------------------------------------------------------- - // - // Iterator routines - // - // -------------------------------------------------------------------------------------------------------------- - public synchronized boolean hasNext() { - if (currentContig == null || offset + 1 < currentContig.length()) { - return true; - } else { - return readNextContig(); - } - } - - public synchronized ReferenceIterator next() { - if (currentContig != null) { - if (DEBUG) - logger.debug(String.format(" -> %s:%d %d%n", currentContig.getName(), offset, currentContig.length())); - } - offset++; // move on to the next position - - if (currentContig == null || offset >= currentContig.length()) { - // We need to update the contig - if (readNextContig()) { - // We sucessfully loaded the next contig, recursively call next - return next(); - } else { - throw new NoSuchElementException(); - } - } else { - // We're good to go -- we're in the current contig - return this; - } - } - - public void remove() { - throw new UnsupportedOperationException(); - } - - - // -------------------------------------------------------------------------------------------------------------- - // - // Jumping forward - // - // -------------------------------------------------------------------------------------------------------------- - public ReferenceIterator seekForward(final GenomeLoc loc) { - assert loc != null : "seekForward location is null"; - - return seekForwardOffset(loc.getContig(), loc.getStart() - 1); - } - - public ReferenceIterator seekForward(final String contigName, final long pos) { - return seekForwardOffset(contigName, pos - 1); - } - - /** - * Helper routine that doesn't move the contigs around, it just checks that everything is kosher in the seek - * within this chromosome - * - * @param seekContigName name for printing pursues, asserted to be the current contig name - * @param seekOffset where we want to be in this contig - * @return this setup to be at seekoffset within seekContigName - */ - private ReferenceIterator seekForwardOffsetOnSameContig(final String seekContigName, final long seekOffset) { - assert seekContigName.equals(currentContig.getName()) : String.format("only works on this contig, but the current %s and sought %s contigs are different!", currentContig.getName(), seekContigName); - - // we're somewhere on this contig - if (seekOffset < offset ) { - // bad boy -- can't go backward safely - throw new IllegalArgumentException(String.format("Invalid seek %s => %s, which is usually due to out of order reads%n", - GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset))); - } else if (seekOffset >= currentContig.length()) { - // bad boy -- can't go beyond the contig length - throw new IllegalArgumentException(String.format("Invalid seek to %s, which is beyond the end of the contig%n", - GenomeLocParser.createGenomeLoc(currentContig.getName(), seekOffset + 1))); - } else { - offset = seekOffset - 1; - return next(); - } - } - - - private ReferenceIterator seekForwardOffset(final String seekContigName, final long seekOffset) { - assert seekContigName != null : "seekContigName is null"; - assert seekOffset >= 0 : "seekOffset < 0: " + seekOffset; - - // jumps us forward in the sequence to the contig / pos - if (currentContig == null) - next(); - - if (DEBUG) - logger.debug(String.format(" -> Seeking to %s %d from %s %d%n", seekContigName, seekOffset, currentContig.getName(), offset)); - - - int cmpContigs = GenomeLocParser.compareContigs(seekContigName,currentContig.getName()); - - if ( cmpContigs < 0 && GenomeLocParser.hasKnownContigOrdering() ) { // if we know the order of contigs and we are already past the contig we seek, it's too late! - // The contig we are looking for is before the currentContig -- it's an error - throw new IllegalArgumentException(String.format("Invalid seek %s => %s, contigs/sequences are out of order%n", - GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset))); - } - - if ( cmpContigs > 0 || (! GenomeLocParser.hasKnownContigOrdering() ) && cmpContigs != 0 ) { // if contig we seek is still ahead, or if we have no idea what the order is and current contig is not what we seek - // then try to seek forward in the reference file until we get the contig we need - if (DEBUG) - logger.debug(String.format(" -> Seeking in the fasta file to %s from %s%n", seekContigName, currentContig.getName())); - - if (!refFile.seekToContig(seekContigName)) { // ok, do the seek - // a false result indicates a failure, throw a somewhat cryptic call - throw new RuntimeIOException(String.format("Unexpected seek failure from %s to %s%n", - GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset))); - } - - readNextContig(); // since we haven't failed, we just read in the next contig (which is seekContigName) - } - - // at this point, the current contig is seekContigName, so just do a bit more error checking and be done - return seekForwardOffsetOnSameContig(seekContigName, seekOffset); - } - - - // -------------------------------------------------------------------------------------------------------------- - // - // Interal state manipulation - // - // -------------------------------------------------------------------------------------------------------------- - protected boolean readNextContig() { - // returns true if we had another contig to load - currentContig = refFile.nextSequence(); - offset = -1; - return currentContig != null; - } - - /** - * Simple forwarding method to the refFile itself - * - * @return - */ - public String nextContigName() { - return this.refFile.getNextContigName(); - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index bfd44589c..6bc3a5481 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -17,7 +17,6 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import java.io.File; import java.util.*; @@ -54,9 +53,6 @@ public abstract class TraversalEngine { // The reference data -- filename, refSeqFile, and iterator protected File refFileName = null; // the name of the reference file - //private ReferenceSequenceFile refFile = null; - protected FastaSequenceFile2 refFile = null; // todo: merge FastaSequenceFile2 into picard! - protected ReferenceIterator refIter = null; // Progress tracker for the sam file protected FileProgressTracker samReadingTracker = null; @@ -317,7 +313,6 @@ public abstract class TraversalEngine { */ public boolean initialize() { lastProgressPrintTime = startTime = System.currentTimeMillis(); - initializeReference(); // Initial the reference ordered data iterators initializeRODs(); @@ -350,21 +345,6 @@ public abstract class TraversalEngine { } } - /** - * Prepare the reference for stream processing - */ - protected void initializeReference() { - if (refFileName != null) { - //this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName); - this.refFile = new FastaSequenceFile2(refFileName); // todo: replace when FastaSequenceFile2 is in picard - this.refIter = new ReferenceIterator(this.refFile); - if (!GenomeLocParser.setupRefContigOrdering(this.refFile)) { - // We couldn't process the reference contig ordering, fail since we need it - Utils.scareUser(String.format("We couldn't load the contig dictionary associated with %s. At the current time we require this dictionary file to efficiently access the FASTA file. Please use /seq/software/picard/current/bin/CreateSequenceDictionary.jar to create a sequence dictionary for your file", refFileName)); - } - } - } - /** * Prepare the list of reference ordered data iterators for each of the rods * @@ -378,17 +358,6 @@ public abstract class TraversalEngine { } } - /** - * An inappropriately placed testing of reading the reference - */ - protected void testReference() { - while (true) { - ReferenceSequence ref = refFile.nextSequence(); - logger.debug(String.format("%s %d %d", ref.getName(), ref.length(), System.currentTimeMillis())); - printProgress(true, "loci", GenomeLocParser.createGenomeLoc("foo", 1)); - } - } - // -------------------------------------------------------------------------------------------------------------- // // shutdown diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWithContextWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWithContextWalker.java index d28f3fd7a..16d1739bd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWithContextWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWithContextWalker.java @@ -8,9 +8,7 @@ import org.broadinstitute.sting.gatk.refdata.rodGFF; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import net.sf.samtools.SAMRecord; import net.sf.picard.reference.ReferenceSequence; import java.util.List; diff --git a/java/src/org/broadinstitute/sting/playground/piecemealannotator/TileAnnotator.java b/java/src/org/broadinstitute/sting/playground/piecemealannotator/TileAnnotator.java index b41990363..e8857f154 100755 --- a/java/src/org/broadinstitute/sting/playground/piecemealannotator/TileAnnotator.java +++ b/java/src/org/broadinstitute/sting/playground/piecemealannotator/TileAnnotator.java @@ -8,7 +8,6 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.containers.BoundedScoringSet; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import java.io.File; import java.util.ArrayList; diff --git a/java/src/org/broadinstitute/sting/secondarybase/BasecallingTrainer.java b/java/src/org/broadinstitute/sting/secondarybase/BasecallingTrainer.java index b9b805caf..c4dd0a424 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/BasecallingTrainer.java +++ b/java/src/org/broadinstitute/sting/secondarybase/BasecallingTrainer.java @@ -3,9 +3,11 @@ package org.broadinstitute.sting.secondarybase; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.picard.reference.ReferenceSequence; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import java.io.File; import java.util.ArrayList; @@ -159,7 +161,7 @@ public class BasecallingTrainer { * @return a vector of perfect reads, grouped by tile */ private Vector> getPerfectAlignmentsByTile(File samIn, File reference) { - FastaSequenceFile2 ref = new FastaSequenceFile2(reference); + ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(reference); String currentContig = "none"; byte[] refbases = null; @@ -175,9 +177,11 @@ public class BasecallingTrainer { int offset = sr.getAlignmentStart(); if (!currentContig.matches(sr.getReferenceName())) { - ref.seekToContig(sr.getReferenceName()); + ReferenceSequence refSeq = ref.nextSequence(); + while( !refSeq.getName().equals(sr.getReferenceName()) ) + refSeq = ref.nextSequence(); currentContig = sr.getReferenceName(); - refbases = ref.nextSequence().getBases(); + refbases = refSeq.getBases(); } int mismatches = 0; diff --git a/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceFile2.java b/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceFile2.java deleted file mode 100644 index d73267ef5..000000000 --- a/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceFile2.java +++ /dev/null @@ -1,472 +0,0 @@ -package org.broadinstitute.sting.utils.fasta; - -import net.sf.picard.PicardException; -import net.sf.picard.reference.ReferenceSequenceFile; -import net.sf.picard.reference.ReferenceSequence; -import net.sf.picard.io.IoUtil; - -import java.io.*; - -import net.sf.samtools.SAMTextHeaderCodec; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMSequenceDictionary; -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.util.AsciiLineReader; -import net.sf.samtools.util.StringUtil; -import net.sf.samtools.util.RuntimeIOException; -import org.apache.log4j.Logger; - -/** - * Implementation of ReferenceSequenceFile for reading from FASTA files. - * - * Now supports additional operations to seek and query the next contig from the file, for efficient - * implementation of jumping forward in the file. - * - * @author Tim Fennell - * @author Extended in parts by Mark DePristo - */ -public class FastaSequenceFile2 implements ReferenceSequenceFile { - private final File file; - private BufferedInputStream in; - private SAMSequenceDictionary sequenceDictionary = null; - private String currentContigName = null; - - /** - * our log, which we want to capture anything from this class - */ - private static Logger logger = Logger.getLogger(FastaSequenceFile2.class); - - /** - * Set to true to see lots of debugging output during operation - */ - private final boolean DEBUG = false; - - /** - * The name, if known, of the next contig in the file. Can be null, indicating either that there is - * no known next contig name, or that the file has been completely read - */ - private String nextContigName = null; - - /** Constructs a FastaSequenceFile that reads from the specified file. */ - public FastaSequenceFile2(final File file) { - this.file = file; - initializeInputStream(); - - // Try and locate the dictionary - String dictionaryName = file.getAbsolutePath(); - dictionaryName = dictionaryName.substring(0, dictionaryName.lastIndexOf(".fasta")); - dictionaryName += ".dict"; - final File dictionary = new File(dictionaryName); - if (dictionary.exists()) { - IoUtil.assertFileIsReadable(dictionary); - - try { - final SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); - final SAMFileHeader header = codec.decode(new AsciiLineReader(new FileInputStream(dictionary)), dictionary); - if (header.getSequenceDictionary() != null && header.getSequenceDictionary().size() > 0) { - this.sequenceDictionary = header.getSequenceDictionary(); - } - } - catch (Exception e) { - throw new PicardException("Could not open sequence dictionary file: " + dictionaryName, e); - } - } - } - - /** - * Opens the file for input. Closes the previous input stream if there is one, and sets up all of the key - * variables such that this object's state is clean and ready for processing sequences. - */ - private void initializeInputStream() { - // we're at the start and haven't processed anything yet - nextContigName = currentContigName = null; - - if ( this.in != null ) { // this isn't our first time here - try { - this.in.close(); - } catch ( IOException e ) { - throw new RuntimeIOException("initializing InputStream failure", e); - } - } - - // Now reopen the input stream - this.in = new BufferedInputStream(IoUtil.openFileForReading(file)); - } - - /** - * Returns the list of sequence records associated with the reference sequence if found - * otherwise null. - */ - public SAMSequenceDictionary getSequenceDictionary() { - return this.sequenceDictionary; - } - - // -------------------------------------------------------------------------------------------------------------- - // - // Support functions for seeking around in the file - // - // -------------------------------------------------------------------------------------------------------------- - - /** - * Returns the distance, in bp, between contig1 and contig2 according to this fasta file's dictionary. That is, - * the number of bp we'd have to traverse to move from the start of contig1 to reach the start of contig2. - * - * If contig1 occurs before contig2, a negative number is returned. 0 indicates that the contigs are the same. - * - * Returns Integer.MAX_VALUE if the sequence dictionary cannot be found - * - * @param contig1 - * @param contig2 - * @return distance in bp from the start of contig1 to the start of contig2 - */ - public long getDistanceBetweenContigs(final String contig1, final String contig2) { - assert contig1 != null; - assert contig2 != null; - - final SAMSequenceDictionary seqDict = getSequenceDictionary(); - - if ( seqDict == null ) // we couldn't load the reference dictionary - return Integer.MAX_VALUE; - - SAMSequenceRecord contig1Rec = seqDict.getSequence(contig1); - SAMSequenceRecord contig2Rec = seqDict.getSequence(contig2); - - assert contig1Rec != null : "Contig1 record is null: " + contig1; - assert contig1Rec != null : "Contig2 record is null: " + contig2; - - if ( DEBUG ) - logger.debug(String.format("Contig1=(%s, %d), contig2=(%s, %d)%n", - contig1, contig1Rec.getSequenceIndex(), - contig2, contig2Rec.getSequenceIndex())); - - int startIndex = Math.min(contig1Rec.getSequenceIndex(), contig2Rec.getSequenceIndex()); - int lastIndex = Math.max(contig1Rec.getSequenceIndex(), contig2Rec.getSequenceIndex()); - - long bytesToTraverse = 0; - for ( int i = startIndex; i < lastIndex; i++ ) { - SAMSequenceRecord rec = seqDict.getSequence(i); - bytesToTraverse += rec.getSequenceLength(); - if ( DEBUG ) - logger.debug(String.format(" -> Traversing from %15s to %15s requires reading at least %10d bytes to pass contig %15s, total bytes %10d%n", - contig1, contig2, rec.getSequenceLength(), rec.getSequenceName(), bytesToTraverse)); - } - - if ( contig1Rec.getSequenceIndex() > contig2Rec.getSequenceIndex() ) - bytesToTraverse *= -1; // we are going backward! - - if ( DEBUG ) logger.debug(String.format(" -> total distance is %d%n", bytesToTraverse)); - - return bytesToTraverse; - } - - /** - * Seeks to seekContig in the fasta file, such that nextSequence() will read the seekContig from the fasta - * file. Only allows forward seeks. Throws a RuntimeIOException if the seekContig is before the current - * contig. - * - * @param seekContig the contig I want to seek to - * @return true on success - * - * @see #seekToContig(String) - */ - public boolean seekToContig( final String seekContig ) { - return seekToContig(seekContig, false); - } - - /** - * Seeks to seekContig in the fasta file, such that nextSequence() will read the seekContig from the fasta - * file. If enableBacktracking is false, only allows forward seeks, and throws a RuntimeIOException if the - * seekContig is before the current contig. If enableBacktracking is true, then if seekContig is before - * the current contig, resets the input stream and seeks to the contig. - * - * Requires that the fasta file have a SequenceDictionary associated with it. Otherwises throws an error - * - * @param seekContig The contig I want to seek to - * @param enableBacktracking Should we allow seeks to contigs earlier in the file? - * @return true on success - */ - public boolean seekToContig(final String seekContig, boolean enableBacktracking ) { - if ( DEBUG ) logger.debug(String.format("seekToContig( %s, %b )%n", seekContig, enableBacktracking)); - - String curContig = getContigName(); - String nextContig = null; - - if ( curContig == null ) { - logger.info(String.format("CurrentContig is null")); - if ( this.sequenceDictionary == null ) - throw new PicardException( String.format("Seeking within contigs requires FASTA dictionary, but none was available for %s", this.file )); - - // We only reach this point when we're seeking before we've read in any of the fasta file, - // so assume we are at the start of the file - nextContig = this.sequenceDictionary.getSequence(0).getSequenceName(); - } - else - nextContig = getNextContigName(); - - if ( nextContig == null ) // we're are at the end of the stream - return false; - - // we have already read in the current contig, we are jumping from the next contig onwards - long dist = getDistanceBetweenContigs(nextContig, seekContig); - - if ( dist == Integer.MAX_VALUE ) - return false; // we don't know where to go - else if ( dist == 0 ) - return true; // We already here! - else if ( dist < 0 ) { - if ( enableBacktracking ) { - // System.out.printf("*** Backtracking to %s%n", seekContig); - // restart from the beginning, and try again - initializeInputStream(); - return seekToContig(seekContig, enableBacktracking); - } else - return false; // we're not going backwards just yet - } - else { - if ( DEBUG ) logger.debug(String.format("Going to seek to contig %s with skip %d%n", seekContig, dist)); - // we're actually going to jump somewhere, so prepare the state - this.nextContigName = null; // reset the contig info - - // TODO: this is a dangerous method -- can we get access to the underlying file object seek? - long bytesToSkip = dist; - while ( bytesToSkip > 0 ) { - try { - final long skipped = this.in.skip(bytesToSkip); - bytesToSkip -= skipped; - // System.out.printf(" -> skipping %d, %d remaining%n", skipped, bytesToSkip); - } catch (IOException ioe) { - throw new PicardException("Error reading from file: " + this.file.getAbsolutePath(), ioe); - } - } - - if ( bytesToSkip != 0 ) { // skip dist bytes - throw new PicardException(String.format("Failed to skip all of the %d bases requested, only got %d", dist, dist - bytesToSkip * 2)); - } - - // at this point we're ready to start looking for the next header, so call seekNextContigName() - final String next = seekForNextContig(seekContig); - - if ( ! next.equals(seekContig) ) // OMG, what the hell happened, throw a runtime exception - throw new PicardException(String.format("Failed to seek from %s to %s, ended up at %s", - curContig, seekContig, next)); - else { - this.currentContigName = next; - return true; - } - } - } - - /** - * Reads the next contig from the fasta file, and returns it as a ReferenceSequence. - * - * @return null if there are no more sequences in the fasta stream - */ - public ReferenceSequence nextSequence() { - if ( DEBUG ) logger.debug(String.format("Calling nextSequence()%n")); - - // Read the header line - currentContigName = getNextContigName(); - if ( currentContigName == null ) return null; // no more sequences! - - int index = -1; - if ( this.sequenceDictionary != null ) - index = this.sequenceDictionary.getSequenceIndex(currentContigName); - - // Read the sequence - byte[] tmp = new byte[4096]; - int basesRead; - int totalBasesRead = 0; - final int knownLength = (index == -1) ? -1 : this.sequenceDictionary.getSequence(index).getSequenceLength(); - final int lengthByteArray = (knownLength != -1) ? knownLength : 250000000; - byte[] bases = new byte[lengthByteArray]; - - while ((basesRead = readNextLine(bases, totalBasesRead)) != 0) { - totalBasesRead += basesRead; - - // Make sure we'll have space for the next iteration if we need it - if (totalBasesRead == knownLength) { - //System.out.printf("Read bases: %s%n", StringUtil.bytesToString(bases, totalBasesRead - basesRead, basesRead).trim()); - - assert peekOneByte() == -1 || peekOneByte() == '>' : "We somehow managed to read in enough bytes for the contig, but didn't pass through the entire contig"; - break; - } else { - final byte b = peekOneByte(); - if (b == -1 || b == '>') { - break; - } - else if (totalBasesRead == bases.length) { - tmp = new byte[bases.length * 2]; - System.arraycopy(bases, 0, tmp, 0, totalBasesRead); - bases = tmp; - tmp = null; - } - } - } - - // And lastly resize the array down to the right size - if (totalBasesRead != bases.length) { - tmp = new byte[totalBasesRead]; - System.arraycopy(bases, 0, tmp, 0, totalBasesRead); - bases = tmp; - tmp = null; - } - - assert knownLength == -1 || knownLength == bases.length; - - this.nextContigName = null; // we no longer know what the next contig name is - - if ( DEBUG ) logger.debug(String.format(" => nextSequence() is returning %s, known length = %d%n", this.currentContigName, knownLength)); - if ( DEBUG ) logger.debug(String.format(" => nextSequence() next is %s%n", this.getNextContigName())); - - return new ReferenceSequence(currentContigName, index, bases); - } - - /** - * Returns the next of the next contig, or null if there are no more contigs. Stateful function, it - * remembers the name of the next contig. Use readNextContigName for stateless operation. This function - * is primarily useful if you need to know what the next contig is in the stream for algorithmic purposes. - * - * Note that this function assumes the stream is sitting right at the end of the previous contig, or at the - * beginning of the file. - * - * @return the name of the next contig, or null if there is no next contig - */ - public String getNextContigName() { - if ( DEBUG ) logger.debug(String.format("getNextContigName() => %s%n", this.nextContigName)); - - if ( this.nextContigName == null ) { - // If it's not null, we've already looked up the next contig name, just return it and happily continue - // Otherwise we need to actually read in the name - this.nextContigName = readNextContigName(); - } - - if ( DEBUG ) logger.debug(String.format("nextContigName is now %s%n", nextContigName)); - return this.nextContigName; - } - - /** - * Simply reads the next contig name from the fasta input stream. It assumes that the stream is positioned - * immediately before the start of the next contig. Calling it without this condition met results in the - * RuntimeIOException being thrown. This method advances the fasta stream itself -- subsequent calls to the - * method will lead to errors. getNextContigName is the stateful version. - * - * @See getNextContigName() - * - * @return the string name of the next contig - */ - private String readNextContigName() { - // Otherwise we need to actually read in the name - byte[] tmp = new byte[4096]; - final int nameLength = readNextLine(tmp, 0); - if (nameLength != 0) { - // 0 means no more sequences! - if ( tmp[0] != '>' ) - throw new RuntimeIOException("The next line is supposed to be a fasta contig start but found " + StringUtil.bytesToString(tmp, 0, nameLength).trim()); - - return StringUtil.bytesToString(tmp, 1, nameLength).trim(); - } - - return null; - } - - /** - * @return The name of the contig we returned in the last call to nextSequence() - */ - public String getContigName() { - return this.currentContigName; - } - - /** - * Moves the IO stream to right before the next contig marker in the fasta file, for anywhere inside the - * previous contig. It is primarily useful as a supplementary routine for jumping forward in the file by - * N bases, since we don't know exactly how far way N bases in bytes will be in the file. So a guess jump - * will put us somewhere before the target contig, and we use this routine to seek forward to the actual - * contig we want. - * - * @return the name of the next contig, as a string - */ - public String seekForNextContig(final String targetContig ) { - //System.out.printf("seekForNextContig()%n"); - - int basesRead; - int totalBasesRead = 0; - byte[] bases = new byte[4096]; - int i = 0; - while ((basesRead = readNextLine(bases, 0)) != 0) { - totalBasesRead += basesRead; - - // Keep looking for the > marking the start of the line, and stop - final byte b = peekOneByte(); - if (b == -1 || b == '>') { - final String foundContig = readNextContigName(); - // System.out.printf("Found a contig name line %s%n", foundContig); - final int foundIndex = this.sequenceDictionary.getSequenceIndex(foundContig); - final int ourIndex = this.sequenceDictionary.getSequenceIndex(targetContig); - - if ( foundIndex == ourIndex ) { - // we found our target! - this.nextContigName = foundContig; // store the right answer - if ( DEBUG ) logger.debug(String.format("seekForNextContig found %s%n", foundContig)); - return foundContig; - } - else if ( foundIndex <= ourIndex ) - // we are still looking for our contig, the seek estimate was inaccurate relative to the size of contings in this area - continue; - else { - // This is really bad -- we are past our target - throw new RuntimeIOException(String.format("Seek pushes us past our target contig of %s, instead we found %s, which is after the target in the sequence dictions", targetContig, foundContig)); - } - } - } - - - return null; - } - - /** Peeks one non line-terminating byte from the file and returns it. */ - private byte peekOneByte() { - try { - this.in.mark(16); - byte b = '\n'; - while (b == '\n' || b == '\r') { - b = (byte) this.in.read(); - } - - this.in.reset(); - return b; - } - catch (IOException ioe) { - throw new PicardException("Error reading from file: " + this.file.getAbsolutePath(), ioe); - } - } - - /** - * Reads the next line from the file and puts the bytes into the buffer starting at - * the provided index. Stops when the buffer is full, a line terminator is hit or - * the end of the file is hit. - */ - private int readNextLine(final byte[] buffer, final int index) { - try { - int next; - int read = 0; - while ((next = this.in.read()) != -1 && index + read < buffer.length) { - final byte b = (byte) next; - if (b == '\r' || b == '\n') { - if (read != 0) return read; - } - else { - buffer[index + read++] = b; - } - } - return read; - } - catch (IOException ioe) { - throw new PicardException("Error reading line from file: " + this.file.getAbsolutePath(), ioe); - } - } - - /** Returns the full path to the reference file. */ - public String toString() { - return this.file.getAbsolutePath(); - } -} diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceTest.java index 52d70c57c..40a79cf86 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceTest.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import static junit.framework.Assert.fail; +import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.datasources.shards.Shard; @@ -9,12 +10,13 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.junit.After; import org.junit.Before; import org.junit.Test; import java.io.File; +import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.List; @@ -47,7 +49,7 @@ import java.util.List; public class SAMBAMDataSourceTest extends BaseTest { private List fl; - private FastaSequenceFile2 seq; + private ReferenceSequenceFile seq; /** * This function does the setup of our parser, before each method call. @@ -55,11 +57,11 @@ public class SAMBAMDataSourceTest extends BaseTest { * Called before every test case method. */ @Before - public void doForEachTest() { + public void doForEachTest() throws FileNotFoundException { fl = new ArrayList(); // sequence - seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); + seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary()); } diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java index bbfa1e5ba..f1c35549c 100755 --- a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.iterators; import static junit.framework.Assert.fail; import net.sf.samtools.SAMRecord; +import net.sf.picard.reference.ReferenceSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; @@ -10,13 +11,14 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SimpleDataSourceLoadException; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import org.junit.Before; import org.junit.Test; import java.io.File; +import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.List; @@ -60,7 +62,7 @@ public class BoundedReadIteratorTest extends BaseTest { /** the file list and the fasta sequence */ private List fl; - private FastaSequenceFile2 seq; + private ReferenceSequenceFile seq; /** * This function does the setup of our parser, before each method call. @@ -68,11 +70,11 @@ public class BoundedReadIteratorTest extends BaseTest { * Called before every test case method. */ @Before - public void doForEachTest() { + public void doForEachTest() throws FileNotFoundException { fl = new ArrayList(); // sequence - seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); + seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary()); } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java index a41bd40de..e1f866ed2 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java @@ -7,7 +7,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.junit.*; import static org.junit.Assert.assertTrue; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.GenomeLocParser; import java.io.File; @@ -17,20 +17,22 @@ import java.io.FileNotFoundException; import java.util.Arrays; import java.util.ArrayList; +import net.sf.picard.reference.ReferenceSequenceFile; + /** * Basic unit test for TabularROD * */ public class TabularRODTest extends BaseTest { - private static FastaSequenceFile2 seq; + private static ReferenceSequenceFile seq; private ReferenceOrderedData ROD; private RODIterator iter; @BeforeClass - public static void init() { + public static void init() throws FileNotFoundException { // sequence - seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); + seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); GenomeLocParser.setupRefContigOrdering(seq); } diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java index af717c9f7..c96395bbc 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java @@ -12,7 +12,6 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.CountReadsWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import static org.junit.Assert.fail; import org.junit.Before; @@ -26,6 +25,8 @@ import java.lang.reflect.Field; import java.util.ArrayList; import java.util.List; +import net.sf.picard.reference.ReferenceSequenceFile; + /** * * User: aaron @@ -54,7 +55,7 @@ import java.util.List; */ public class TraverseReadsTest extends BaseTest { - private FastaSequenceFile2 seq; + private ReferenceSequenceFile seq; private File bam = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"); // TCGA-06-0188.aligned.duplicates_marked.bam"); private File refFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/Homo_sapiens_assembly17.fasta"); private List bamList; diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocTest.java index dd15804bb..690e7f78b 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocTest.java @@ -8,20 +8,23 @@ import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.io.File; +import java.io.FileNotFoundException; + +import net.sf.picard.reference.ReferenceSequenceFile; /** * Basic unit test for GenomeLoc */ public class GenomeLocTest extends BaseTest { - private static FastaSequenceFile2 seq; + private static ReferenceSequenceFile seq; @BeforeClass - public static void init() { + public static void init() throws FileNotFoundException { // sequence - seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); + seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); GenomeLocParser.setupRefContigOrdering(seq); } diff --git a/java/test/org/broadinstitute/sting/utils/RefHangerTest.java b/java/test/org/broadinstitute/sting/utils/RefHangerTest.java index 74d32e223..dd446494d 100755 --- a/java/test/org/broadinstitute/sting/utils/RefHangerTest.java +++ b/java/test/org/broadinstitute/sting/utils/RefHangerTest.java @@ -5,16 +5,19 @@ package org.broadinstitute.sting.utils; // the imports for unit testing. import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import static org.junit.Assert.assertTrue; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import java.io.File; +import java.io.FileNotFoundException; import java.util.Arrays; import java.util.List; +import net.sf.picard.reference.ReferenceSequenceFile; + /** * Basic unit test for RefHanger * @@ -50,7 +53,7 @@ import java.util.List; */ public class RefHangerTest extends BaseTest { - private static FastaSequenceFile2 seq; + private static ReferenceSequenceFile seq; private GenomeLoc startLoc; private RefHanger emptyHanger; @@ -67,9 +70,9 @@ public class RefHangerTest extends BaseTest { private static GenomeLoc p1, p2, p3, p4, p5; @BeforeClass - public static void init() { + public static void init() throws FileNotFoundException { // sequence - seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); + seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); GenomeLocParser.setupRefContigOrdering(seq); System.out.printf("Filled hanger is %n%s%n", makeFilledHanger()); diff --git a/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceFile2Test.java b/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceFile2Test.java deleted file mode 100755 index 682ba6772..000000000 --- a/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceFile2Test.java +++ /dev/null @@ -1,197 +0,0 @@ -package org.broadinstitute.sting.utils.fasta; - -import net.sf.picard.reference.ReferenceSequence; -import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.BaseTest; -import org.junit.*; - -import java.io.File; - -/** - * Created by IntelliJ IDEA. - * User: mhanna - * Date: Apr 11, 2009 - * Time: 2:32:52 PM - */ -public class FastaSequenceFile2Test extends BaseTest { - - private static String sequenceFileName; - private FastaSequenceFile2 sequenceFile = null; - - private final String firstBasesOfChrM = "GATCACAGGTCTATCACCCT"; - private final String firstBasesOfChr1 = "taaccctaaccctaacccta"; - private final String firstBasesOfChr8 = "GCAATTATGACACAAAAAAT"; - - @BeforeClass - public static void initialize() { - sequenceFileName = seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"; - } - - @Before - public void doForEachTest() { - sequenceFile = new FastaSequenceFile2( new File(sequenceFileName) ); - } - - /** - * Tears down the test fixture after each call. - *

- * Called after every test case method. - */ - @After - public void undoForEachTest() { - sequenceFile = null; - } - - - @Test - public void testOpenFile() { - logger.warn("Executing testOpenFile"); - - long startTime = System.currentTimeMillis(); - Assert.assertNotNull( sequenceFile ); - long endTime = System.currentTimeMillis(); - - System.err.printf("testOpenFile runtime: %dms%n", (endTime - startTime)) ; - } - - @Test - public void testFirstSequence() { - logger.warn("Executing testFirstSequence"); - - long startTime = System.currentTimeMillis(); - ReferenceSequence sequence = sequenceFile.nextSequence(); - Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM"); - Assert.assertEquals( "First n bases of chrM are incorrect", - StringUtil.bytesToString( sequence.getBases(), 0, firstBasesOfChrM.length() ), - firstBasesOfChrM ); - long endTime = System.currentTimeMillis(); - - System.err.printf("testFirstSequence runtime: %dms%n", (endTime - startTime)) ; - } - - @Test - public void testNextSequence() { - logger.warn("Executing testNextSequence"); - - long startTime = System.currentTimeMillis(); - - ReferenceSequence sequence = null; - - // Advance to chrM. - sequence = sequenceFile.nextSequence(); - sequence = sequenceFile.nextSequence(); - - Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr1"); - - // Workaround: bytesToString for chr1 of the fasta file we've picked doesn't appear to work. - // TODO: Report this as sam-jdk bug. - byte[] firstOfChr1 = StringUtil.stringToBytes(firstBasesOfChr1); - byte[] firstOfSequence = new byte[firstBasesOfChr1.length()]; - System.arraycopy(sequence.getBases(), 0, firstOfSequence, 0, firstOfSequence.length ); - - Assert.assertArrayEquals("First bases of chr1 are not correct", firstOfChr1, firstOfSequence ); - - long endTime = System.currentTimeMillis(); - - System.err.printf("testNextSequence runtime: %dms%n", (endTime - startTime)) ; - } - - @Test - public void testSeekToSequence() { - logger.warn("Executing testSeekToSequence"); - - long startTime = System.currentTimeMillis(); - - boolean success = sequenceFile.seekToContig("chr8"); - Assert.assertTrue("Seek to seq chr8 failed", success ); - - ReferenceSequence sequence = sequenceFile.nextSequence(); - Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr8"); - Assert.assertEquals( "First n bases of chrc are incorrect", - StringUtil.bytesToString( sequence.getBases(), 0, firstBasesOfChr8.length() ), - firstBasesOfChr8 ); - - long endTime = System.currentTimeMillis(); - - System.err.printf("testSeekToSequence runtime: %dms%n", (endTime - startTime)) ; - } - - // TODO: Is NullPointerException *really* the right exception when a sequence is missing? - @Test(expected=NullPointerException.class) - public void testSeekToMissingSequence() { - logger.warn("Executing testSeekToMissingSequence"); - - long startTime = 0L, endTime = 0L; - - try { - startTime = System.currentTimeMillis(); - boolean success = sequenceFile.seekToContig("absent"); - } - finally { - endTime = System.currentTimeMillis(); - System.err.printf("testSeekToMissingSequence runtime: %dms%n", (endTime - startTime)) ; - } - } - - @Test - public void testSeekBackward() { - logger.warn("Executing testSeekBackward"); - - long startTime = System.currentTimeMillis(); - - boolean success = sequenceFile.seekToContig("chr9"); - Assert.assertTrue("Unable to seek to contig 'chr9'", success); - - success = sequenceFile.seekToContig("chr8",true); - Assert.assertTrue("Unable to seek backward to contig 'chr8'", success); - - ReferenceSequence sequence = sequenceFile.nextSequence(); - Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr8"); - Assert.assertEquals( "First n bases of chrc are incorrect", - StringUtil.bytesToString( sequence.getBases(), 0, firstBasesOfChr8.length() ), - firstBasesOfChr8 ); - - long endTime = System.currentTimeMillis(); - - System.err.printf("testSeekBackward runtime: %dms%n", (endTime - startTime)) ; - } - - @Test - public void testInvalidSeekBackward() { - logger.warn("Executing testInvalidSeekBackward"); - - long startTime = System.currentTimeMillis(); - - boolean success = sequenceFile.seekToContig("chr9"); - Assert.assertTrue("Unable to seek to contig 'chr9'", success); - - success = sequenceFile.seekToContig("chr8"); - Assert.assertFalse("Unable to seek backward to contig 'chr8'", success); - - long endTime = System.currentTimeMillis(); - - System.err.printf("testInvalidSeekBackward runtime: %dms%n", (endTime - startTime)) ; - } - - @Test - public void testSimultaneousAccess() { - logger.warn("Executing testSimultaneousAccess"); - - long startTime = System.currentTimeMillis(); - - // FastaSequenceFile2 other = (FastaSequenceFile2)sequenceFile.clone(); - - sequenceFile.seekToContig("chr1"); - ReferenceSequence chr1 = sequenceFile.nextSequence(); - -// other.seekToContig("chr8"); -// ReferenceSequence chr8 = other.nextSequence(); - -// System.err.printf( "sequenceFile contig: %s%n", sequenceFile.getContigName() ); -// System.err.printf( "other contig: %s%n", other.getContigName() ); - - long endTime = System.currentTimeMillis(); - - System.err.printf("testSimultaneousAccess runtime: %dms%n", (endTime - startTime)) ; - } -} diff --git a/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java b/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java index 84b8f14d3..8273a3b48 100755 --- a/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java +++ b/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java @@ -10,6 +10,8 @@ import java.io.File; import java.io.FileNotFoundException; import net.sf.picard.reference.ReferenceSequence; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; import net.sf.picard.PicardException; import net.sf.samtools.util.StringUtil; @@ -122,7 +124,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest { @Test public void testFirstCompleteContigRead() { - FastaSequenceFile2 originalSequenceFile = new FastaSequenceFile2(new File(sequenceFileName)); + ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName)); ReferenceSequence expectedSequence = originalSequenceFile.nextSequence(); long startTime = System.currentTimeMillis(); @@ -164,9 +166,10 @@ public class IndexedFastaSequenceFileTest extends BaseTest { @Test public void testMiddleCompleteContigRead() { - FastaSequenceFile2 originalSequenceFile = new FastaSequenceFile2(new File(sequenceFileName)); - originalSequenceFile.seekToContig("chrY"); + ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName)); ReferenceSequence expectedSequence = originalSequenceFile.nextSequence(); + while( !expectedSequence.getName().equals("chrY") ) + expectedSequence = originalSequenceFile.nextSequence(); long startTime = System.currentTimeMillis(); ReferenceSequence sequence = sequenceFile.getSequence("chrY"); @@ -183,9 +186,10 @@ public class IndexedFastaSequenceFileTest extends BaseTest { @Test public void testLastCompleteContigRead() { - FastaSequenceFile2 originalSequenceFile = new FastaSequenceFile2(new File(sequenceFileName)); - originalSequenceFile.seekToContig("chrX_random"); + ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName)); ReferenceSequence expectedSequence = originalSequenceFile.nextSequence(); + while( !expectedSequence.getName().equals("chrX_random") ) + expectedSequence = originalSequenceFile.nextSequence(); long startTime = System.currentTimeMillis(); ReferenceSequence sequence = sequenceFile.getSequence("chrX_random"); @@ -233,7 +237,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest { @Test public void testFirstElementOfIterator() { - FastaSequenceFile2 originalSequenceFile = new FastaSequenceFile2(new File(sequenceFileName)); + ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName)); ReferenceSequence expectedSequence = originalSequenceFile.nextSequence(); long startTime = System.currentTimeMillis(); @@ -251,7 +255,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest { @Test public void testNextElementOfIterator() { - FastaSequenceFile2 originalSequenceFile = new FastaSequenceFile2(new File(sequenceFileName)); + ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName)); // Skip past the first one and load the second one. originalSequenceFile.nextSequence(); ReferenceSequence expectedSequence = originalSequenceFile.nextSequence();