diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSource.java index 5764ea8f3..bcc79e024 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSource.java @@ -25,15 +25,21 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.fasta.FastaSequenceIndex; import org.broadinstitute.sting.utils.fasta.FastaSequenceIndexBuilder; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import net.sf.picard.sam.CreateSequenceDictionary; +import org.broadinstitute.sting.utils.file.FSLock; import java.io.File; import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.channels.FileLock; + /** * Loads reference data from fasta file * Looks for fai and dict files, and tries to create them if they don't exist @@ -47,34 +53,41 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener /** * Create reference data source from fasta file * @param fastaFile Fasta file to be used as reference - * - * Please note: This should be re-done when PIC-370 is fixed. (See below.) At that time, we may consider loading - * sequenceIndex and sequenceDict here, instead of in IndexedFastaSequenceFile. */ public ReferenceDataSource(File fastaFile) { File indexFile = new File(fastaFile.getAbsolutePath() + ".fai"); File dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict")); - FastaSequenceIndex sequenceIndex; // stores FastaSequenceIndex if file doesn't exist /* - Note: this code is temporary. See commented code below, which will be used when thread safety is resolved. - */ + if index file does not exist, create it manually + */ if (!indexFile.exists()) { - FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this); - sequenceIndex = faiBuilder.sequenceIndex; + logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath())); + FileChannel indexChannel; + FileLock indexLock; try { - index = new IndexedFastaSequenceFile(fastaFile, sequenceIndex); + // get exclusive lock + indexChannel = new RandomAccessFile(indexFile, "rw").getChannel(); + if ((indexLock = indexChannel.tryLock(0, Long.MAX_VALUE, false)) == null) + throw new StingException("Index file could not be written because a lock could not be obtained." + + "If you are running multiple instances of GATK, another process is probably creating this " + + "file now. Please wait until it is finished and try again."); + FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this); + FastaSequenceIndex sequenceIndex = faiBuilder.createIndex(); + FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile); + // unlock + try { + if (indexLock != null) + indexLock.release(); + if (indexChannel != null) + indexChannel.close(); + } + catch (Exception e) { + throw new StingException("An error occurred while unlocking file:" + indexFile.getAbsolutePath(), e); + } } catch (Exception e) { - throw new StingException("Could not load fasta file. Stack trace below. ", e); - } - } - else { - try { - index = new IndexedFastaSequenceFile(fastaFile); - } - catch (Exception e) { - throw new StingException("Could not load fasta file. Stack trace below.", e); + throw new StingException("Index file does not exist and could not be created. See error below.", e); } } @@ -83,57 +96,88 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener * Currently, dictionary cannot be created without running CreateSequenceDictionary's main routine, hence the * argument string * This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary - * - * Todo: Currently working on making the creation of dictionary and index thread-safe - * */ - /*if (!dictFile.exists()) { - logger.info(String.format("Fasta dictionary file %s does not exist. Trying to create it now.", - dictFile.getAbsolutePath())); - String args[] = {String.format("r=%s", fastaFile.getAbsolutePath()), - String.format("o=%s", dictFile.getAbsolutePath())}; - new CreateSequenceDictionary().instanceMain(args); - logger.info(String.format("Dictionary file created successfully!")); - }*/ - - /** - * Create fai file if it doesn't exist and throw exception if unsuccessful - * Note that this implies walkers cannot be run if a fai file is not provided and GATK cannot write to disk - * - * Todo: currently working on making this thread safe. - * Rather than creating a new fai file, structure is created in memory. This whole block will be fixed - * in a couple days when we figure out the thread stuff. - * - */ - /*if (!indexFile.exists()) { - logger.info(String.format("Fasta index file %s does not exist. Trying to create it now.", - indexFile.getAbsolutePath())); - FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this); - + if (!dictFile.exists()) { + logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath())); + FileChannel dictChannel; + FileLock dictLock; try { - faiBuilder.saveAsFaiFile(); - logger.info(String.format("Index file created successfully!")); + // get exclusive lock + dictChannel = new RandomAccessFile(indexFile, "rw").getChannel(); + if ((dictLock = dictChannel.tryLock(0, Long.MAX_VALUE, false)) == null) + + throw new StingException("Dictionary file could not be written because a lock could not be obtained." + + "If you are running multiple instances of GATK, another process is probably creating this " + + "file now. Please wait until it is finished and try again."); + + // create dictionary by calling main routine. Temporary fix - see comment above. + String args[] = {String.format("r=%s", fastaFile.getAbsolutePath()), + String.format("o=%s", dictFile.getAbsolutePath())}; + new CreateSequenceDictionary().instanceMain(args); + // unlock + try { + if (dictLock != null) + dictLock.release(); + if (dictChannel != null) + dictChannel.close(); + } + catch (Exception e) { + throw new StingException("An error occurred while unlocking file:" + indexFile.getAbsolutePath(), e); + } } catch (Exception e) { - throw new StingException("Index file could not be saved", e); + throw new StingException("Dictionary file does not exist and could not be created. See error below.", e); } } - // now create IndexedFastaSequenceFile + /* + * Read reference data by creating an IndexedFastaSequenceFile. + * A note about thread safety: IndexFastaSequenceFile reads the fasta using dictionary and index files. It will + * fail if either does not exist, but not if either is currently being written (in which case it exists + * but is incomplete). To avoid this, obtain shared locks on both files before creating IndexedFastaSequenceFile. + */ + + FileChannel dictChannel; + FileChannel indexChannel; + FileLock dictLock; + FileLock indexLock; try { - index = indexFile.exists() ? new IndexedFastaSequenceFile(fastaFile) : - new IndexedFastaSequenceFile(fastaFile, sequenceIndex); - } - catch (IOException e) { - throw new StingException("An error occurred while loading the fasta file and its .fasta.fai and " + - ".dict counterparts.", e); + // set up dictionary and index locks + // channel is read only and lock is shared (third argument is true) + dictChannel = new RandomAccessFile(dictFile, "r").getChannel(); + if ((dictLock = dictChannel.tryLock(0, Long.MAX_VALUE, true)) == null) { + throw new StingException("Could not open dictionary file because a lock could not be obtained."); + } + indexChannel = new RandomAccessFile(indexFile, "r").getChannel(); + if ((indexLock = indexChannel.tryLock(0, Long.MAX_VALUE, true)) == null) { + throw new StingException("Could not open dictionary file because a lock could not be obtained."); + } + + index = new IndexedFastaSequenceFile(fastaFile); + + // unlock/close + try { + if (dictLock != null) + dictLock.release(); + if (dictChannel != null) + dictChannel.close(); + } + catch (Exception e) { + throw new StingException("An error occurred while unlocking file:" + dictFile.getAbsolutePath(), e); + } + try { + if (indexLock != null) + indexLock.release(); + if (indexChannel != null) + indexChannel.close(); + } + catch (Exception e) { + throw new StingException("An error occurred while unlocking file:" + indexFile.getAbsolutePath(), e); + } } catch (Exception e) { - throw new StingException("An error occurred while processing the fasta file and its .fasta.fai and " + - ".dict counterparts. If the error could have been caused by the .fasta.fai or .dict files, " + - "you can re-create them by removing them from the folder that the fasta file is in and " + - "running GATK again.", e); - }*/ + throw new StingException(String.format("Error reading fasta file %s. See stack trace below.", fastaFile.getAbsolutePath()), e); + } } /** diff --git a/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexBuilder.java b/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexBuilder.java index 7a96ccafa..9a5cd2e75 100644 --- a/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexBuilder.java +++ b/java/src/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexBuilder.java @@ -39,7 +39,6 @@ import java.util.Iterator; public class FastaSequenceIndexBuilder { public File fastaFile; ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file - public FastaSequenceIndex sequenceIndex = new FastaSequenceIndex(); // keep track of location in file long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file; @@ -57,13 +56,13 @@ public class FastaSequenceIndexBuilder { this.progress = progress; this.fastaFile = fastaFile; fileLength = fastaFile.length(); - parseFastaFile(); } /** * Creates fasta sequence index from fasta file + * @return FastaSequenceIndex that is read from file */ - private void parseFastaFile() { + public FastaSequenceIndex createIndex() { // should this be static? bytesRead = -1; endOfLastLine = -1; contig = ""; @@ -73,6 +72,7 @@ public class FastaSequenceIndexBuilder { basesPerLine = 0; basesThisLine = 0; lastTimestamp = System.currentTimeMillis(); + FastaSequenceIndex sequenceIndex = new FastaSequenceIndex(); // initialize input stream DataInputStream in; @@ -156,7 +156,7 @@ public class FastaSequenceIndexBuilder { // if next char is ';' or '>', then there is only one contig => if (nextByte == ';' || nextByte == '>') - finishReadingContig(); + finishReadingContig(sequenceIndex); } } @@ -183,7 +183,7 @@ public class FastaSequenceIndexBuilder { // if comment or new contig, definitely end of sequence if (nextByte == ';' || nextByte == '>') - finishReadingContig(); + finishReadingContig(sequenceIndex); // if this line has different # of bases OR same # of bases and different # of bytes: // error if next char is a valid base, end of contig otherwise @@ -192,7 +192,7 @@ public class FastaSequenceIndexBuilder { throw new StingException(String.format("An invalid line was found in the contig: %s", contig)); } else - finishReadingContig(); + finishReadingContig(sequenceIndex); } endOfLastLine = bytesRead; } @@ -206,6 +206,7 @@ public class FastaSequenceIndexBuilder { break; } } + return sequenceIndex; } catch (IOException e) { throw new StingException(String.format("Could not read fasta file %s", fastaFile.getAbsolutePath()), e); } @@ -239,7 +240,7 @@ public class FastaSequenceIndexBuilder { * When reader reaches the end of a contig * Reset iterators and add contig to sequence index */ - private void finishReadingContig() { + private void finishReadingContig(FastaSequenceIndex sequenceIndex) { sequenceIndex.addIndexEntry(contig, location, size, (int) basesPerLine, (int) bytesPerLine); status = Status.NONE; contig = ""; @@ -256,22 +257,18 @@ public class FastaSequenceIndexBuilder { /** * Stores FastaSequenceIndex as a .fasta.fai file on local machine * Although method is public it cannot be called on any old FastaSequenceIndex - must be created by a FastaSequenceIndexBuilder + * @param sequenceIndex sequenceIndex to be saved + * @param faiFile file where we should store index */ - public void saveAsFaiFile() { - File indexFile = new File(fastaFile.getAbsolutePath() + ".fai"); - - if (indexFile.exists()) { - throw new StingException(String.format("Fai file could not be created, because a file with name %s already exists." + - "Please delete or rename this file and try again.", indexFile.getAbsolutePath())); - } + public static void saveAsFaiFile(FastaSequenceIndex sequenceIndex, File faiFile) { BufferedWriter out; try { - out = new BufferedWriter(new FileWriter(indexFile)); + out = new BufferedWriter(new FileWriter(faiFile)); } catch (Exception e) { throw new StingException(String.format("Could not open file %s for writing. Check that GATK is permitted to write to disk.", - indexFile.getAbsolutePath()), e); + faiFile.getAbsolutePath()), e); } Iterator iter = sequenceIndex.iterator(); diff --git a/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexBuilderUnitTest.java b/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexBuilderUnitTest.java index 107fb6d4e..af28a4e88 100644 --- a/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexBuilderUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/fasta/FastaSequenceIndexBuilderUnitTest.java @@ -43,11 +43,11 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest { private FastaSequenceIndexBuilder builder; private ReferenceDataSourceProgressListener progress; private File fastaFile; - private FastaSequenceIndex sequenceIndex; + private FastaSequenceIndex controlIndex; @Before public void doForEachTest() throws FileNotFoundException { - sequenceIndex = new FastaSequenceIndex(); + controlIndex = new FastaSequenceIndex(); } /** @@ -60,9 +60,10 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest { fastaFile = new File(validationDataLocation + "exampleFASTA.fasta"); builder = new FastaSequenceIndexBuilder(fastaFile, progress); - sequenceIndex.addIndexEntry("chr1", 6, 100000, 60, 61); + FastaSequenceIndex index = builder.createIndex(); + controlIndex.addIndexEntry("chr1", 6, 100000, 60, 61); - Assert.assertTrue(sequenceIndex.equals(builder.sequenceIndex)); + Assert.assertTrue(index.equals(controlIndex)); } @@ -76,9 +77,10 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest { fastaFile = new File(validationDataLocation + "exampleFASTA-windows.fasta"); builder = new FastaSequenceIndexBuilder(fastaFile, progress); - sequenceIndex.addIndexEntry("chr2", 7, 29, 7, 9); + FastaSequenceIndex index = builder.createIndex(); + controlIndex.addIndexEntry("chr2", 7, 29, 7, 9); - Assert.assertTrue(sequenceIndex.equals(builder.sequenceIndex)); + Assert.assertTrue(index.equals(controlIndex)); } /** @@ -91,10 +93,11 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest { fastaFile = new File(validationDataLocation + "exampleFASTA-combined.fasta"); builder = new FastaSequenceIndexBuilder(fastaFile, progress); - sequenceIndex.addIndexEntry("chr1", 6, 100000, 60, 61); - sequenceIndex.addIndexEntry("chr2", 101680, 29, 7, 9); + FastaSequenceIndex index = builder.createIndex(); + controlIndex.addIndexEntry("chr1", 6, 100000, 60, 61); + controlIndex.addIndexEntry("chr2", 101680, 29, 7, 9); - Assert.assertTrue(sequenceIndex.equals(builder.sequenceIndex)); + Assert.assertTrue(index.equals(controlIndex)); } /** @@ -107,10 +110,11 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest { fastaFile = new File(validationDataLocation + "exampleFASTA-3contigs.fasta"); builder = new FastaSequenceIndexBuilder(fastaFile, progress); - sequenceIndex.addIndexEntry("chr1", 6, 17, 5, 6); - sequenceIndex.addIndexEntry("chr2", 35, 21, 7, 8); - sequenceIndex.addIndexEntry("chr3", 66, 100, 10, 11); + FastaSequenceIndex index = builder.createIndex(); + controlIndex.addIndexEntry("chr1", 6, 17, 5, 6); + controlIndex.addIndexEntry("chr2", 35, 21, 7, 8); + controlIndex.addIndexEntry("chr3", 66, 100, 10, 11); - Assert.assertTrue(sequenceIndex.equals(builder.sequenceIndex)); + Assert.assertTrue(index.equals(controlIndex)); } } \ No newline at end of file