From 2eac97a76c74d1aa7e8a17ea590082ba23621377 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 2 Apr 2013 17:04:44 -0400 Subject: [PATCH] Remove auto-creation of fai/dict files for fasta references -A UserException is now thrown if either the fai or dict file for the reference does not exist, with pointers to instructions for creating these files. -Gets rid of problematic file locking that was causing intermittent errors on our farm. -Integration tests to verify that correct exceptions are thrown in the case of a missing fai / dict file. GSA-866 #resolve --- .../reference/ReferenceDataSource.java | 123 +----------------- .../sting/utils/exceptions/UserException.java | 36 +++-- .../ReferenceDataSourceIntegrationTest.java | 75 +++++++++++ 3 files changed, 98 insertions(+), 136 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceIntegrationTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index 79100e89a..01edd44ba 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -25,10 +25,7 @@ package org.broadinstitute.sting.gatk.datasources.reference; -import net.sf.picard.reference.FastaSequenceIndex; -import net.sf.picard.reference.FastaSequenceIndexBuilder; import net.sf.picard.reference.IndexedFastaSequenceFile; -import net.sf.picard.sam.CreateSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.datasources.reads.LocusShard; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; @@ -36,11 +33,8 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.file.FSLockWithShared; -import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException; import java.io.File; import java.util.ArrayList; @@ -77,128 +71,25 @@ public class ReferenceDataSource { final String fastaExt = fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta"; final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict")); - /* - * if index file does not exist, create it manually - */ + // It's an error if either the fai or dict file does not exist. The user is now responsible + // for creating these files. if (!indexFile.exists()) { - - logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath())); - FSLockWithShared indexLock = new FSLockWithShared(indexFile,true); - try { - // get exclusive lock - if (!indexLock.exclusiveLock()) - throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile); - FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true); - FastaSequenceIndex sequenceIndex = faiBuilder.createIndex(); - FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile); - } - catch(FileSystemInabilityToLockException ex) { - logger.info("Unable to create write lock: " + ex.getMessage()); - logger.info("Skipping index creation."); - } - catch(UserException e) { - // Rethrow all user exceptions as-is; there should be more details in the UserException itself. - throw e; - } - catch (Exception e) { - // If lock creation succeeded, the failure must have been generating the index. - // If lock creation failed, just skip over index creation entirely. - throw new UserException.CouldNotCreateReferenceIndexFile(indexFile, e); - } - finally { - indexLock.unlock(); - } + throw new UserException.MissingReferenceFaiFile(indexFile, fastaFile); } - - /* - * If dict file doesn't exist, try to create it using Picard's CreateSequenceDictionary - * Currently, dictionary cannot be created without running CreateSequenceDictionary's main routine, hence the - * argument string - * This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary - */ if (!dictFile.exists()) { - - logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath())); - - /* - * Please note another hack here: we have to create a temporary file b/c CreateSequenceDictionary cannot - * create a dictionary file if that file is locked. - */ - - // get read lock on dict file so nobody else can read it - FSLockWithShared dictLock = new FSLockWithShared(dictFile,true); - try { - // get shared lock on dict file so nobody else can start creating it - if (!dictLock.exclusiveLock()) - throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile); - // dict will be written to random temporary file in same directory (see note above) - File tempFile = File.createTempFile("dict", null, dictFile.getParentFile()); - tempFile.deleteOnExit(); - - // create dictionary by calling main routine. Temporary fix - see comment above. - String args[] = {String.format("r=%s", fastaFile.getAbsolutePath()), - String.format("o=%s", tempFile.getAbsolutePath())}; - new CreateSequenceDictionary().instanceMain(args); - - if (!tempFile.renameTo(dictFile)) - throw new UserException("Error transferring temp file " + tempFile + " to dict file " + dictFile); - } - catch(FileSystemInabilityToLockException ex) { - logger.info("Unable to create write lock: " + ex.getMessage()); - logger.info("Skipping dictionary creation."); - } - catch (Exception e) { - // If lock creation succeeded, the failure must have been generating the index. - // If lock creation failed, just skip over index creation entirely. - throw new UserException.CouldNotCreateReferenceIndexFile(dictFile, e); - } - finally { - dictLock.unlock(); - } + throw new UserException.MissingReferenceDictFile(dictFile, fastaFile); } - /* - * Read reference data by creating an IndexedFastaSequenceFile. - * A note about thread safety: IndexFastaSequenceFile reads the fasta using dictionary and index files. It will - * fail if either does not exist, but not if either is currently being written (in which case it exists - * but is incomplete). To avoid this, obtain shared locks on both files before creating IndexedFastaSequenceFile. - */ - - FSLockWithShared dictLock = new FSLockWithShared(dictFile,true); - FSLockWithShared indexLock = new FSLockWithShared(indexFile,true); + // Read reference data by creating an IndexedFastaSequenceFile. try { - try { - if (!dictLock.sharedLock()) { - throw new ReviewedStingException("Could not open dictionary file because a lock could not be obtained."); - } - } - catch(FileSystemInabilityToLockException ex) { - logger.info(String.format("Unable to create a lock on dictionary file: %s",ex.getMessage())); - logger.info("Treating existing dictionary file as complete."); - } - - try { - if (!indexLock.sharedLock()) { - throw new ReviewedStingException("Could not open index file because a lock could not be obtained."); - } - } - catch(FileSystemInabilityToLockException ex) { - logger.info(String.format("Unable to create a lock on index file: %s",ex.getMessage())); - logger.info("Treating existing index file as complete."); - } - reference = new CachingIndexedFastaSequenceFile(fastaFile); - - } catch (IllegalArgumentException e) { + } + catch (IllegalArgumentException e) { throw new UserException.CouldNotReadInputFile(fastaFile, "Could not read reference sequence. The FASTA must have either a .fasta or .fa extension", e); } catch (Exception e) { throw new UserException.CouldNotReadInputFile(fastaFile, e); } - finally { - dictLock.unlock(); - indexLock.unlock(); - } } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index fcc132ffe..83400cc73 100644 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -392,29 +392,25 @@ public class UserException extends ReviewedStingException { } } - public static class CouldNotCreateReferenceIndexFile extends UserException { - public CouldNotCreateReferenceIndexFile(File f, Exception e) { - this(f, "", e); - } - - public CouldNotCreateReferenceIndexFile(File f, String message, Exception e) { - super(String.format("Index file %s does not exist but could not be created because: %s. ", f, message) - + (e == null ? "" : getMessage(e))); - } - } - public static class CannotHandleGzippedRef extends UserException { - public CannotHandleGzippedRef() { - super("The GATK cannot process compressed (.gz) reference sequences. Please unzip the file and try again. Sorry for the inconvenience."); - } + public CannotHandleGzippedRef() { + super("The GATK cannot process compressed (.gz) reference sequences. Please unzip the file and try again. Sorry for the inconvenience."); + } } - public static class CouldNotCreateReferenceIndexFileBecauseOfLock extends UserException.CouldNotCreateReferenceIndexFile { - public CouldNotCreateReferenceIndexFileBecauseOfLock(File f) { - super(f, "could not be written because an exclusive file lock could not be obtained. " + - "If you are running multiple instances of GATK, another GATK process is " + - "probably creating this file now, and has locked it. Please wait until this process finishes " + - "and try again.", null); + public static class MissingReferenceFaiFile extends UserException { + public MissingReferenceFaiFile( final File indexFile, final File fastaFile ) { + super(String.format("Fasta index file %s for reference %s does not exist. Please see %s for help creating it.", + indexFile.getAbsolutePath(), fastaFile.getAbsolutePath(), + HelpConstants.forumPost("discussion/1601/how-can-i-prepare-a-fasta-file-to-use-as-reference"))); + } + } + + public static class MissingReferenceDictFile extends UserException { + public MissingReferenceDictFile( final File dictFile, final File fastaFile ) { + super(String.format("Fasta dict file %s for reference %s does not exist. Please see %s for help creating it.", + dictFile.getAbsolutePath(), fastaFile.getAbsolutePath(), + HelpConstants.forumPost("discussion/1601/how-can-i-prepare-a-fasta-file-to-use-as-reference"))); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceIntegrationTest.java new file mode 100644 index 000000000..00d0dd051 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceIntegrationTest.java @@ -0,0 +1,75 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.datasources.reference; + +import junit.framework.Assert; +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; + +public class ReferenceDataSourceIntegrationTest extends WalkerTest { + + @Test + public void testReferenceWithMissingFaiFile() throws IOException { + final File dummyReference = createTempFile("dummy", ".fasta"); + final File dictFile = new File(dummyReference.getAbsolutePath().replace(".fasta", ".dict")); + dictFile.deleteOnExit(); + Assert.assertTrue(dictFile.createNewFile()); + + final WalkerTestSpec spec = new WalkerTestSpec( + " -T PrintReads" + + " -R " + dummyReference.getAbsolutePath() + + " -I " + privateTestDir + "NA12878.4.snippet.bam" + + " -o %s", + 1, + UserException.MissingReferenceFaiFile.class + ); + + executeTest("testReferenceWithMissingFaiFile", spec); + } + + @Test + public void testReferenceWithMissingDictFile() throws IOException { + final File dummyReference = createTempFile("dummy", ".fasta"); + final File faiFile = new File(dummyReference.getAbsolutePath() + ".fai"); + faiFile.deleteOnExit(); + Assert.assertTrue(faiFile.createNewFile()); + + final WalkerTestSpec spec = new WalkerTestSpec( + " -T PrintReads" + + " -R " + dummyReference.getAbsolutePath() + + " -I " + privateTestDir + "NA12878.4.snippet.bam" + + " -o %s", + 1, + UserException.MissingReferenceDictFile.class + ); + + executeTest("testReferenceWithMissingDictFile", spec); + } +}