Remove auto-creation of fai/dict files for fasta references
-A UserException is now thrown if either the fai or dict file for the reference does not exist, with pointers to instructions for creating these files. -Gets rid of problematic file locking that was causing intermittent errors on our farm. -Integration tests to verify that correct exceptions are thrown in the case of a missing fai / dict file. GSA-866 #resolve
This commit is contained in:
parent
e7a8e6e8ee
commit
2eac97a76c
|
|
@ -25,10 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||
|
||||
import net.sf.picard.reference.FastaSequenceIndex;
|
||||
import net.sf.picard.reference.FastaSequenceIndexBuilder;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.sam.CreateSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
|
|
@ -36,11 +33,8 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.file.FSLockWithShared;
|
||||
import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -77,128 +71,25 @@ public class ReferenceDataSource {
|
|||
final String fastaExt = fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta";
|
||||
final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
|
||||
|
||||
/*
|
||||
* if index file does not exist, create it manually
|
||||
*/
|
||||
// It's an error if either the fai or dict file does not exist. The user is now responsible
|
||||
// for creating these files.
|
||||
if (!indexFile.exists()) {
|
||||
|
||||
logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
|
||||
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
|
||||
try {
|
||||
// get exclusive lock
|
||||
if (!indexLock.exclusiveLock())
|
||||
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
|
||||
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true);
|
||||
FastaSequenceIndex sequenceIndex = faiBuilder.createIndex();
|
||||
FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile);
|
||||
}
|
||||
catch(FileSystemInabilityToLockException ex) {
|
||||
logger.info("Unable to create write lock: " + ex.getMessage());
|
||||
logger.info("Skipping index creation.");
|
||||
}
|
||||
catch(UserException e) {
|
||||
// Rethrow all user exceptions as-is; there should be more details in the UserException itself.
|
||||
throw e;
|
||||
}
|
||||
catch (Exception e) {
|
||||
// If lock creation succeeded, the failure must have been generating the index.
|
||||
// If lock creation failed, just skip over index creation entirely.
|
||||
throw new UserException.CouldNotCreateReferenceIndexFile(indexFile, e);
|
||||
}
|
||||
finally {
|
||||
indexLock.unlock();
|
||||
}
|
||||
throw new UserException.MissingReferenceFaiFile(indexFile, fastaFile);
|
||||
}
|
||||
|
||||
/*
|
||||
* If dict file doesn't exist, try to create it using Picard's CreateSequenceDictionary
|
||||
* Currently, dictionary cannot be created without running CreateSequenceDictionary's main routine, hence the
|
||||
* argument string
|
||||
* This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
|
||||
*/
|
||||
if (!dictFile.exists()) {
|
||||
|
||||
logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
|
||||
|
||||
/*
|
||||
* Please note another hack here: we have to create a temporary file b/c CreateSequenceDictionary cannot
|
||||
* create a dictionary file if that file is locked.
|
||||
*/
|
||||
|
||||
// get read lock on dict file so nobody else can read it
|
||||
FSLockWithShared dictLock = new FSLockWithShared(dictFile,true);
|
||||
try {
|
||||
// get shared lock on dict file so nobody else can start creating it
|
||||
if (!dictLock.exclusiveLock())
|
||||
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
|
||||
// dict will be written to random temporary file in same directory (see note above)
|
||||
File tempFile = File.createTempFile("dict", null, dictFile.getParentFile());
|
||||
tempFile.deleteOnExit();
|
||||
|
||||
// create dictionary by calling main routine. Temporary fix - see comment above.
|
||||
String args[] = {String.format("r=%s", fastaFile.getAbsolutePath()),
|
||||
String.format("o=%s", tempFile.getAbsolutePath())};
|
||||
new CreateSequenceDictionary().instanceMain(args);
|
||||
|
||||
if (!tempFile.renameTo(dictFile))
|
||||
throw new UserException("Error transferring temp file " + tempFile + " to dict file " + dictFile);
|
||||
}
|
||||
catch(FileSystemInabilityToLockException ex) {
|
||||
logger.info("Unable to create write lock: " + ex.getMessage());
|
||||
logger.info("Skipping dictionary creation.");
|
||||
}
|
||||
catch (Exception e) {
|
||||
// If lock creation succeeded, the failure must have been generating the index.
|
||||
// If lock creation failed, just skip over index creation entirely.
|
||||
throw new UserException.CouldNotCreateReferenceIndexFile(dictFile, e);
|
||||
}
|
||||
finally {
|
||||
dictLock.unlock();
|
||||
}
|
||||
throw new UserException.MissingReferenceDictFile(dictFile, fastaFile);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read reference data by creating an IndexedFastaSequenceFile.
|
||||
* A note about thread safety: IndexFastaSequenceFile reads the fasta using dictionary and index files. It will
|
||||
* fail if either does not exist, but not if either is currently being written (in which case it exists
|
||||
* but is incomplete). To avoid this, obtain shared locks on both files before creating IndexedFastaSequenceFile.
|
||||
*/
|
||||
|
||||
FSLockWithShared dictLock = new FSLockWithShared(dictFile,true);
|
||||
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
|
||||
// Read reference data by creating an IndexedFastaSequenceFile.
|
||||
try {
|
||||
try {
|
||||
if (!dictLock.sharedLock()) {
|
||||
throw new ReviewedStingException("Could not open dictionary file because a lock could not be obtained.");
|
||||
}
|
||||
}
|
||||
catch(FileSystemInabilityToLockException ex) {
|
||||
logger.info(String.format("Unable to create a lock on dictionary file: %s",ex.getMessage()));
|
||||
logger.info("Treating existing dictionary file as complete.");
|
||||
}
|
||||
|
||||
try {
|
||||
if (!indexLock.sharedLock()) {
|
||||
throw new ReviewedStingException("Could not open index file because a lock could not be obtained.");
|
||||
}
|
||||
}
|
||||
catch(FileSystemInabilityToLockException ex) {
|
||||
logger.info(String.format("Unable to create a lock on index file: %s",ex.getMessage()));
|
||||
logger.info("Treating existing index file as complete.");
|
||||
}
|
||||
|
||||
reference = new CachingIndexedFastaSequenceFile(fastaFile);
|
||||
|
||||
} catch (IllegalArgumentException e) {
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
throw new UserException.CouldNotReadInputFile(fastaFile, "Could not read reference sequence. The FASTA must have either a .fasta or .fa extension", e);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new UserException.CouldNotReadInputFile(fastaFile, e);
|
||||
}
|
||||
finally {
|
||||
dictLock.unlock();
|
||||
indexLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -392,29 +392,25 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
}
|
||||
|
||||
public static class CouldNotCreateReferenceIndexFile extends UserException {
|
||||
public CouldNotCreateReferenceIndexFile(File f, Exception e) {
|
||||
this(f, "", e);
|
||||
}
|
||||
|
||||
public CouldNotCreateReferenceIndexFile(File f, String message, Exception e) {
|
||||
super(String.format("Index file %s does not exist but could not be created because: %s. ", f, message)
|
||||
+ (e == null ? "" : getMessage(e)));
|
||||
}
|
||||
}
|
||||
|
||||
public static class CannotHandleGzippedRef extends UserException {
|
||||
public CannotHandleGzippedRef() {
|
||||
super("The GATK cannot process compressed (.gz) reference sequences. Please unzip the file and try again. Sorry for the inconvenience.");
|
||||
}
|
||||
public CannotHandleGzippedRef() {
|
||||
super("The GATK cannot process compressed (.gz) reference sequences. Please unzip the file and try again. Sorry for the inconvenience.");
|
||||
}
|
||||
}
|
||||
|
||||
public static class CouldNotCreateReferenceIndexFileBecauseOfLock extends UserException.CouldNotCreateReferenceIndexFile {
|
||||
public CouldNotCreateReferenceIndexFileBecauseOfLock(File f) {
|
||||
super(f, "could not be written because an exclusive file lock could not be obtained. " +
|
||||
"If you are running multiple instances of GATK, another GATK process is " +
|
||||
"probably creating this file now, and has locked it. Please wait until this process finishes " +
|
||||
"and try again.", null);
|
||||
public static class MissingReferenceFaiFile extends UserException {
|
||||
public MissingReferenceFaiFile( final File indexFile, final File fastaFile ) {
|
||||
super(String.format("Fasta index file %s for reference %s does not exist. Please see %s for help creating it.",
|
||||
indexFile.getAbsolutePath(), fastaFile.getAbsolutePath(),
|
||||
HelpConstants.forumPost("discussion/1601/how-can-i-prepare-a-fasta-file-to-use-as-reference")));
|
||||
}
|
||||
}
|
||||
|
||||
public static class MissingReferenceDictFile extends UserException {
|
||||
public MissingReferenceDictFile( final File dictFile, final File fastaFile ) {
|
||||
super(String.format("Fasta dict file %s for reference %s does not exist. Please see %s for help creating it.",
|
||||
dictFile.getAbsolutePath(), fastaFile.getAbsolutePath(),
|
||||
HelpConstants.forumPost("discussion/1601/how-can-i-prepare-a-fasta-file-to-use-as-reference")));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
public class ReferenceDataSourceIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void testReferenceWithMissingFaiFile() throws IOException {
|
||||
final File dummyReference = createTempFile("dummy", ".fasta");
|
||||
final File dictFile = new File(dummyReference.getAbsolutePath().replace(".fasta", ".dict"));
|
||||
dictFile.deleteOnExit();
|
||||
Assert.assertTrue(dictFile.createNewFile());
|
||||
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||
" -T PrintReads" +
|
||||
" -R " + dummyReference.getAbsolutePath() +
|
||||
" -I " + privateTestDir + "NA12878.4.snippet.bam" +
|
||||
" -o %s",
|
||||
1,
|
||||
UserException.MissingReferenceFaiFile.class
|
||||
);
|
||||
|
||||
executeTest("testReferenceWithMissingFaiFile", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReferenceWithMissingDictFile() throws IOException {
|
||||
final File dummyReference = createTempFile("dummy", ".fasta");
|
||||
final File faiFile = new File(dummyReference.getAbsolutePath() + ".fai");
|
||||
faiFile.deleteOnExit();
|
||||
Assert.assertTrue(faiFile.createNewFile());
|
||||
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||
" -T PrintReads" +
|
||||
" -R " + dummyReference.getAbsolutePath() +
|
||||
" -I " + privateTestDir + "NA12878.4.snippet.bam" +
|
||||
" -o %s",
|
||||
1,
|
||||
UserException.MissingReferenceDictFile.class
|
||||
);
|
||||
|
||||
executeTest("testReferenceWithMissingDictFile", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue