Merge pull request #143 from broadinstitute/dr_disable_auto_fai_dict_creation_GSA-866
Remove auto-creation of fai/dict files for fasta references
This commit is contained in:
commit
95be16dca7
|
|
@ -25,10 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||||
|
|
||||||
import net.sf.picard.reference.FastaSequenceIndex;
|
|
||||||
import net.sf.picard.reference.FastaSequenceIndexBuilder;
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import net.sf.picard.sam.CreateSequenceDictionary;
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
|
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||||
|
|
@ -36,11 +33,8 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.utils.file.FSLockWithShared;
|
|
||||||
import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -77,128 +71,25 @@ public class ReferenceDataSource {
|
||||||
final String fastaExt = fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta";
|
final String fastaExt = fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta";
|
||||||
final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
|
final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
|
||||||
|
|
||||||
/*
|
// It's an error if either the fai or dict file does not exist. The user is now responsible
|
||||||
* if index file does not exist, create it manually
|
// for creating these files.
|
||||||
*/
|
|
||||||
if (!indexFile.exists()) {
|
if (!indexFile.exists()) {
|
||||||
|
throw new UserException.MissingReferenceFaiFile(indexFile, fastaFile);
|
||||||
logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
|
|
||||||
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
|
|
||||||
try {
|
|
||||||
// get exclusive lock
|
|
||||||
if (!indexLock.exclusiveLock())
|
|
||||||
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
|
|
||||||
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true);
|
|
||||||
FastaSequenceIndex sequenceIndex = faiBuilder.createIndex();
|
|
||||||
FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile);
|
|
||||||
}
|
|
||||||
catch(FileSystemInabilityToLockException ex) {
|
|
||||||
logger.info("Unable to create write lock: " + ex.getMessage());
|
|
||||||
logger.info("Skipping index creation.");
|
|
||||||
}
|
|
||||||
catch(UserException e) {
|
|
||||||
// Rethrow all user exceptions as-is; there should be more details in the UserException itself.
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
catch (Exception e) {
|
|
||||||
// If lock creation succeeded, the failure must have been generating the index.
|
|
||||||
// If lock creation failed, just skip over index creation entirely.
|
|
||||||
throw new UserException.CouldNotCreateReferenceIndexFile(indexFile, e);
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
indexLock.unlock();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* If dict file doesn't exist, try to create it using Picard's CreateSequenceDictionary
|
|
||||||
* Currently, dictionary cannot be created without running CreateSequenceDictionary's main routine, hence the
|
|
||||||
* argument string
|
|
||||||
* This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
|
|
||||||
*/
|
|
||||||
if (!dictFile.exists()) {
|
if (!dictFile.exists()) {
|
||||||
|
throw new UserException.MissingReferenceDictFile(dictFile, fastaFile);
|
||||||
logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Please note another hack here: we have to create a temporary file b/c CreateSequenceDictionary cannot
|
|
||||||
* create a dictionary file if that file is locked.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// get read lock on dict file so nobody else can read it
|
|
||||||
FSLockWithShared dictLock = new FSLockWithShared(dictFile,true);
|
|
||||||
try {
|
|
||||||
// get shared lock on dict file so nobody else can start creating it
|
|
||||||
if (!dictLock.exclusiveLock())
|
|
||||||
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
|
|
||||||
// dict will be written to random temporary file in same directory (see note above)
|
|
||||||
File tempFile = File.createTempFile("dict", null, dictFile.getParentFile());
|
|
||||||
tempFile.deleteOnExit();
|
|
||||||
|
|
||||||
// create dictionary by calling main routine. Temporary fix - see comment above.
|
|
||||||
String args[] = {String.format("r=%s", fastaFile.getAbsolutePath()),
|
|
||||||
String.format("o=%s", tempFile.getAbsolutePath())};
|
|
||||||
new CreateSequenceDictionary().instanceMain(args);
|
|
||||||
|
|
||||||
if (!tempFile.renameTo(dictFile))
|
|
||||||
throw new UserException("Error transferring temp file " + tempFile + " to dict file " + dictFile);
|
|
||||||
}
|
|
||||||
catch(FileSystemInabilityToLockException ex) {
|
|
||||||
logger.info("Unable to create write lock: " + ex.getMessage());
|
|
||||||
logger.info("Skipping dictionary creation.");
|
|
||||||
}
|
|
||||||
catch (Exception e) {
|
|
||||||
// If lock creation succeeded, the failure must have been generating the index.
|
|
||||||
// If lock creation failed, just skip over index creation entirely.
|
|
||||||
throw new UserException.CouldNotCreateReferenceIndexFile(dictFile, e);
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
dictLock.unlock();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
// Read reference data by creating an IndexedFastaSequenceFile.
|
||||||
* Read reference data by creating an IndexedFastaSequenceFile.
|
|
||||||
* A note about thread safety: IndexFastaSequenceFile reads the fasta using dictionary and index files. It will
|
|
||||||
* fail if either does not exist, but not if either is currently being written (in which case it exists
|
|
||||||
* but is incomplete). To avoid this, obtain shared locks on both files before creating IndexedFastaSequenceFile.
|
|
||||||
*/
|
|
||||||
|
|
||||||
FSLockWithShared dictLock = new FSLockWithShared(dictFile,true);
|
|
||||||
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
|
|
||||||
try {
|
try {
|
||||||
try {
|
|
||||||
if (!dictLock.sharedLock()) {
|
|
||||||
throw new ReviewedStingException("Could not open dictionary file because a lock could not be obtained.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch(FileSystemInabilityToLockException ex) {
|
|
||||||
logger.info(String.format("Unable to create a lock on dictionary file: %s",ex.getMessage()));
|
|
||||||
logger.info("Treating existing dictionary file as complete.");
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (!indexLock.sharedLock()) {
|
|
||||||
throw new ReviewedStingException("Could not open index file because a lock could not be obtained.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch(FileSystemInabilityToLockException ex) {
|
|
||||||
logger.info(String.format("Unable to create a lock on index file: %s",ex.getMessage()));
|
|
||||||
logger.info("Treating existing index file as complete.");
|
|
||||||
}
|
|
||||||
|
|
||||||
reference = new CachingIndexedFastaSequenceFile(fastaFile);
|
reference = new CachingIndexedFastaSequenceFile(fastaFile);
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
catch (IllegalArgumentException e) {
|
||||||
throw new UserException.CouldNotReadInputFile(fastaFile, "Could not read reference sequence. The FASTA must have either a .fasta or .fa extension", e);
|
throw new UserException.CouldNotReadInputFile(fastaFile, "Could not read reference sequence. The FASTA must have either a .fasta or .fa extension", e);
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
throw new UserException.CouldNotReadInputFile(fastaFile, e);
|
throw new UserException.CouldNotReadInputFile(fastaFile, e);
|
||||||
}
|
}
|
||||||
finally {
|
|
||||||
dictLock.unlock();
|
|
||||||
indexLock.unlock();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -392,29 +392,25 @@ public class UserException extends ReviewedStingException {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class CouldNotCreateReferenceIndexFile extends UserException {
|
|
||||||
public CouldNotCreateReferenceIndexFile(File f, Exception e) {
|
|
||||||
this(f, "", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
public CouldNotCreateReferenceIndexFile(File f, String message, Exception e) {
|
|
||||||
super(String.format("Index file %s does not exist but could not be created because: %s. ", f, message)
|
|
||||||
+ (e == null ? "" : getMessage(e)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class CannotHandleGzippedRef extends UserException {
|
public static class CannotHandleGzippedRef extends UserException {
|
||||||
public CannotHandleGzippedRef() {
|
public CannotHandleGzippedRef() {
|
||||||
super("The GATK cannot process compressed (.gz) reference sequences. Please unzip the file and try again. Sorry for the inconvenience.");
|
super("The GATK cannot process compressed (.gz) reference sequences. Please unzip the file and try again. Sorry for the inconvenience.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class CouldNotCreateReferenceIndexFileBecauseOfLock extends UserException.CouldNotCreateReferenceIndexFile {
|
public static class MissingReferenceFaiFile extends UserException {
|
||||||
public CouldNotCreateReferenceIndexFileBecauseOfLock(File f) {
|
public MissingReferenceFaiFile( final File indexFile, final File fastaFile ) {
|
||||||
super(f, "could not be written because an exclusive file lock could not be obtained. " +
|
super(String.format("Fasta index file %s for reference %s does not exist. Please see %s for help creating it.",
|
||||||
"If you are running multiple instances of GATK, another GATK process is " +
|
indexFile.getAbsolutePath(), fastaFile.getAbsolutePath(),
|
||||||
"probably creating this file now, and has locked it. Please wait until this process finishes " +
|
HelpConstants.forumPost("discussion/1601/how-can-i-prepare-a-fasta-file-to-use-as-reference")));
|
||||||
"and try again.", null);
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class MissingReferenceDictFile extends UserException {
|
||||||
|
public MissingReferenceDictFile( final File dictFile, final File fastaFile ) {
|
||||||
|
super(String.format("Fasta dict file %s for reference %s does not exist. Please see %s for help creating it.",
|
||||||
|
dictFile.getAbsolutePath(), fastaFile.getAbsolutePath(),
|
||||||
|
HelpConstants.forumPost("discussion/1601/how-can-i-prepare-a-fasta-file-to-use-as-reference")));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,75 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class ReferenceDataSourceIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReferenceWithMissingFaiFile() throws IOException {
|
||||||
|
final File dummyReference = createTempFile("dummy", ".fasta");
|
||||||
|
final File dictFile = new File(dummyReference.getAbsolutePath().replace(".fasta", ".dict"));
|
||||||
|
dictFile.deleteOnExit();
|
||||||
|
Assert.assertTrue(dictFile.createNewFile());
|
||||||
|
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
" -T PrintReads" +
|
||||||
|
" -R " + dummyReference.getAbsolutePath() +
|
||||||
|
" -I " + privateTestDir + "NA12878.4.snippet.bam" +
|
||||||
|
" -o %s",
|
||||||
|
1,
|
||||||
|
UserException.MissingReferenceFaiFile.class
|
||||||
|
);
|
||||||
|
|
||||||
|
executeTest("testReferenceWithMissingFaiFile", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReferenceWithMissingDictFile() throws IOException {
|
||||||
|
final File dummyReference = createTempFile("dummy", ".fasta");
|
||||||
|
final File faiFile = new File(dummyReference.getAbsolutePath() + ".fai");
|
||||||
|
faiFile.deleteOnExit();
|
||||||
|
Assert.assertTrue(faiFile.createNewFile());
|
||||||
|
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
" -T PrintReads" +
|
||||||
|
" -R " + dummyReference.getAbsolutePath() +
|
||||||
|
" -I " + privateTestDir + "NA12878.4.snippet.bam" +
|
||||||
|
" -o %s",
|
||||||
|
1,
|
||||||
|
UserException.MissingReferenceDictFile.class
|
||||||
|
);
|
||||||
|
|
||||||
|
executeTest("testReferenceWithMissingDictFile", spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue