Fixed GSA-434: GATK should generate error when gzipped FASTA is passed in.
-- The GATK sort of handles this now, but only if you have exactly the correct sequence dictionary (.dict) and FAI (.fai) files associated with the reference. If you do, the file can be .gz. If not, the GATK will fail when it tries to create the FAI and DICT files. Added an error message that handles this case and clearly says what to do.
This commit is contained in:
parent
a3d2764d11
commit
be0f8beebb
|
|
@ -62,23 +62,24 @@ public class ReferenceDataSource {
|
|||
* @param fastaFile Fasta file to be used as reference
|
||||
*/
|
||||
public ReferenceDataSource(File fastaFile) {
|
||||
|
||||
// does the fasta file exist? check that first...
|
||||
if (!fastaFile.exists())
|
||||
throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
|
||||
|
||||
File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
|
||||
File dictFile;
|
||||
if (fastaFile.getAbsolutePath().endsWith("fa")) {
|
||||
dictFile = new File(fastaFile.getAbsolutePath().replace(".fa", ".dict"));
|
||||
}
|
||||
else
|
||||
dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict"));
|
||||
final boolean isGzipped = fastaFile.getAbsolutePath().endsWith(".gz");
|
||||
|
||||
final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
|
||||
|
||||
// determine the name for the dict file
|
||||
final String fastaExt = (fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta" ) + (isGzipped ? ".gz" : "");
|
||||
final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
|
||||
|
||||
/*
|
||||
if index file does not exist, create it manually
|
||||
*/
|
||||
* if index file does not exist, create it manually
|
||||
*/
|
||||
if (!indexFile.exists()) {
|
||||
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
|
||||
|
||||
logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
|
||||
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
|
||||
try {
|
||||
|
|
@ -95,7 +96,7 @@ public class ReferenceDataSource {
|
|||
}
|
||||
catch(UserException e) {
|
||||
// Rethrow all user exceptions as-is; there should be more details in the UserException itself.
|
||||
throw e;
|
||||
throw e;
|
||||
}
|
||||
catch (Exception e) {
|
||||
// If lock creation succeeded, the failure must have been generating the index.
|
||||
|
|
@ -114,6 +115,8 @@ public class ReferenceDataSource {
|
|||
* This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
|
||||
*/
|
||||
if (!dictFile.exists()) {
|
||||
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
|
||||
|
||||
logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
|
||||
|
||||
/*
|
||||
|
|
@ -218,9 +221,9 @@ public class ReferenceDataSource {
|
|||
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
|
||||
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
|
||||
shards.add(new LocusShard(parser,
|
||||
readsDataSource,
|
||||
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
|
||||
null));
|
||||
readsDataSource,
|
||||
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
|
||||
null));
|
||||
}
|
||||
}
|
||||
return shards;
|
||||
|
|
|
|||
|
|
@ -340,6 +340,17 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
}
|
||||
|
||||
public static class CouldNotCreateReferenceFAIorDictForGzippedRef extends UserException {
|
||||
public CouldNotCreateReferenceFAIorDictForGzippedRef(final File f) {
|
||||
super("Although the GATK can process .gz reference sequences, it currently cannot create FAI " +
|
||||
"or DICT files for them. In order to use the GATK with reference.fasta.gz you will need to " +
|
||||
"create .dict and .fai files for reference.fasta.gz and name them reference.fasta.gz.fai and " +
|
||||
"reference.dict. Potentially the easiest way to do this is to uncompress reference.fasta, " +
|
||||
"run the GATK to create the .dict and .fai files, and copy them to the appropriate location. " +
|
||||
"Sorry for the inconvenience.");
|
||||
}
|
||||
}
|
||||
|
||||
public static class CouldNotCreateReferenceIndexFileBecauseOfLock extends UserException.CouldNotCreateReferenceIndexFile {
|
||||
public CouldNotCreateReferenceIndexFileBecauseOfLock(File f) {
|
||||
super(f, "could not be written because an exclusive file lock could not be obtained. " +
|
||||
|
|
|
|||
Loading…
Reference in New Issue