Fixing bug in fasta .fai generation: trim the contig names to the first whitespace if one appears. We now generate indexes identical to samtools.

This commit is contained in:
Eric Banks 2012-08-29 22:36:27 -04:00
parent 4d38befe86
commit 1acf0f0b2c
1 changed files with 9 additions and 1 deletions

View File

@ -245,7 +245,7 @@ public class FastaSequenceIndexBuilder {
* Reset iterators and add contig to sequence index
*/
private void finishReadingContig(FastaSequenceIndex sequenceIndex) {
sequenceIndex.add(new FastaSequenceIndexEntry(contig, location, size, (int) basesPerLine, (int) bytesPerLine, thisSequenceIndex++));
sequenceIndex.add(new FastaSequenceIndexEntry(trimContigName(contig), location, size, (int) basesPerLine, (int) bytesPerLine, thisSequenceIndex++));
status = Status.NONE;
contig = "";
size = 0;
@ -258,6 +258,14 @@ public class FastaSequenceIndexBuilder {
}
}
/**
 * Trims the contig name to the expected value by removing the first whitespace
 * character and everything after it.
 *
 * Any whitespace character (space, tab, etc.) terminates the name, not only a
 * literal space, so a header whose description is separated from the name by
 * a tab is trimmed as well.
 *
 * @param contigName the raw contig name as read from the fasta header; must not be null
 * @return the portion of contigName before its first whitespace character, or
 *         contigName itself if it contains no whitespace
 */
private static String trimContigName(final String contigName) {
    final int length = contigName.length();
    for ( int i = 0; i < length; i++ ) {
        // stop at the first whitespace of any kind, not just ' '
        if ( Character.isWhitespace(contigName.charAt(i)) )
            return contigName.substring(0, i);
    }
    return contigName;
}
/**
* Stores FastaSequenceIndex as a .fasta.fai file on local machine
* Although method is public it cannot be called on any old FastaSequenceIndex - must be created by a FastaSequenceIndexBuilder