Added wrappers for fputc and fputs; more efficient sequence printing

Added wrappers err_fputc and err_fputs to catch failures in fput and fputs.
Macros err_putchar and err_puts call the new wrappers and can be used in
place of putchar and puts.

To avoid having to make millions of function calls when printing out
sequences, the code to print them in bwa_print_sam1 using putchar has
been replaced by a new version in bwa_print_seq that puts the sequence
into a buffer and then outputs the lot with err_fwrite.  In testing, the
new code was slightly faster than the old version, with the added benefit
that it will stop promptly if IO problems are detected.
This commit is contained in:
Rob Davies 2013-01-09 14:43:36 +00:00
parent 55f1b36534
commit 4f4e998d7f
4 changed files with 61 additions and 15 deletions

38
bwase.c
View File

@ -404,6 +404,26 @@ static int64_t pos_5(const bwa_seq_t *p)
return -1;
}
void bwa_print_seq(FILE *stream, bwa_seq_t *seq) {
char buffer[4096];
const int bsz = sizeof(buffer);
int i, j, l;
if (seq->strand == 0) {
for (i = 0; i < seq->full_len; i += bsz) {
l = seq->full_len - i > bsz ? bsz : seq->full_len - i;
for (j = 0; j < l; j++) buffer[j] = "ACGTN"[seq->seq[i + j]];
err_fwrite(buffer, 1, l, stream);
}
} else {
for (i = seq->full_len - 1; i >= 0; i -= bsz) {
l = i + 1 > bsz ? bsz : i + 1;
for (j = 0; j < l; j++) buffer[j] = "TGCAN"[seq->seq[i - j]];
err_fwrite(buffer, 1, l, stream);
}
}
}
void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2)
{
int j;
@ -455,10 +475,8 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
else err_printf("\t*\t0\t0\t");
// print sequence and quality
if (p->strand == 0)
for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]);
else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]);
putchar('\t');
bwa_print_seq(stdout, p);
err_putchar('\t');
if (p->qual) {
if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
err_printf("%s", p->qual);
@ -500,14 +518,16 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
}
}
}
putchar('\n');
err_putchar('\n');
} else { // this read has no match
ubyte_t *s = p->strand? p->rseq : p->seq;
//ubyte_t *s = p->strand? p->rseq : p->seq;
int flag = p->extra_flag | SAM_FSU;
if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;
err_printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag);
for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);
putchar('\t');
//Why did this work differently to the version above??
//for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);
bwa_print_seq(stdout, p);
err_putchar('\t');
if (p->qual) {
if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
err_printf("%s", p->qual);
@ -515,7 +535,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
if (bwa_rg_id) err_printf("\tRG:Z:%s", bwa_rg_id);
if (p->bc[0]) err_printf("\tBC:Z:%s", p->bc);
if (p->clip_len < p->full_len) err_printf("\tXC:i:%d", p->clip_len);
putchar('\n');
err_putchar('\n');
}
}

View File

@ -91,9 +91,9 @@ int main_fastmap(int argc, char *argv[])
while (kseq_read(seq) >= 0) {
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
if (print_seq) {
putchar('\t');
puts(seq->seq.s);
} else putchar('\n');
err_putchar('\t');
err_puts(seq->seq.s);
} else err_putchar('\n');
for (i = 0; i < seq->seq.l; ++i)
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
smem_set_query(iter, seq->seq.l, (uint8_t*)seq->seq.s);
@ -112,11 +112,11 @@ int main_fastmap(int argc, char *argv[])
bns_cnt_ambi(bns, pos, len, &ref_id);
printf("\t%s:%c%ld", bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
}
} else fputs("\t*", stdout);
putchar('\n');
} else err_puts("\t*");
err_putchar('\n');
}
}
puts("//");
err_puts("//");
}
smem_iter_destroy(iter);

22
utils.c
View File

@ -195,6 +195,28 @@ int err_fprintf(FILE *stream, const char *format, ...)
return done;
}
int err_fputc(int c, FILE *stream)
{
int ret = putc(c, stream);
if (EOF == ret)
{
_err_fatal_simple("fputc", strerror(errno));
}
return ret;
}
int err_fputs(const char *s, FILE *stream)
{
int ret = fputs(s, stream);
if (EOF == ret)
{
_err_fatal_simple("fputs", strerror(errno));
}
return ret;
}
int err_fflush(FILE *stream)
{
int ret = fflush(stream);

View File

@ -76,6 +76,10 @@ extern "C" {
ATTRIBUTE((format(printf, 2, 3)));
int err_printf(const char *format, ...)
ATTRIBUTE((format(printf, 1, 2)));
int err_fputc(int c, FILE *stream);
#define err_putchar(C) err_fputc((C), stdout)
int err_fputs(const char *s, FILE *stream);
#define err_puts(S) err_fputs((S), stdout)
int err_fflush(FILE *stream);
int err_fclose(FILE *stream);
int err_gzclose(gzFile file);