minimap2/bseq.c

63 lines
1.3 KiB
C
Raw Normal View History

2017-04-07 03:37:34 +08:00
#include <zlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "bseq.h"
#include "kseq.h"
KSEQ_INIT2(, gzFile, gzread)
2017-04-07 03:37:34 +08:00
struct mm_bseq_file_s {
2017-04-07 03:37:34 +08:00
gzFile fp;
kseq_t *ks;
};
mm_bseq_file_t *mm_bseq_open(const char *fn)
2017-04-07 03:37:34 +08:00
{
mm_bseq_file_t *fp;
2017-04-07 03:37:34 +08:00
gzFile f;
f = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
if (f == 0) return 0;
fp = (mm_bseq_file_t*)calloc(1, sizeof(mm_bseq_file_t));
2017-04-07 03:37:34 +08:00
fp->fp = f;
fp->ks = kseq_init(fp->fp);
return fp;
}
/**
 * Release a handle obtained from mm_bseq_open(): destroy the kseq parser,
 * close the zlib stream and free the handle itself.
 *
 * @param fp  handle to close; a null pointer is accepted and ignored, so the
 *            result of a failed mm_bseq_open() can be passed unconditionally
 */
void mm_bseq_close(mm_bseq_file_t *fp)
{
	if (fp == 0) return;
	kseq_destroy(fp->ks);
	gzclose(fp->fp);
	free(fp);
}
/**
 * Read a batch of records from an open sequence file.
 *
 * Records are read until kseq_read() reports a negative value or the summed
 * sequence length of the batch reaches chunk_size, whichever comes first.
 * At least one record is returned if one is available, even when it alone
 * exceeds chunk_size.
 *
 * @param fp          handle from mm_bseq_open()
 * @param chunk_size  stop once the total sequence length reaches this value
 * @param with_qual   non-zero to keep quality strings when a record has one
 * @param n_          receives the number of records returned
 * @return            heap array of *n_ records; the caller owns the array and
 *                    each record's name/seq/qual strings. Returns 0 with
 *                    *n_ == 0 when no record could be read, or when memory
 *                    ran out (a message is printed to stderr and everything
 *                    read by this call is freed).
 */
mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int chunk_size, int with_qual, int *n_)
{
	int size = 0, m = 0, n = 0, i;
	mm_bseq1_t *seqs = 0;
	kseq_t *ks = fp->ks;

	while (kseq_read(ks) >= 0) {
		mm_bseq1_t *s;
		int want_qual = with_qual && ks->qual.l;

		assert(ks->seq.l <= INT32_MAX);
		if (n >= m) {
			int new_m;
			mm_bseq1_t *tmp;
			if (m > INT32_MAX / 2) goto fail; /* doubling would overflow int */
			new_m = m? m<<1 : 256;
			/* Grow through a temporary so the old array survives a failure. */
			tmp = (mm_bseq1_t*)realloc(seqs, (size_t)new_m * sizeof(mm_bseq1_t));
			if (tmp == 0) goto fail;
			seqs = tmp;
			m = new_m;
		}
		s = &seqs[n];
		s->name = strdup(ks->name.s);
		s->seq = strdup(ks->seq.s);
		s->qual = want_qual? strdup(ks->qual.s) : 0;
		s->l_seq = ks->seq.l;
		++n; /* count the record first so the failure path frees its strings */
		if (s->name == 0 || s->seq == 0 || (want_qual && s->qual == 0)) goto fail;
		/* Same test as `size += l_seq; if (size >= chunk_size) break;` but
		 * written so the running total can never overflow int: here size is
		 * either 0 or strictly below chunk_size, so the subtraction is safe. */
		if (s->l_seq >= chunk_size - size) break;
		size += s->l_seq;
	}
	*n_ = n;
	return seqs;

fail:
	fprintf(stderr, "[mm_bseq_read] failed to allocate memory for input sequences\n");
	for (i = 0; i < n; ++i) {
		free(seqs[i].name);
		free(seqs[i].seq);
		free(seqs[i].qual);
	}
	free(seqs);
	*n_ = 0;
	return 0;
}
/**
 * Report whether the input behind fp has been fully consumed.
 *
 * @param fp  handle from mm_bseq_open()
 * @return    the result of ks_eof() on the kseq reader's underlying stream
 */
int mm_bseq_eof(mm_bseq_file_t *fp)
{
	return ks_eof(fp->ks->f);
}