In bwtsw, replace the batch seq-reader with bseq
This commit is contained in:
parent
901d28d5f5
commit
a09db69037
2
bseq.c
2
bseq.c
|
|
@ -48,7 +48,7 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
|
|||
}
|
||||
*n_ = n;
|
||||
if (size < chunk_size) { // test if the 2nd file is finished
|
||||
if (kseq_read(ks2) >= 0)
|
||||
if (ks2 && kseq_read(ks2) >= 0)
|
||||
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
|
||||
}
|
||||
return seqs;
|
||||
|
|
|
|||
54
bwtsw2_aux.c
54
bwtsw2_aux.c
|
|
@ -13,6 +13,7 @@
|
|||
#include "bwtsw2.h"
|
||||
#include "stdaln.h"
|
||||
#include "kstring.h"
|
||||
#include "bseq.h"
|
||||
|
||||
#include "kseq.h"
|
||||
KSEQ_DECLARE(gzFile)
|
||||
|
|
@ -756,24 +757,14 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *
|
|||
_seq->n = 0;
|
||||
}
|
||||
|
||||
static void kseq_to_bsw2seq(const kseq_t *ks, bsw2seq1_t *p)
|
||||
{
|
||||
p->tid = -1;
|
||||
p->l = ks->seq.l;
|
||||
p->name = strdup(ks->name.s);
|
||||
p->seq = strdup(ks->seq.s);
|
||||
p->qual = ks->qual.l? strdup(ks->qual.s) : 0;
|
||||
p->comment = ks->comment.l? strdup(ks->comment.s) : 0;
|
||||
p->sam = 0;
|
||||
}
|
||||
|
||||
void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, const char *fn, const char *fn2)
|
||||
{
|
||||
gzFile fp, fp2;
|
||||
kseq_t *ks, *ks2;
|
||||
int l, size = 0, is_pe = 0;
|
||||
int l, is_pe = 0, i, n;
|
||||
uint8_t *pac;
|
||||
bsw2seq_t *_seq;
|
||||
bseq1_t *bseq;
|
||||
|
||||
pac = calloc(bns->l_pac/4+1, 1);
|
||||
if (pac == 0) {
|
||||
|
|
@ -791,34 +782,25 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
|
|||
ks2 = kseq_init(fp2);
|
||||
is_pe = 1;
|
||||
} else fp2 = 0, ks2 = 0, is_pe = 0;
|
||||
while (kseq_read(ks) >= 0) {
|
||||
if (ks->name.l > 2 && ks->name.s[ks->name.l-2] == '/')
|
||||
ks->name.l -= 2, ks->name.s[ks->name.l] = 0;
|
||||
if (_seq->n == _seq->max) {
|
||||
_seq->max = _seq->max? _seq->max<<1 : 1024;
|
||||
while ((bseq = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
|
||||
int size = 0;
|
||||
if (n > _seq->max) {
|
||||
_seq->max = n;
|
||||
kroundup32(_seq->max);
|
||||
_seq->seq = realloc(_seq->seq, _seq->max * sizeof(bsw2seq1_t));
|
||||
}
|
||||
kseq_to_bsw2seq(ks, &_seq->seq[_seq->n++]);
|
||||
size += ks->seq.l;
|
||||
if (ks2) {
|
||||
if (kseq_read(ks2) >= 0) {
|
||||
if (ks2->name.l > 2 && ks2->name.s[ks2->name.l-2] == '/')
|
||||
ks2->name.l -= 2, ks2->name.s[ks2->name.l] = 0;
|
||||
kseq_to_bsw2seq(ks2, &_seq->seq[_seq->n++]); // for PE, _seq->n here must be odd and we do not need to enlarge
|
||||
size += ks->seq.l;
|
||||
} else {
|
||||
fprintf(stderr, "[%s] The second query file has fewer reads. Switched to the single-end mode for the following batches.\n", __func__);
|
||||
is_pe = 0;
|
||||
}
|
||||
}
|
||||
if (size > opt->chunk_size * opt->n_threads) {
|
||||
fprintf(stderr, "[bsw2_aln] read %d sequences/pairs (%d bp)...\n", _seq->n, size);
|
||||
process_seqs(_seq, opt, bns, pac, target, is_pe);
|
||||
size = 0;
|
||||
_seq->n = n;
|
||||
for (i = 0; i < n; ++i) {
|
||||
bseq1_t *b = &bseq[i];
|
||||
bsw2seq1_t *p = &_seq->seq[i];
|
||||
p->tid = -1; p->l = b->l_seq;
|
||||
p->name = b->name; p->seq = b->seq; p->qual = b->qual; p->comment = b->comment; p->sam = 0;
|
||||
size += p->l;
|
||||
}
|
||||
fprintf(stderr, "[bsw2_aln] read %d sequences/pairs (%d bp) ...\n", n, size);
|
||||
free(bseq);
|
||||
process_seqs(_seq, opt, bns, pac, target, is_pe);
|
||||
}
|
||||
fprintf(stderr, "[bsw2_aln] read %d sequences/pairs (%d bp)...\n", _seq->n, size);
|
||||
process_seqs(_seq, opt, bns, pac, target, is_pe);
|
||||
// free
|
||||
free(pac);
|
||||
free(_seq->seq); free(_seq);
|
||||
|
|
|
|||
Loading…
Reference in New Issue