code backup
This commit is contained in:
parent
5a0b32bfd2
commit
901d28d5f5
51
bseq.c
51
bseq.c
|
|
@ -1,4 +1,55 @@
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include "bseq.h"
|
#include "bseq.h"
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
KSEQ_INIT2(, gzFile, gzread)
|
KSEQ_INIT2(, gzFile, gzread)
|
||||||
|
|
||||||
|
static inline void trim_readno(kstring_t *s)
|
||||||
|
{
|
||||||
|
if (s->l > 2 && s->s[s->l-2] == '/' && isdigit(s->s[s->l-1]))
|
||||||
|
s->l -= 2, s->s[s->l] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
|
||||||
|
{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
|
||||||
|
s->name = strdup(ks->name.s);
|
||||||
|
s->comment = ks->comment.l? strdup(s->comment) : 0;
|
||||||
|
s->seq = strdup(ks->seq.s);
|
||||||
|
s->qual = ks->qual.l? strdup(ks->qual.s) : 0;
|
||||||
|
s->l_seq = strlen(s->seq);
|
||||||
|
}
|
||||||
|
|
||||||
|
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
|
||||||
|
{
|
||||||
|
kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
|
||||||
|
int size = 0, m, n;
|
||||||
|
bseq1_t *seqs;
|
||||||
|
m = n = 0; seqs = 0;
|
||||||
|
while (kseq_read(ks) >= 0) {
|
||||||
|
if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads
|
||||||
|
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (n >= m) {
|
||||||
|
m = m? m<<1 : 256;
|
||||||
|
seqs = realloc(seqs, m * sizeof(bseq1_t));
|
||||||
|
}
|
||||||
|
trim_readno(&ks->name);
|
||||||
|
kseq2bseq1(ks, &seqs[n]);
|
||||||
|
size += seqs[n++].l_seq;
|
||||||
|
if (ks2) {
|
||||||
|
trim_readno(&ks2->name);
|
||||||
|
kseq2bseq1(ks2, &seqs[n++]);
|
||||||
|
size += seqs[n++].l_seq;
|
||||||
|
}
|
||||||
|
if (size >= chunk_size) break;
|
||||||
|
}
|
||||||
|
*n_ = n;
|
||||||
|
if (size < chunk_size) { // test if the 2nd file is finished
|
||||||
|
if (kseq_read(ks2) >= 0)
|
||||||
|
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
|
||||||
|
}
|
||||||
|
return seqs;
|
||||||
|
}
|
||||||
|
|
|
||||||
8
bseq.h
8
bseq.h
|
|
@ -2,14 +2,10 @@
|
||||||
#define BATCHSEQ_H_
|
#define BATCHSEQ_H_
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
int l_seq;
|
||||||
char *name, *comment, *seq, *qual;
|
char *name, *comment, *seq, *qual;
|
||||||
} bseq1_t;
|
} bseq1_t;
|
||||||
|
|
||||||
typedef struct {
|
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
|
||||||
int n, m;
|
|
||||||
bseq1_t *seqs;
|
|
||||||
} bseq_t;
|
|
||||||
|
|
||||||
int bseq_read(int chunk_size, bseq_t *bs);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue