From 901d28d5f54c6e58a966c233b471c315df453580 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 6 Feb 2013 15:03:09 -0500 Subject: [PATCH] code backup --- bseq.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ bseq.h | 8 ++------ 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/bseq.c b/bseq.c index 0ec57fa..0889851 100644 --- a/bseq.c +++ b/bseq.c @@ -1,4 +1,55 @@ #include +#include +#include +#include #include "bseq.h" #include "kseq.h" KSEQ_INIT2(, gzFile, gzread) + +static inline void trim_readno(kstring_t *s) +{ + if (s->l > 2 && s->s[s->l-2] == '/' && isdigit(s->s[s->l-1])) + s->l -= 2, s->s[s->l] = 0; +} + +static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s) +{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice + s->name = strdup(ks->name.s); + s->comment = ks->comment.l? strdup(s->comment) : 0; + s->seq = strdup(ks->seq.s); + s->qual = ks->qual.l? strdup(ks->qual.s) : 0; + s->l_seq = strlen(s->seq); +} + +bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_) +{ + kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_; + int size = 0, m, n; + bseq1_t *seqs; + m = n = 0; seqs = 0; + while (kseq_read(ks) >= 0) { + if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads + fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__); + break; + } + if (n >= m) { + m = m? m<<1 : 256; + seqs = realloc(seqs, m * sizeof(bseq1_t)); + } + trim_readno(&ks->name); + kseq2bseq1(ks, &seqs[n]); + size += seqs[n++].l_seq; + if (ks2) { + trim_readno(&ks2->name); + kseq2bseq1(ks2, &seqs[n++]); + size += seqs[n++].l_seq; + } + if (size >= chunk_size) break; + } + *n_ = n; + if (size < chunk_size) { // test if the 2nd file is finished + if (kseq_read(ks2) >= 0) + fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__); + } + return seqs; +} diff --git a/bseq.h b/bseq.h index 73afb63..b54a268 100644 --- a/bseq.h +++ b/bseq.h @@ -2,14 +2,10 @@ #define BATCHSEQ_H_ typedef struct { + int l_seq; char *name, *comment, *seq, *qual; } bseq1_t; -typedef struct { - int n, m; - bseq1_t *seqs; -} bseq_t; - -int bseq_read(int chunk_size, bseq_t *bs); +bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_); #endif