From 1fd51fc3f7ac5887e27f6f2be356bfd295729bcb Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 7 Feb 2013 14:36:18 -0500 Subject: [PATCH] code backup --- bseq.h | 2 +- bwamem.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++------- fastmap.c | 5 +--- kstring.h | 45 ++++++++++++++++++++++++++-- 4 files changed, 123 insertions(+), 19 deletions(-) diff --git a/bseq.h b/bseq.h index b54a268..978312a 100644 --- a/bseq.h +++ b/bseq.h @@ -3,7 +3,7 @@ typedef struct { int l_seq; - char *name, *comment, *seq, *qual; + char *name, *comment, *seq, *qual, *sam; } bseq1_t; bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_); diff --git a/bwamem.c b/bwamem.c index 51bcd3c..6b8d365 100644 --- a/bwamem.c +++ b/bwamem.c @@ -5,6 +5,7 @@ #ifdef HAVE_PTHREAD #include #endif +#include "kstring.h" #include "bwamem.h" #include "bntseq.h" #include "ksw.h" @@ -420,8 +421,51 @@ ret_gen_cigar: return cigar; } -static void process_seq1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s) +/************************ + * Integrated interface * + ************************/ + +void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a) { + int k, n_cigar = 0, score, is_rev, nn, rid, i; + uint32_t *cigar = 0; + int64_t pos; + kstring_t str; + mem_alnreg_t *p; + + str.l = str.m = 0; str.s = 0; + k = mem_choose_alnreg_se(opt, a->n, a->a); + p = &a->a[k]; + cigar = mem_gen_cigar(opt, bns->l_pac, pac, p->qe - p->qb, (uint8_t*)&s->seq[p->qb], p->rb, p->re, &score, &n_cigar); + pos = bns_depos(bns, p->rb, &is_rev); + nn = bns_cnt_ambi(bns, pos, p->re - p->rb, &rid); + kputs(s->name, &str); kputc('\t', &str); kputw(is_rev? 16 : 0, &str); kputc('\t', &str); + kputs(bns->anns[rid].name, &str); kputc('\t', &str); kputuw(pos - bns->anns[rid].offset, &str); kputc('\t', &str); + kputw(0, &str); kputc('\t', &str); + for (i = 0; i < s->l_seq; ++i) s->seq[i] = "ACGTN"[(int)s->seq[i]]; + kputsn(s->seq, s->l_seq, &str); kputc('\t', &str); + if (s->qual) kputsn(s->qual, s->l_seq, &str); + free(cigar); + s->sam = str.s; +} + +static mem_alnreg_v find_alnreg(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s) +{ + int i; + mem_chain_v chn; + mem_alnreg_v regs; + for (i = 0; i < s->l_seq; ++i) + s->seq[i] = nst_nt4_table[(int)s->seq[i]]; + chn = mem_chain(opt, bwt, s->l_seq, (uint8_t*)s->seq); + chn.n = mem_chain_flt(opt, chn.n, chn.a); + regs.n = regs.m = chn.n; + regs.a = malloc(regs.n * sizeof(mem_alnreg_t)); + for (i = 0; i < chn.n; ++i) { + mem_chain2aln(opt, bns->l_pac, pac, s->l_seq, (uint8_t*)s->seq, &chn.a[i], ®s.a[i]); + free(chn.a[i].seeds); + } + free(chn.a); + return regs; } typedef struct { @@ -431,41 +475,65 @@ typedef struct { const bntseq_t *bns; const uint8_t *pac; bseq1_t *seqs; -} worker1_t; + mem_alnreg_v *regs; +} worker_t; static void *worker1(void *data) { - worker1_t *w = (worker1_t*)data; + worker_t *w = (worker_t*)data; int i; for (i = w->start; i < w->n; i += w->step) - process_seq1(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]); + w->regs[i] = find_alnreg(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]); + return 0; +} + +static void *worker2(void *data) +{ + worker_t *w = (worker_t*)data; + int i; + if (!w->opt->is_pe) { + for (i = 0; i < w->n; i += w->step) { + mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]); + free(w->regs[i].a); + } + } else { + for (i = 0; i < w->n>>1; i += w->step) { // not implemented yet + free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a); + } + } return 0; } int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs) { int i; - worker1_t *w1; - w1 = calloc(opt->n_threads, sizeof(worker1_t)); + worker_t *w; + w = calloc(opt->n_threads, sizeof(worker_t)); for (i = 0; i < opt->n_threads; ++i) { - worker1_t *w = &w1[i]; + worker_t *w = &w[i]; w->start = i; w->step = opt->n_threads; w->n = n; w->opt = opt; w->bwt = bwt; w->bns = bns; w->pac = pac; w->seqs = seqs; } #ifdef HAVE_PTHREAD if (opt->n_threads == 1) { - worker1(w1); + worker1(w); worker2(w); } else { pthread_t *tid; tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t)); - for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w1[i]); + for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]); + for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0); + for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]); for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0); free(tid); } #else - worker1(w1); + worker1(w); worker2(w); #endif - free(w1); + for (i = 0; i < n; ++i) { + puts(seqs[i].sam); + free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam); + } + free(w); return 0; } diff --git a/fastmap.c b/fastmap.c index 32f8db0..812c2db 100644 --- a/fastmap.c +++ b/fastmap.c @@ -17,7 +17,7 @@ int main_mem(int argc, char *argv[]) mem_opt_t *opt; bwt_t *bwt; bntseq_t *bns; - int i, c, n; + int c, n; gzFile fp, fp2 = 0; kseq_t *ks, *ks2 = 0; uint8_t *pac = 0; @@ -55,9 +55,6 @@ int main_mem(int argc, char *argv[]) } while ((seqs = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) { mem_process_seqs(opt, bwt, bns, pac, n, seqs); - for (i = 0; i < n; ++i) { - free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); - } free(seqs); } /* diff --git a/kstring.h b/kstring.h index 398901f..cf14e39 100644 --- a/kstring.h +++ b/kstring.h @@ -16,19 +16,24 @@ typedef struct __kstring_t { } kstring_t; #endif -static inline int kputs(const char *p, kstring_t *s) +static inline int kputsn(const char *p, int l, kstring_t *s) { - int l = strlen(p); if (s->l + l + 1 >= s->m) { s->m = s->l + l + 2; kroundup32(s->m); s->s = (char*)realloc(s->s, s->m); } - strcpy(s->s + s->l, p); + memcpy(s->s + s->l, p, l); s->l += l; + s->s[s->l] = 0; return l; } +static inline int kputs(const char *p, kstring_t *s) +{ + return kputsn(p, strlen(p), s); +} + static inline int kputc(int c, kstring_t *s) { if (s->l + 1 >= s->m) { @@ -41,6 +46,40 @@ static inline int kputc(int c, kstring_t *s) return c; } +static inline int kputw(int c, kstring_t *s) +{ + char buf[16]; + int l, x; + if (c == 0) return kputc('0', s); + for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; + if (c < 0) buf[l++] = '-'; + if (s->l + l + 1 >= s->m) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; + s->s[s->l] = 0; + return 0; +} + +static inline int kputuw(unsigned c, kstring_t *s) +{ + char buf[16]; + int l, i; + unsigned x; + if (c == 0) return kputc('0', s); + for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; + if (s->l + l + 1 >= s->m) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; + s->s[s->l] = 0; + return 0; +} + int ksprintf(kstring_t *s, const char *fmt, ...); #endif