code backup
This commit is contained in:
parent
bfeb37c4de
commit
1fd51fc3f7
2
bseq.h
2
bseq.h
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int l_seq;
|
int l_seq;
|
||||||
char *name, *comment, *seq, *qual;
|
char *name, *comment, *seq, *qual, *sam;
|
||||||
} bseq1_t;
|
} bseq1_t;
|
||||||
|
|
||||||
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
|
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
|
||||||
|
|
|
||||||
90
bwamem.c
90
bwamem.c
|
|
@ -5,6 +5,7 @@
|
||||||
#ifdef HAVE_PTHREAD
|
#ifdef HAVE_PTHREAD
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#endif
|
#endif
|
||||||
|
#include "kstring.h"
|
||||||
#include "bwamem.h"
|
#include "bwamem.h"
|
||||||
#include "bntseq.h"
|
#include "bntseq.h"
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
|
|
@ -420,8 +421,51 @@ ret_gen_cigar:
|
||||||
return cigar;
|
return cigar;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void process_seq1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s)
|
/************************
|
||||||
|
* Integrated interface *
|
||||||
|
************************/
|
||||||
|
|
||||||
|
void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
|
||||||
{
|
{
|
||||||
|
int k, n_cigar = 0, score, is_rev, nn, rid, i;
|
||||||
|
uint32_t *cigar = 0;
|
||||||
|
int64_t pos;
|
||||||
|
kstring_t str;
|
||||||
|
mem_alnreg_t *p;
|
||||||
|
|
||||||
|
str.l = str.m = 0; str.s = 0;
|
||||||
|
k = mem_choose_alnreg_se(opt, a->n, a->a);
|
||||||
|
p = &a->a[k];
|
||||||
|
cigar = mem_gen_cigar(opt, bns->l_pac, pac, p->qe - p->qb, (uint8_t*)&s->seq[p->qb], p->rb, p->re, &score, &n_cigar);
|
||||||
|
pos = bns_depos(bns, p->rb, &is_rev);
|
||||||
|
nn = bns_cnt_ambi(bns, pos, p->re - p->rb, &rid);
|
||||||
|
kputs(s->name, &str); kputc('\t', &str); kputw(is_rev? 16 : 0, &str); kputc('\t', &str);
|
||||||
|
kputs(bns->anns[rid].name, &str); kputc('\t', &str); kputuw(pos - bns->anns[rid].offset, &str); kputc('\t', &str);
|
||||||
|
kputw(0, &str); kputc('\t', &str);
|
||||||
|
for (i = 0; i < s->l_seq; ++i) s->seq[i] = "ACGTN"[(int)s->seq[i]];
|
||||||
|
kputsn(s->seq, s->l_seq, &str); kputc('\t', &str);
|
||||||
|
if (s->qual) kputsn(s->qual, s->l_seq, &str);
|
||||||
|
free(cigar);
|
||||||
|
s->sam = str.s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static mem_alnreg_v find_alnreg(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
mem_chain_v chn;
|
||||||
|
mem_alnreg_v regs;
|
||||||
|
for (i = 0; i < s->l_seq; ++i)
|
||||||
|
s->seq[i] = nst_nt4_table[(int)s->seq[i]];
|
||||||
|
chn = mem_chain(opt, bwt, s->l_seq, (uint8_t*)s->seq);
|
||||||
|
chn.n = mem_chain_flt(opt, chn.n, chn.a);
|
||||||
|
regs.n = regs.m = chn.n;
|
||||||
|
regs.a = malloc(regs.n * sizeof(mem_alnreg_t));
|
||||||
|
for (i = 0; i < chn.n; ++i) {
|
||||||
|
mem_chain2aln(opt, bns->l_pac, pac, s->l_seq, (uint8_t*)s->seq, &chn.a[i], ®s.a[i]);
|
||||||
|
free(chn.a[i].seeds);
|
||||||
|
}
|
||||||
|
free(chn.a);
|
||||||
|
return regs;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
@ -431,41 +475,65 @@ typedef struct {
|
||||||
const bntseq_t *bns;
|
const bntseq_t *bns;
|
||||||
const uint8_t *pac;
|
const uint8_t *pac;
|
||||||
bseq1_t *seqs;
|
bseq1_t *seqs;
|
||||||
} worker1_t;
|
mem_alnreg_v *regs;
|
||||||
|
} worker_t;
|
||||||
|
|
||||||
static void *worker1(void *data)
|
static void *worker1(void *data)
|
||||||
{
|
{
|
||||||
worker1_t *w = (worker1_t*)data;
|
worker_t *w = (worker_t*)data;
|
||||||
int i;
|
int i;
|
||||||
for (i = w->start; i < w->n; i += w->step)
|
for (i = w->start; i < w->n; i += w->step)
|
||||||
process_seq1(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]);
|
w->regs[i] = find_alnreg(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *worker2(void *data)
|
||||||
|
{
|
||||||
|
worker_t *w = (worker_t*)data;
|
||||||
|
int i;
|
||||||
|
if (!w->opt->is_pe) {
|
||||||
|
for (i = 0; i < w->n; i += w->step) {
|
||||||
|
mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
|
||||||
|
free(w->regs[i].a);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < w->n>>1; i += w->step) { // not implemented yet
|
||||||
|
free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a);
|
||||||
|
}
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs)
|
int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
worker1_t *w1;
|
worker_t *w;
|
||||||
w1 = calloc(opt->n_threads, sizeof(worker1_t));
|
w = calloc(opt->n_threads, sizeof(worker_t));
|
||||||
for (i = 0; i < opt->n_threads; ++i) {
|
for (i = 0; i < opt->n_threads; ++i) {
|
||||||
worker1_t *w = &w1[i];
|
worker_t *w = &w[i];
|
||||||
w->start = i; w->step = opt->n_threads; w->n = n;
|
w->start = i; w->step = opt->n_threads; w->n = n;
|
||||||
w->opt = opt; w->bwt = bwt; w->bns = bns; w->pac = pac;
|
w->opt = opt; w->bwt = bwt; w->bns = bns; w->pac = pac;
|
||||||
w->seqs = seqs;
|
w->seqs = seqs;
|
||||||
}
|
}
|
||||||
#ifdef HAVE_PTHREAD
|
#ifdef HAVE_PTHREAD
|
||||||
if (opt->n_threads == 1) {
|
if (opt->n_threads == 1) {
|
||||||
worker1(w1);
|
worker1(w); worker2(w);
|
||||||
} else {
|
} else {
|
||||||
pthread_t *tid;
|
pthread_t *tid;
|
||||||
tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
|
tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w1[i]);
|
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]);
|
||||||
|
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||||
|
for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]);
|
||||||
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);
|
||||||
free(tid);
|
free(tid);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
worker1(w1);
|
worker1(w); worker2(w);
|
||||||
#endif
|
#endif
|
||||||
free(w1);
|
for (i = 0; i < n; ++i) {
|
||||||
|
puts(seqs[i].sam);
|
||||||
|
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
||||||
|
}
|
||||||
|
free(w);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ int main_mem(int argc, char *argv[])
|
||||||
mem_opt_t *opt;
|
mem_opt_t *opt;
|
||||||
bwt_t *bwt;
|
bwt_t *bwt;
|
||||||
bntseq_t *bns;
|
bntseq_t *bns;
|
||||||
int i, c, n;
|
int c, n;
|
||||||
gzFile fp, fp2 = 0;
|
gzFile fp, fp2 = 0;
|
||||||
kseq_t *ks, *ks2 = 0;
|
kseq_t *ks, *ks2 = 0;
|
||||||
uint8_t *pac = 0;
|
uint8_t *pac = 0;
|
||||||
|
|
@ -55,9 +55,6 @@ int main_mem(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
while ((seqs = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) {
|
while ((seqs = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) {
|
||||||
mem_process_seqs(opt, bwt, bns, pac, n, seqs);
|
mem_process_seqs(opt, bwt, bns, pac, n, seqs);
|
||||||
for (i = 0; i < n; ++i) {
|
|
||||||
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual);
|
|
||||||
}
|
|
||||||
free(seqs);
|
free(seqs);
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
45
kstring.h
45
kstring.h
|
|
@ -16,19 +16,24 @@ typedef struct __kstring_t {
|
||||||
} kstring_t;
|
} kstring_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline int kputs(const char *p, kstring_t *s)
|
static inline int kputsn(const char *p, int l, kstring_t *s)
|
||||||
{
|
{
|
||||||
int l = strlen(p);
|
|
||||||
if (s->l + l + 1 >= s->m) {
|
if (s->l + l + 1 >= s->m) {
|
||||||
s->m = s->l + l + 2;
|
s->m = s->l + l + 2;
|
||||||
kroundup32(s->m);
|
kroundup32(s->m);
|
||||||
s->s = (char*)realloc(s->s, s->m);
|
s->s = (char*)realloc(s->s, s->m);
|
||||||
}
|
}
|
||||||
strcpy(s->s + s->l, p);
|
memcpy(s->s + s->l, p, l);
|
||||||
s->l += l;
|
s->l += l;
|
||||||
|
s->s[s->l] = 0;
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int kputs(const char *p, kstring_t *s)
|
||||||
|
{
|
||||||
|
return kputsn(p, strlen(p), s);
|
||||||
|
}
|
||||||
|
|
||||||
static inline int kputc(int c, kstring_t *s)
|
static inline int kputc(int c, kstring_t *s)
|
||||||
{
|
{
|
||||||
if (s->l + 1 >= s->m) {
|
if (s->l + 1 >= s->m) {
|
||||||
|
|
@ -41,6 +46,40 @@ static inline int kputc(int c, kstring_t *s)
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int kputw(int c, kstring_t *s)
|
||||||
|
{
|
||||||
|
char buf[16];
|
||||||
|
int l, x;
|
||||||
|
if (c == 0) return kputc('0', s);
|
||||||
|
for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
|
||||||
|
if (c < 0) buf[l++] = '-';
|
||||||
|
if (s->l + l + 1 >= s->m) {
|
||||||
|
s->m = s->l + l + 2;
|
||||||
|
kroundup32(s->m);
|
||||||
|
s->s = (char*)realloc(s->s, s->m);
|
||||||
|
}
|
||||||
|
for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
|
||||||
|
s->s[s->l] = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int kputuw(unsigned c, kstring_t *s)
|
||||||
|
{
|
||||||
|
char buf[16];
|
||||||
|
int l, i;
|
||||||
|
unsigned x;
|
||||||
|
if (c == 0) return kputc('0', s);
|
||||||
|
for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0';
|
||||||
|
if (s->l + l + 1 >= s->m) {
|
||||||
|
s->m = s->l + l + 2;
|
||||||
|
kroundup32(s->m);
|
||||||
|
s->s = (char*)realloc(s->s, s->m);
|
||||||
|
}
|
||||||
|
for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
|
||||||
|
s->s[s->l] = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int ksprintf(kstring_t *s, const char *fmt, ...);
|
int ksprintf(kstring_t *s, const char *fmt, ...);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue