From 66585b7982ea5820511ee1701234c50a94d98067 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 18 Feb 2013 16:33:06 -0500 Subject: [PATCH] code backup --- bwamem.c | 27 ++++++++++++++------------- bwamem.h | 8 ++++++-- bwamem_pair.c | 32 ++++++++++++++++++-------------- fastmap.c | 6 ++++-- 4 files changed, 42 insertions(+), 31 deletions(-) diff --git a/bwamem.c b/bwamem.c index ae4992f..397422f 100644 --- a/bwamem.c +++ b/bwamem.c @@ -31,6 +31,7 @@ mem_opt_t *mem_opt_init() mem_opt_t *o; o = calloc(1, sizeof(mem_opt_t)); o->a = 1; o->b = 5; o->q = 8; o->r = 1; o->w = 100; + o->flag = 0; o->min_seed_len = 19; o->split_width = 10; o->max_occ = 10000; @@ -41,7 +42,6 @@ mem_opt_t *mem_opt_init() o->chunk_size = 10000000; o->n_threads = 1; o->pe_dir = 0<<1|1; - o->is_pe = 0; mem_fill_scmat(o->a, o->b, o->mat); return o; } @@ -598,11 +598,11 @@ void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h) h->score = a->score; h->sub = a->sub > a->csub? a->sub : a->csub; h->qual = 0; // quality unset - h->flag = a->secondary? 0x100 : 0; // only the "secondary" bit is set + h->flag = a->secondary >= 0? 0x100 : 0; // only the "secondary" bit is set h->mb = h->me = -2; // mate positions are unset } -void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a) +void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag) { int k; kstring_t str; @@ -612,10 +612,11 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b bwahit_t h; if (a->a[k].secondary >= 0) continue; mem_alnreg2hit(&a->a[k], &h); + h.flag |= extra_flag; h.qual = approx_mapq_se(opt, &a->a[k]); - bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->is_hard); + bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP); } - } else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->is_hard); + } else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->flag&MEM_F_HARDCLIP); s->sam = str.s; } @@ -657,25 +658,25 @@ static void *worker1(void *data) for (i = w->start; i < w->n; i += w->step) { w->regs[i] = find_alnreg(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]); w->regs[i].n = mem_sort_and_dedup(w->regs[i].n, w->regs[i].a); - mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a); } return 0; } static void *worker2(void *data) { - extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2]); + extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]); worker_t *w = (worker_t*)data; int i; - if (!w->opt->is_pe) { + if (!(w->opt->flag&MEM_F_PE)) { for (i = 0; i < w->n; i += w->step) { - mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]); + mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a); + mem_sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0); free(w->regs[i].a); } } else { int n = 0; for (i = 0; i < w->n>>1; i += w->step) { // not implemented yet - n += mem_sam_pe(w->opt, w->bns, w->pac, w->pes, &w->seqs[i<<1], &w->regs[i<<1]); + n += mem_sam_pe(w->opt, w->bns, w->pac, w->pes, i, &w->seqs[i<<1], &w->regs[i<<1]); free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a); } fprintf(stderr, "[M::%s@%d] performed mate-SW for %d reads\n", __func__, w->start, n); @@ -702,21 +703,21 @@ int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns #ifdef HAVE_PTHREAD if (opt->n_threads == 1) { worker1(w); - if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes); + if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes); worker2(w); } else { pthread_t *tid; tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t)); for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker1, &w[i]); for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0); - if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes); + if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes); for (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker2, &w[i]); for (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0); free(tid); } #else worker1(w); - if (opt->is_pe) mem_pestat(opt, bns->l_pac, n, regs, pes); + if (opt->flag&MEM_F_PE) mem_pestat(opt, bns->l_pac, n, regs, pes); worker2(w); #endif for (i = 0; i < n; ++i) { diff --git a/bwamem.h b/bwamem.h index 1f9605d..5fa49e4 100644 --- a/bwamem.h +++ b/bwamem.h @@ -15,13 +15,17 @@ typedef struct { int32_t qbeg, len; } mem_seed_t; +#define MEM_F_HARDCLIP 0x1 +#define MEM_F_PE 0x2 +#define MEM_F_NOPAIRING 0x4 + typedef struct { int a, b, q, r, w; + int flag; int split_width; int min_seed_len, max_occ, max_chain_gap; int n_threads, chunk_size; - int pe_dir, is_pe; - int is_hard; // if to use hard clip + int pe_dir; float mask_level, chain_drop_ratio; int max_ins; // maximum insert size int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset diff --git a/bwamem_pair.c b/bwamem_pair.c index fe6f697..9d4d590 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -159,11 +159,11 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me return n; } -int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], bwahit_t h[2]) +uint64_t mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, uint64_t *sub, int z[2]) { extern void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h); pair64_v v; - pair64_t o, subo; // score<<32 | raw_score<<8 | hash + pair64_t o, subo; // .x: score<<32 | raw_score<<8 | hash; .y: pair int r, i, k, y[4]; // y[] keeps the last hit kv_init(v); for (r = 0; r < 2; ++r) { // loop through read number @@ -198,7 +198,7 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_ ns = (dist - pes[dir].avg) / pes[dir].std; score = (int)(raw_score - 4.343 / 23. * (opt->a + opt->b) * log(erfc(fabs(ns) * M_SQRT1_2)) + .499); pair = (uint64_t)k<<32 | i; - x = (uint64_t)score<<32 | (int64_t)raw_score<<8 | (hash_64(pair)&0xff); + x = (uint64_t)score<<32 | (int64_t)raw_score<<8 | (hash_64(pair ^ id<<8)&0xff); if (x > o.x) subo = o, o.x = x, o.y = pair; else if (x > subo.x) subo.x = x, subo.y = pair; } @@ -207,19 +207,24 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_ } if (o.x > 0) { i = o.y >> 32; k = o.y << 32 >> 32; - mem_alnreg2hit(&a[v.a[i].y&1].a[v.a[i].y<<32>>34], &h[v.a[i].y&1]); - mem_alnreg2hit(&a[v.a[k].y&1].a[v.a[k].y<<32>>34], &h[v.a[k].y&1]); + z[v.a[i].y&1] = v.a[i].y<<32>>34; + z[v.a[k].y&1] = v.a[k].y<<32>>34; } free(v.a); - return o.x == 0? -1 : 0; + *sub = subo.x; + return o.x; } -int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2]) +int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]) { - int n = 0, i, j; + extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a); + extern void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag); + int n = 0, i, j, z[2]; kstring_t str; bwahit_t h[2]; mem_alnreg_t b[2][2]; + uint64_t o, subo; + str.l = str.m = 0; str.s = 0; // perform SW for the best alignment for (i = 0; i < 2; ++i) @@ -233,12 +238,11 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co for (j = 0; j < 2; ++j) if (b[i][j].score > 0) n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i][j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]); // pairing single-end hits - if (mem_pair(opt, bns->l_pac, pac, pes, s, a, h) == 0) { // successful pairing - bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[0], &h[0], opt->is_hard); - s[0].sam = strdup(str.s); str.l = 0; - bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[1], &h[1], opt->is_hard); - s[1].sam = str.s; - } else { + o = mem_pair(opt, bns->l_pac, pac, pes, s, a, id, &subo, z); + if (0&&o) { // with proper pairing + } else { // no proper pairing + mem_mark_primary_se(opt, a[0].n, a[0].a); mem_sam_se(opt, bns, pac, &s[0], &a[0], 0x41); + mem_mark_primary_se(opt, a[1].n, a[1].a); mem_sam_se(opt, bns, pac, &s[1], &a[1], 0x81); } return n; } diff --git a/fastmap.c b/fastmap.c index f2677eb..a2d7d94 100644 --- a/fastmap.c +++ b/fastmap.c @@ -24,8 +24,10 @@ int main_mem(int argc, char *argv[]) bseq1_t *seqs; opt = mem_opt_init(); - while ((c = getopt(argc, argv, "k:c:v:s:")) >= 0) { + while ((c = getopt(argc, argv, "PHk:c:v:s:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg); + else if (c == 'P') opt->flag |= MEM_F_NOPAIRING; + else if (c == 'H') opt->flag |= MEM_F_HARDCLIP; else if (c == 'c') opt->max_occ = atoi(optarg); else if (c == 'v') mem_verbose = atoi(optarg); else if (c == 's') opt->split_width = atoi(optarg); @@ -59,7 +61,7 @@ int main_mem(int argc, char *argv[]) if (optind + 2 < argc) { fp2 = gzopen(argv[optind + 2], "r"); ks2 = kseq_init(fp2); - opt->is_pe = 1; + opt->flag |= MEM_F_PE; } while ((seqs = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) { mem_process_seqs(opt, bwt, bns, pac, n, seqs);