From 4ae0b469724fb9cc1e402967593f43d0f0d38f64 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 23 Jun 2017 14:38:28 -0400 Subject: [PATCH] min_ksw_len --- align.c | 35 +++++++++++++++++++---------------- map.c | 3 +++ minimap.h | 2 ++ 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/align.c b/align.c index 7f9e138..70c69a3 100644 --- a/align.c +++ b/align.c @@ -18,23 +18,26 @@ void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int for (reg = 0; reg < n_regs; ++reg) { mm_reg1_t *r = ®s[reg]; int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63; - uint8_t *tseq; - tseq = (uint8_t*)kmalloc(km, r->re - r->rs); - for (i = 0; i < r->cnt - 1; ++i) { - uint8_t *qseq; - int32_t rs, re, qs, qe, ret; - rs = (int32_t)a[r->as + i].x + 1; - re = (int32_t)a[r->as + i + 1].x + 1; - qs = (int32_t)a[r->as + i].y + 1; - qe = (int32_t)a[r->as + i + 1].y + 1; - qseq = &qseq0[rev][qs]; - ret = mm_idx_getseq(mi, rid, rs, re, tseq); - assert(ret > 0); - fprintf(stderr, "===> [%d,%d] %d-%d %c (%s:%d-%d) <===\n", reg, i, qs, qe, "+-"[rev], mi->seq[rid].name, rs, re); - for (k = 0; k < re - rs; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr); - for (k = 0; k < qe - qs; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr); + uint8_t *tseq0, *tseq, *qseq; + int32_t rs, re, qs, qe, ret; + tseq0 = (uint8_t*)kmalloc(km, r->re - r->rs); + ret = mm_idx_getseq(mi, rid, r->rs, r->re, tseq0); + assert(ret > 0); + rs = (int32_t)a[r->as].x + 1; + qs = (int32_t)a[r->as].y + 1; + for (i = 1; i < r->cnt; ++i) { + re = (int32_t)a[r->as + i].x + 1; + qe = (int32_t)a[r->as + i].y + 1; + if (qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) { + qseq = &qseq0[rev][qs]; + tseq = &tseq0[rs - r->rs]; + fprintf(stderr, "===> [%d,%d] %d-%d %c (%s:%d-%d) <===\n", reg, i, qs, qe, "+-"[rev], mi->seq[rid].name, rs, re); + for (k = 0; k < re - rs; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr); + for (k = 0; k < qe - qs; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr); + rs = re, qs = qe; + } } - kfree(km, tseq); + kfree(km, tseq0); } kfree(km, qseq0[0]); kfree(km, qseq0[1]); diff --git a/map.c b/map.c index 95e652d..795e175 100644 --- a/map.c +++ b/map.c @@ -22,6 +22,9 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->pri_ratio = 2.0f; opt->mask_level = 0.5f; + + opt->a = 1, opt->b = 1, opt->q = 1, opt->e = 1; + opt->min_ksw_len = 100; } void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi) diff --git a/minimap.h b/minimap.h index 53273d3..33c6128 100644 --- a/minimap.h +++ b/minimap.h @@ -61,9 +61,11 @@ typedef struct { int min_score; float pri_ratio; float mask_level; + int a, b, q, e; // matching score, mismatch, gap-open and gap-ext penalties int max_occ; int mid_occ; + int min_ksw_len; } mm_mapopt_t; extern int mm_verbose;