diff --git a/Makefile b/Makefile index 972ecd6..67e05d6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ CFLAGS= -g -Wall -O2 -Wc++-compat CPPFLAGS= -DHAVE_KALLOC INCLUDES= -OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o pe.o ksw2_ll_sse.o +OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o pe.o esterr.o ksw2_ll_sse.o PROG= minimap2 PROG_EXTRA= sdust minimap2-lite LIBS= -lm -lz -lpthread diff --git a/format.c b/format.c index efb0d3e..fb8924d 100644 --- a/format.c +++ b/format.c @@ -210,6 +210,12 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r) } mm_sprintf_lite(s, "\ttp:A:%c\tcm:i:%d\ts1:i:%d", type, r->cnt, r->score); if (r->parent == r->id) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc); + if (r->div >= 0.0f && r->div <= 1.0f) { + char buf[8]; + if (r->div == 0.0f) buf[0] = '0', buf[1] = 0; + else sprintf(buf, "%.4f", r->div); + mm_sprintf_lite(s, "\tdv:f:%s", buf); + } if (r->split) mm_sprintf_lite(s, "\tzd:i:%d", r->split); } diff --git a/main.c b/main.c index c91dc4d..3b71b74 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.5-r606-dirty" +#define MM_VERSION "2.5-r607-dirty" #ifdef __linux__ #include diff --git a/map.c b/map.c index 6330f88..b9177ae 100644 --- a/map.c +++ b/map.c @@ -125,6 +125,8 @@ struct mm_tbuf_s { sdust_buf_t *sdb; mm128_v mini; void *km; + int32_t n_mini_pos; + uint64_t *mini_pos; }; mm_tbuf_t *mm_tbuf_init(void) @@ -139,6 +141,7 @@ mm_tbuf_t *mm_tbuf_init(void) void mm_tbuf_destroy(mm_tbuf_t *b) { if (b == 0) return; + kfree(b->km, b->mini_pos); kfree(b->km, b->mini.a); sdust_buf_destroy(b->sdb); km_destroy(b->km); @@ -188,6 +191,8 @@ static mm128_t *collect_seed_hits(const mm_mapopt_t *opt, int max_occ, const mm_ mm_match_t *m; mm128_t *a; + b->n_mini_pos = 0; + b->mini_pos = (uint64_t*)kmalloc(b->km, b->mini.n * sizeof(uint64_t)); m = (mm_match_t*)kmalloc(b->km, b->mini.n * sizeof(mm_match_t)); for (i = 0; i < b->mini.n; ++i) { int t; @@ -213,6 +218,7 @@ static mm128_t *collect_seed_hits(const mm_mapopt_t *opt, int max_occ, const mm_ } else rep_en = en; continue; } + b->mini_pos[b->n_mini_pos++] = (uint64_t)q_span<<32 | q->qpos>>1; if (i > 0 && p->x>>8 == b->mini.a[i - 1].x>>8) is_tandem = 1; if (i < b->mini.n - 1 && p->x>>8 == b->mini.a[i + 1].x>>8) is_tandem = 1; for (k = 0; k < q->n; ++k) { @@ -342,6 +348,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** i == regs0[j].as? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x)); chain_post(opt, max_chain_gap_ref, mi, b->km, qlen_sum, n_segs, qlens, &n_regs0, regs0, a); + if (!is_sr) mm_est_err(qlen_sum, n_regs0, regs0, a, b->n_mini_pos, b->mini_pos); if (n_segs == 1) { // uni-segment regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], quals? quals[0] : 0, &n_regs0, regs0, a); diff --git a/minimap.h b/minimap.h index 45a8f30..b33ffb3 100644 --- a/minimap.h +++ b/minimap.h @@ -68,17 +68,19 @@ typedef struct { } mm_extra_t; typedef struct { - int32_t id; // ID for internal uses (see also parent below) - uint32_t cnt:28, rev:1, seg_split:1, sam_pri:1, proper_frag:1; // number of minimizers; if on the reverse strand - uint32_t rid:31, inv:1; // reference index; if this is an alignment from inversion rescue - int32_t score; // DP alignment score - int32_t qs, qe, rs, re; // query start and end; reference start and end - int32_t parent, subsc; // parent==id if primary; best alternate mapping score - int32_t as; // offset in the a[] array (for internal uses only) - int32_t mlen, blen; // seeded exact match length; seeded alignment block length - uint32_t mapq:8, split:2, n_sub:22; // mapQ; split pattern; number of suboptimal mappings - uint32_t pe_thru:1, score0:31; + int32_t id; // ID for internal uses (see also parent below) + int32_t cnt; // number of minimizers; if on the reverse strand + int32_t rid; // reference index; if this is an alignment from inversion rescue + int32_t score; // DP alignment score + int32_t qs, qe, rs, re; // query start and end; reference start and end + int32_t parent, subsc; // parent==id if primary; best alternate mapping score + int32_t as; // offset in the a[] array (for internal uses only) + int32_t mlen, blen; // seeded exact match length; seeded alignment block length + int32_t n_sub; // number of suboptimal mappings + int32_t score0; // initial chaining score (before chain merging/spliting) + uint32_t mapq:8, split:2, rev:1, inv:1, sam_pri:1, proper_frag:1, pe_thru:1, seg_split:1, dummy:16; uint32_t hash; + float div; mm_extra_t *p; } mm_reg1_t; diff --git a/mmpriv.h b/mmpriv.h index e0b06c9..9ec080e 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -79,6 +79,8 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_re void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r); void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr); +void mm_est_err(int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, uint64_t *mini_pos); + mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a); void mm_seg_free(void *km, int n_segs, mm_seg_t *segs); void mm_pair(void *km, int max_gap_ref, int dp_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs);