From 17944a75c278997b34799f2bde068a81eb87715d Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 29 Jun 2017 14:20:14 -0400 Subject: [PATCH] merge into map.c --- Makefile | 3 +-- map.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ patch.c | 57 -------------------------------------------------------- 3 files changed, 54 insertions(+), 59 deletions(-) delete mode 100644 patch.c diff --git a/Makefile b/Makefile index 2dd603f..00de261 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function CPPFLAGS= -DHAVE_KALLOC INCLUDES= -I. OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o sdust.o \ - index.o patch.o format.o map.o ksw2_extz2_sse.o + index.o format.o map.o ksw2_extz2_sse.o PROG= minimap2 PROG_EXTRA= sdust LIBS= -lm -lz -lpthread @@ -48,6 +48,5 @@ ksw2_extz2_sse.o: ksw2.h main.o: bseq.h minimap.h mmpriv.h map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h misc.o: minimap.h ksort.h -patch.o: mmpriv.h minimap.h bseq.h kalloc.h sdust.o: kalloc.h kdq.h kvec.h sdust.h sketch.o: kvec.h kalloc.h minimap.h diff --git a/map.c b/map.c index c3535c9..b8816f7 100644 --- a/map.c +++ b/map.c @@ -225,6 +225,59 @@ void mm_select_sub(float mask_level, float pri_ratio, int *n_, mm_reg1_t *r, voi } } +static int mm_squeeze_a_core(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a, const uint64_t *aux) +{ // squeeze out regions in a[] that are not referenced by regs[] + int i, as = 0; + for (i = 0; i < n_regs; ++i) { + mm_reg1_t *r = ®s[i]; + if (r->as != as) { + memmove(&a[as], &a[r->as], r->cnt * 16); + r->as = as; + } + as += r->cnt; + } + return as; +} + +void mm_join_long(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a) +{ + int i, n_aux; + uint64_t *aux; + + if (n_regs < 2) return; // nothing to join + aux = (uint64_t*)kmalloc(km, n_regs * 8); + for (i = n_aux = 0; i < n_regs; ++i) + aux[n_aux++] = (uint64_t)regs[i].as << 32 | i; + assert(n_aux == n_regs); // TODO: may be relaxed in future for other use cases + radix_sort_64(aux, aux + n_aux); + mm_squeeze_a_core(km, n_regs, regs, a, aux); + + for (i = n_aux - 1; i >= 1; --i) { + mm_reg1_t *r0 = ®s[(int32_t)aux[i-1]], *r1 = ®s[(int32_t)aux[i]]; + mm128_t *a0e, *a1s; + int max_gap, min_gap; + + // test + if (r0->as + r0->cnt != r1->as) continue; // not adjacent in a[] + if (r0->rid != r1->rid || r0->rev != r1->rev) continue; // make sure on the same target and strand + if (r0->score < opt->min_join_flank_sc || r1->score < opt->min_join_flank_sc) continue; // require good flanking chains + a0e = &a[r0->as + r0->cnt - 1]; + a1s = &a[r1->as]; + if (a1s->x <= a0e->x || (int32_t)a1s->y <= (int32_t)a0e->y) continue; // keep colinearity + max_gap = min_gap = (int32_t)a1s->y - (int32_t)a0e->y; + max_gap = max_gap > a1s->x - a0e->x? max_gap : a1s->x - a0e->x; + min_gap = min_gap < a1s->x - a0e->x? min_gap : a1s->x - a0e->x; + if (max_gap > opt->max_join_long || min_gap > opt->max_join_short) continue; + + // all conditions satisfied; join + a[r1->as].y |= 1ULL<<40; + r0->cnt += r1->cnt, r0->score += r1->score; + mm_reg_set_coor(r0, qlen, a); + r1->cnt = 0; + } + kfree(km, aux); +} + mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint32_t m_st, uint32_t m_en, const char *qname, int qlen, const char *seq, int *n_regs) { int i, n = m_en - m_st, j, n_u; diff --git a/patch.c b/patch.c deleted file mode 100644 index 5e21ba9..0000000 --- a/patch.c +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include -#include "mmpriv.h" -#include "kalloc.h" - -static int mm_squeeze_a_core(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a, const uint64_t *aux) -{ // squeeze out regions in a[] that are not referenced by regs[] - int i, as = 0; - for (i = 0; i < n_regs; ++i) { - mm_reg1_t *r = ®s[i]; - if (r->as != as) { - memmove(&a[as], &a[r->as], r->cnt * 16); - r->as = as; - } - as += r->cnt; - } - return as; -} - -void mm_join_long(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a) -{ - int i, n_aux; - uint64_t *aux; - - if (n_regs < 2) return; // nothing to join - aux = (uint64_t*)kmalloc(km, n_regs * 8); - for (i = n_aux = 0; i < n_regs; ++i) - aux[n_aux++] = (uint64_t)regs[i].as << 32 | i; - assert(n_aux == n_regs); // TODO: may be relaxed in future for other use cases - radix_sort_64(aux, aux + n_aux); - mm_squeeze_a_core(km, n_regs, regs, a, aux); - - for (i = n_aux - 1; i >= 1; --i) { - mm_reg1_t *r0 = ®s[(int32_t)aux[i-1]], *r1 = ®s[(int32_t)aux[i]]; - mm128_t *a0e, *a1s; - int max_gap, min_gap; - - // test - if (r0->as + r0->cnt != r1->as) continue; // not adjacent in a[] - if (r0->rid != r1->rid || r0->rev != r1->rev) continue; // make sure on the same target and strand - if (r0->score < opt->min_join_flank_sc || r1->score < opt->min_join_flank_sc) continue; // require good flanking chains - a0e = &a[r0->as + r0->cnt - 1]; - a1s = &a[r1->as]; - if (a1s->x <= a0e->x || (int32_t)a1s->y <= (int32_t)a0e->y) continue; // keep colinearity - max_gap = min_gap = (int32_t)a1s->y - (int32_t)a0e->y; - max_gap = max_gap > a1s->x - a0e->x? max_gap : a1s->x - a0e->x; - min_gap = min_gap < a1s->x - a0e->x? min_gap : a1s->x - a0e->x; - if (max_gap > opt->max_join_long || min_gap > opt->max_join_short) continue; - - // all conditions satisfied; join - a[r1->as].y |= 1ULL<<40; - r0->cnt += r1->cnt, r0->score += r1->score; - mm_reg_set_coor(r0, qlen, a); - r1->cnt = 0; - } - kfree(km, aux); -}