backup
This commit is contained in:
parent
c8d122bcdb
commit
d274e1b743
3
Makefile
3
Makefile
|
|
@ -3,7 +3,7 @@ CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function
|
||||||
CPPFLAGS= -DHAVE_KALLOC
|
CPPFLAGS= -DHAVE_KALLOC
|
||||||
INCLUDES= -I.
|
INCLUDES= -I.
|
||||||
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o sdust.o \
|
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o sdust.o \
|
||||||
index.o format.o map.o ksw2_extz2_sse.o
|
index.o patch.o format.o map.o ksw2_extz2_sse.o
|
||||||
PROG= minimap2
|
PROG= minimap2
|
||||||
PROG_EXTRA= sdust
|
PROG_EXTRA= sdust
|
||||||
LIBS= -lm -lz -lpthread
|
LIBS= -lm -lz -lpthread
|
||||||
|
|
@ -48,5 +48,6 @@ ksw2_extz2_sse.o: ksw2.h
|
||||||
main.o: bseq.h minimap.h mmpriv.h
|
main.o: bseq.h minimap.h mmpriv.h
|
||||||
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h
|
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h
|
||||||
misc.o: minimap.h ksort.h
|
misc.o: minimap.h ksort.h
|
||||||
|
patch.o: mmpriv.h minimap.h bseq.h kalloc.h
|
||||||
sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
||||||
sketch.o: kvec.h kalloc.h minimap.h
|
sketch.o: kvec.h kalloc.h minimap.h
|
||||||
|
|
|
||||||
2
align.c
2
align.c
|
|
@ -230,7 +230,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
||||||
for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr);
|
for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr);
|
||||||
#endif
|
#endif
|
||||||
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_APPROX_MAX, ez);
|
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_APPROX_MAX, ez);
|
||||||
if (ez->zdropped || ez->score < 0 || mm_check_zdrop(qseq, tseq, ez->n_cigar, ez->cigar, mat, opt->q, opt->e, opt->zdrop))
|
if (mm_check_zdrop(qseq, tseq, ez->n_cigar, ez->cigar, mat, opt->q, opt->e, opt->zdrop))
|
||||||
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, 0, ez);
|
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, 0, ez);
|
||||||
if (ez->n_cigar > 0) {
|
if (ez->n_cigar > 0) {
|
||||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||||
|
|
|
||||||
2
chain.c
2
chain.c
|
|
@ -35,7 +35,7 @@ int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int
|
||||||
// fill the score and backtrack arrays
|
// fill the score and backtrack arrays
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
uint64_t ri = a[i].x;
|
uint64_t ri = a[i].x;
|
||||||
int32_t qi = (int32_t)a[i].y, q_span = a[i].y>>32;
|
int32_t qi = (int32_t)a[i].y, q_span = a[i].y>>32&0xff; // NB: only 8 bits of span is used!!!
|
||||||
int32_t max_f = -INT32_MAX, max_j = -1, n_skip = 0;
|
int32_t max_f = -INT32_MAX, max_j = -1, n_skip = 0;
|
||||||
while (st < i && ri - a[st].x > max_dist) ++st;
|
while (st < i && ri - a[st].x > max_dist) ++st;
|
||||||
for (j = i - 1; j >= st; --j) {
|
for (j = i - 1; j >= st; --j) {
|
||||||
|
|
|
||||||
9
ksw2.h
9
ksw2.h
|
|
@ -8,10 +8,11 @@
|
||||||
#define KSW_EZ_SCORE_ONLY 0x01 // don't record alignment path/cigar
|
#define KSW_EZ_SCORE_ONLY 0x01 // don't record alignment path/cigar
|
||||||
#define KSW_EZ_RIGHT 0x02 // right-align gaps
|
#define KSW_EZ_RIGHT 0x02 // right-align gaps
|
||||||
#define KSW_EZ_GENERIC_SC 0x04 // without this flag: match/mismatch only; last symbol is a wildcard
|
#define KSW_EZ_GENERIC_SC 0x04 // without this flag: match/mismatch only; last symbol is a wildcard
|
||||||
#define KSW_EZ_APPROX_MAX 0x08 // approximate max and Z-drop; this is faster
|
#define KSW_EZ_APPROX_MAX 0x08 // approximate max; this is faster with sse
|
||||||
#define KSW_EZ_DYN_BAND 0x10 // once used, ksw_extz_t::{mqe,mte} may be wrong
|
#define KSW_EZ_APPROX_DROP 0x10 // approximate Z-drop; faster with sse
|
||||||
#define KSW_EZ_EXTZ_ONLY 0x20 // only perform extension
|
#define KSW_EZ_DYN_BAND 0x20 // once used, ksw_extz_t::{mqe,mte} may be wrong
|
||||||
#define KSW_EZ_REV_CIGAR 0x40 // reverse CIGAR in the output
|
#define KSW_EZ_EXTZ_ONLY 0x40 // only perform extension
|
||||||
|
#define KSW_EZ_REV_CIGAR 0x80 // reverse CIGAR in the output
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t max:31, zdropped:1;
|
uint32_t max:31, zdropped:1;
|
||||||
|
|
|
||||||
|
|
@ -295,7 +295,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
} else {
|
} else {
|
||||||
++last_H0_t, H0 += u8[last_H0_t] - qe;
|
++last_H0_t, H0 += u8[last_H0_t] - qe;
|
||||||
}
|
}
|
||||||
if (apply_zdrop(ez, H0, r, last_H0_t, zdrop, e)) break;
|
if ((flag & KSW_EZ_APPROX_DROP) && apply_zdrop(ez, H0, r, last_H0_t, zdrop, e)) break;
|
||||||
} else H0 = v8[0] - qe - qe, last_H0_t = 0;
|
} else H0 = v8[0] - qe - qe, last_H0_t = 0;
|
||||||
if (r == qlen + tlen - 2 && en0 == tlen - 1)
|
if (r == qlen + tlen - 2 && en0 == tlen - 1)
|
||||||
ez->score = H0;
|
ez->score = H0;
|
||||||
|
|
|
||||||
9
mmpriv.h
9
mmpriv.h
|
|
@ -39,18 +39,17 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
|
||||||
|
|
||||||
static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a)
|
static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a)
|
||||||
{
|
{
|
||||||
int32_t k = r->as;
|
int32_t k = r->as, q_span = (int32_t)(a[k].y>>32&0xff);
|
||||||
r->rev = a[k].x>>63;
|
r->rev = a[k].x>>63;
|
||||||
r->rid = a[k].x<<1>>33;
|
r->rid = a[k].x<<1>>33;
|
||||||
assert(r->rid != 0x7fffffff);
|
r->rs = (int32_t)a[k].x + 1 > q_span? (int32_t)a[k].x + 1 - q_span : 0; // NB: target span may be shorter, so this test is necessary
|
||||||
r->rs = (int32_t)a[k].x + 1 > (int32_t)(a[k].y>>32)? (int32_t)a[k].x + 1 - (int32_t)(a[k].y>>32) : 0;
|
|
||||||
r->re = (int32_t)a[k + r->cnt - 1].x + 1;
|
r->re = (int32_t)a[k + r->cnt - 1].x + 1;
|
||||||
if (!r->rev) {
|
if (!r->rev) {
|
||||||
r->qs = (int32_t)a[k].y + 1 - (int32_t)(a[k].y>>32);
|
r->qs = (int32_t)a[k].y + 1 - q_span;
|
||||||
r->qe = (int32_t)a[k + r->cnt - 1].y + 1;
|
r->qe = (int32_t)a[k + r->cnt - 1].y + 1;
|
||||||
} else {
|
} else {
|
||||||
r->qs = qlen - ((int32_t)a[k + r->cnt - 1].y + 1);
|
r->qs = qlen - ((int32_t)a[k + r->cnt - 1].y + 1);
|
||||||
r->qe = qlen - ((int32_t)a[k].y + 1 - (int32_t)(a[k].y>>32));
|
r->qe = qlen - ((int32_t)a[k].y + 1 - q_span);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,57 @@
|
||||||
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include "mmpriv.h"
|
||||||
|
#include "kalloc.h"
|
||||||
|
|
||||||
|
/*
 * Squeeze out the parts of a[] that are not referenced by regs[].
 *
 * After the call the anchor runs of all regions are stored back-to-back
 * starting at a[0], and each region's `as` is updated to its new offset.
 *
 * km      not used by this function
 * n_regs  number of entries in regs[] and aux[]
 * regs    regions; `as`/`cnt` give each region's run inside a[]
 * a       anchor array, compacted in place
 * aux     one entry per region with the index into regs[] in the low 32
 *         bits; the caller builds it as (as << 32 | index) and sorts it, so
 *         walking aux[] visits regions by ascending offset in a[]
 *
 * Returns the number of anchors kept at the front of a[].
 */
static int mm_squeeze_a_core(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a, const uint64_t *aux)
{
	int i, as = 0;
	for (i = 0; i < n_regs; ++i) {
		// Visit regions in ascending a[] order: every move then goes toward
		// the front and can never overwrite a run that has not been moved yet.
		mm_reg1_t *r = &regs[(int32_t)aux[i]];
		if (r->as != as) {
			memmove(&a[as], &a[r->as], r->cnt * sizeof(*a)); // source and destination may overlap
			r->as = as;
		}
		as += r->cnt;
	}
	return as;
}
|
||||||
|
|
||||||
|
/*
 * Join pairs of chains that sit next to each other in a[] and look like two
 * halves of one longer colinear chain.
 *
 * km      allocator handle passed to kmalloc()/kfree()
 * opt     mapping options; reads min_join_flank_sc, max_join_long and
 *         max_join_short
 * mi      not used by this function
 * qlen    query length, forwarded to mm_reg_set_coor()
 * n_regs  number of regions in regs[]
 * regs    regions; a joined pair is merged into the first one and the
 *         second one is left with cnt == 0
 * a       anchor array; compacted in place, and the first anchor of every
 *         absorbed chain gets bit 40 of `y` set
 */
void mm_join_long(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a)
{
	int i, n_aux;
	uint64_t *aux;

	if (n_regs < 2) return; // nothing to join

	// aux[]: region start in a[] (high 32 bits) | region index (low 32 bits),
	// sorted so that regions can be walked by their offset in a[]
	aux = (uint64_t*)kmalloc(km, n_regs * sizeof(*aux));
	for (i = n_aux = 0; i < n_regs; ++i)
		aux[n_aux++] = (uint64_t)regs[i].as << 32 | i;
	assert(n_aux == n_regs); // TODO: may be relaxed in future for other use cases
	radix_sort_64(aux, aux + n_aux);
	mm_squeeze_a_core(km, n_regs, regs, a, aux);

	// walk from the back so that a chain which has just absorbed its right
	// neighbour can itself be absorbed by its left neighbour
	for (i = n_aux - 1; i >= 1; --i) {
		mm_reg1_t *r0 = &regs[(int32_t)aux[i-1]], *r1 = &regs[(int32_t)aux[i]];
		mm128_t *a0e, *a1s;
		int max_gap, min_gap;
		uint64_t x_gap;

		// test
		if (r0->as + r0->cnt != r1->as) continue; // not adjacent in a[]
		if (r0->rid != r1->rid || r0->rev != r1->rev) continue; // make sure on the same target and strand
		if (r0->score < opt->min_join_flank_sc || r1->score < opt->min_join_flank_sc) continue; // require good flanking chains
		a0e = &a[r0->as + r0->cnt - 1]; // last anchor of the left chain
		a1s = &a[r1->as];               // first anchor of the right chain
		if (a1s->x <= a0e->x || (int32_t)a1s->y <= (int32_t)a0e->y) continue; // keep colinearity

		// gap on the query (y) and on the target (x); both are positive here
		// because of the colinearity test above
		max_gap = min_gap = (int32_t)a1s->y - (int32_t)a0e->y;
		x_gap = a1s->x - a0e->x;
		max_gap = (uint64_t)max_gap > x_gap? max_gap : (int)x_gap;
		min_gap = (uint64_t)min_gap < x_gap? min_gap : (int)x_gap;
		if (max_gap > opt->max_join_long || min_gap > opt->max_join_short) continue;

		// all conditions satisfied; join
		a[r1->as].y |= 1ULL<<40; // flag the first anchor of the absorbed chain
		r0->cnt += r1->cnt, r0->score += r1->score;
		mm_reg_set_coor(r0, qlen, a); // recompute r0's coordinates over the merged run
		r1->cnt = 0;
	}
	kfree(km, aux);
}
|
||||||
Loading…
Reference in New Issue