r104: long gap patching

This commit is contained in:
Heng Li 2017-06-29 14:54:54 -04:00
parent 17944a75c2
commit b9075d39a8
3 changed files with 26 additions and 13 deletions

14
align.c
View File

@ -167,6 +167,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
int32_t rs1, qs1, re1, qe1;
int8_t mat[25];
if (r->cnt == 0) return;
ksw_gen_simple_mat(5, mat, opt->a, opt->b);
bw = (int)(opt->bw * 1.5 + 1.);
@ -221,7 +222,10 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
for (i = 1; i < r->cnt; ++i) { // gap filling
mm_adjust_minier(mi, qseq0, &a[r->as + i], &re, &qe);
re1 = re, qe1 = qe;
if (i == r->cnt - 1 || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) {
if (i == r->cnt - 1 || (a[r->as+i].y>>40&1) || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) {
int bw1 = bw;
if (a[r->as+i].y>>40&1)
bw1 = qe - qs > re - rs? qe - qs : re - rs;
qseq = &qseq0[rev][qs];
mm_idx_getseq(mi, rid, rs, re, tseq);
#if 0
@ -229,9 +233,9 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
for (k = 0; k < tl; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr);
for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr);
#endif
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_APPROX_MAX, ez);
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw1, opt->zdrop, KSW_EZ_APPROX_MAX, ez);
if (mm_check_zdrop(qseq, tseq, ez->n_cigar, ez->cigar, mat, opt->q, opt->e, opt->zdrop))
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, 0, ez);
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw1, opt->zdrop, 0, ez);
if (ez->n_cigar > 0) {
mm_append_cigar(r, ez->n_cigar, ez->cigar);
mm_update_extra(r->p, qseq, tseq, ez->n_cigar, ez->cigar, 0);
@ -313,8 +317,8 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
for (r = i = 0; r < n_regs; ++r) {
mm_reg1_t *reg = &regs[r];
int flt = 0;
if (reg->p->blen - reg->p->n_ambi - reg->p->n_diff < opt->min_chain_score) flt = 1;
else if (reg->cnt < opt->min_cnt) flt = 1;
if (reg->cnt < opt->min_cnt) flt = 1;
else if (reg->p->blen - reg->p->n_ambi - reg->p->n_diff < opt->min_chain_score) flt = 1;
else if (reg->p->score < opt->min_dp_score && (reg->qe - reg->qs) * 2 < qlen) flt = 1;
if (flt) free(reg->p);
else if (i < r) regs[i++] = regs[r]; // NB: this also move the regs[r].p pointer

2
main.c
View File

@ -10,7 +10,7 @@
#include "minimap.h"
#include "mmpriv.h"
#define MM_VERSION "2.0-r99-pre"
#define MM_VERSION "2.0-r104-pre"
void liftrlimit()
{

23
map.c
View File

@ -225,9 +225,13 @@ void mm_select_sub(float mask_level, float pri_ratio, int *n_, mm_reg1_t *r, voi
}
}
static int mm_squeeze_a_core(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a, const uint64_t *aux)
static int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a)
{ // squeeze out regions in a[] that are not referenced by regs[]
int i, as = 0;
int i, n_aux, as = 0;
uint64_t *aux;
aux = (uint64_t*)kmalloc(km, n_regs * 8);
for (i = n_aux = 0; i < n_regs; ++i)
aux[n_aux++] = (uint64_t)regs[i].as << 32 | i;
for (i = 0; i < n_regs; ++i) {
mm_reg1_t *r = &regs[i];
if (r->as != as) {
@ -236,21 +240,23 @@ static int mm_squeeze_a_core(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a,
}
as += r->cnt;
}
kfree(km, aux);
return as;
}
void mm_join_long(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a)
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a)
{
int i, n_aux;
uint64_t *aux;
if (n_regs < 2) return; // nothing to join
mm_squeeze_a(km, n_regs, regs, a);
aux = (uint64_t*)kmalloc(km, n_regs * 8);
for (i = n_aux = 0; i < n_regs; ++i)
aux[n_aux++] = (uint64_t)regs[i].as << 32 | i;
assert(n_aux == n_regs); // TODO: may be relaxed in future for other use cases
if (regs[i].parent == i || regs[i].parent < 0)
aux[n_aux++] = (uint64_t)regs[i].as << 32 | i;
radix_sort_64(aux, aux + n_aux);
mm_squeeze_a_core(km, n_regs, regs, a, aux);
for (i = n_aux - 1; i >= 1; --i) {
mm_reg1_t *r0 = &regs[(int32_t)aux[i-1]], *r1 = &regs[(int32_t)aux[i]];
@ -351,8 +357,10 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
n_u = mm_chain_dp(opt->max_gap, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, n_a, a, &u, b->km);
regs = mm_gen_reg(qlen, n_u, u, a);
*n_regs = n_u;
if (!(opt->flag & MM_F_AVA)) // don't choose primary mapping(s) for read overlap
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
mm_select_sub(opt->mask_level, opt->pri_ratio, n_regs, regs, b->km);
mm_join_long(b->km, opt, qlen, *n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle
}
if (opt->flag & MM_F_CIGAR)
regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a);
@ -432,6 +440,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
bseq1_t *t = &s->seq[i];
for (j = 0; j < s->n_reg[i]; ++j) {
mm_reg1_t *r = &s->reg[i][j];
if (r->cnt == 0) continue;
if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, j, r);
else mm_write_paf(&p->str, mi, t, j, r);
puts(p->str.s);