r224: inversion alignment around Z-drop break
This commit is contained in:
parent
120bebc290
commit
19d6ec885e
65
align.c
65
align.c
|
|
@ -355,23 +355,53 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
kfree(km, tseq);
|
||||
}
|
||||
|
||||
static void mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, uint8_t *qseq0[2], const mm_reg1_t *r1, const mm_reg1_t *r2, mm_reg1_t *r_inv, ksw_extz_t *ez)
|
||||
static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], const mm_reg1_t *r1, const mm_reg1_t *r2, mm_reg1_t *r_inv, ksw_extz_t *ez)
|
||||
{
|
||||
int tlen, qlen;
|
||||
int tl, ql, ret = 0;
|
||||
uint8_t *tseq, *qseq;
|
||||
int8_t mat[25];
|
||||
memset(r_inv, 0, sizeof(mm_reg1_t));
|
||||
if (r1->rid != r2->rid || r1->rev != r2->rev) return;
|
||||
qlen = r2->qs - r1->qe;
|
||||
tlen = r2->rs - r1->re;
|
||||
if (qlen < 0 || qlen > opt->max_gap) return;
|
||||
if (tlen < 0 || tlen > opt->max_gap) return;
|
||||
if (!(r1->split&1) || !(r2->split&2)) return 0;
|
||||
if (r1->rid != r2->rid || r1->rev != r2->rev) return 0;
|
||||
ql = r2->qs - r1->qe;
|
||||
tl = r2->rs - r1->re;
|
||||
if (ql < 0 || ql > opt->max_gap) return 0;
|
||||
if (tl < 0 || tl > opt->max_gap) return 0;
|
||||
ksw_gen_simple_mat(5, mat, opt->a, opt->b);
|
||||
tseq = (uint8_t*)kmalloc(km, tlen);
|
||||
tseq = (uint8_t*)kmalloc(km, tl);
|
||||
mm_idx_getseq(mi, r1->rid, r1->re, r2->rs, tseq);
|
||||
qseq = &qseq0[!r1->rev][r1->qe];
|
||||
mm_align_pair(km, opt, qlen, qseq, tlen, tseq, mat, -1, KSW_EZ_APPROX_MAX, ez);
|
||||
fprintf(stderr, "%d; (%d,%d); (%d,%d)\n", ez->score, r1->re, r2->rs, r1->qe, r2->qs);
|
||||
qseq = &qseq0[!r1->rev][qlen - r2->qs];
|
||||
mm_align_pair(km, opt, ql, qseq, tl, tseq, mat, (int)(opt->bw * 1.5), KSW_EZ_APPROX_MAX, ez);
|
||||
if (ez->n_cigar > 0 && ez->score > 0) {
|
||||
mm_append_cigar(r_inv, ez->n_cigar, ez->cigar);
|
||||
r_inv->p->dp_score = ez->score;
|
||||
mm_update_extra(r_inv->p, qseq, tseq, mat, opt->q, opt->e);
|
||||
if (r_inv->p->dp_max < opt->min_dp_max) { // discard this hit
|
||||
free(r_inv->p);
|
||||
r_inv->p = 0;
|
||||
} else {
|
||||
r_inv->id = -1;
|
||||
r_inv->parent = MM_PARENT_UNSET;
|
||||
r_inv->inv = 1;
|
||||
r_inv->rev = !r1->rev;
|
||||
r_inv->qs = r1->qe, r_inv->qe = r2->qs;
|
||||
r_inv->rs = r1->re, r_inv->re = r2->rs;
|
||||
ret = 1;
|
||||
fprintf(stderr, "here!\n");
|
||||
}
|
||||
}
|
||||
kfree(km, tseq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline mm_reg1_t *mm_insert_reg(const mm_reg1_t *r, int i, int *n_regs, mm_reg1_t *regs)
|
||||
{
|
||||
regs = (mm_reg1_t*)realloc(regs, (*n_regs + 1) * sizeof(mm_reg1_t));
|
||||
if (i + 1 != *n_regs)
|
||||
memmove(®s[i + 2], ®s[i + 1], sizeof(mm_reg1_t) * (*n_regs - i - 1));
|
||||
regs[i + 1] = *r;
|
||||
++*n_regs;
|
||||
return regs;
|
||||
}
|
||||
|
||||
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a)
|
||||
|
|
@ -394,15 +424,10 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
|
|||
for (i = 0; i < n_regs; ++i) {
|
||||
mm_reg1_t r2;
|
||||
mm_align1(km, opt, mi, qlen, qseq0, ®s[i], &r2, a, &ez);
|
||||
if (r2.cnt > 0) {
|
||||
regs = (mm_reg1_t*)realloc(regs, (n_regs + 1) * sizeof(mm_reg1_t)); // this should be very rare
|
||||
if (i + 1 != n_regs)
|
||||
memmove(®s[i + 2], ®s[i + 1], sizeof(mm_reg1_t) * (n_regs - i - 1));
|
||||
regs[i + 1] = r2;
|
||||
++n_regs;
|
||||
}
|
||||
if (i > 0 && (regs[i].split&2) && (regs[i-1].split&1)) {
|
||||
mm_align1_inv(km, opt, mi, qseq0, ®s[i-1], ®s[i], &r2, &ez);
|
||||
if (r2.cnt > 0) regs = mm_insert_reg(&r2, i, &n_regs, regs);
|
||||
if (i > 0 && mm_align1_inv(km, opt, mi, qlen, qseq0, ®s[i-1], ®s[i], &r2, &ez)) {
|
||||
regs = mm_insert_reg(&r2, i, &n_regs, regs);
|
||||
++i; // skip the inserted INV alignment
|
||||
}
|
||||
}
|
||||
*n_regs_ = n_regs;
|
||||
|
|
|
|||
3
format.c
3
format.c
|
|
@ -56,7 +56,8 @@ static void mm_sprintf_lite(kstring_t *s, const char *fmt, ...)
|
|||
|
||||
static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
|
||||
{
|
||||
mm_sprintf_lite(s, "\tcm:i:%d\ts1:i:%d", r->cnt, r->score);
|
||||
int type = r->inv? 'I' : r->id == r->parent? 'P' : 'S';
|
||||
mm_sprintf_lite(s, "\ttp:A:%c\tcm:i:%d\ts1:i:%d", type, r->cnt, r->score);
|
||||
if (r->parent == r->id) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc);
|
||||
if (r->split) mm_sprintf_lite(s, "\tzd:i:%d", r->split);
|
||||
if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
|
||||
|
|
|
|||
10
hit.c
10
hit.c
|
|
@ -125,11 +125,11 @@ void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r)
|
|||
aux = (uint64_t*)kmalloc(km, n * 8);
|
||||
t = (mm_reg1_t*)kmalloc(km, n * sizeof(mm_reg1_t));
|
||||
for (i = n_aux = 0; i < n; ++i) {
|
||||
if (r[i].cnt > 0) { // squeeze out elements with cnt==0 (soft deleted)
|
||||
if (r[i].inv || r[i].cnt > 0) { // squeeze out elements with cnt==0 (soft deleted)
|
||||
assert(r[i].p);
|
||||
aux[n_aux++] = (uint64_t)r[i].p->dp_max << 32 | i;
|
||||
} else if (r[i].p) {
|
||||
kfree(km, r[i].p);
|
||||
free(r[i].p);
|
||||
r[i].p = 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -197,7 +197,7 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re
|
|||
for (i = k = 0; i < *n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
int flt = 0;
|
||||
if (r->cnt < opt->min_cnt) flt = 1;
|
||||
if (!r->inv && r->cnt < opt->min_cnt) flt = 1;
|
||||
if (r->p) {
|
||||
if (r->p->blen - r->p->n_ambi - r->p->n_diff < opt->min_chain_score) flt = 1;
|
||||
else if (r->p->dp_max < opt->min_dp_max) flt = 1;
|
||||
|
|
@ -294,7 +294,9 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc)
|
|||
int i;
|
||||
for (i = 0; i < n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
if (r->parent == r->id) {
|
||||
if (r->inv) {
|
||||
r->mapq = 0;
|
||||
} else if (r->parent == r->id) {
|
||||
int mapq, subsc;
|
||||
float pen_cm = r->cnt >= 10? 1.0f : 0.1f * r->cnt;
|
||||
subsc = r->subsc > min_chain_sc? r->subsc : min_chain_sc;
|
||||
|
|
|
|||
2
main.c
2
main.c
|
|
@ -8,7 +8,7 @@
|
|||
#include "minimap.h"
|
||||
#include "mmpriv.h"
|
||||
|
||||
#define MM_VERSION "2.0-r222-dirty"
|
||||
#define MM_VERSION "2.0-r224-dirty"
|
||||
|
||||
void liftrlimit()
|
||||
{
|
||||
|
|
|
|||
2
map.c
2
map.c
|
|
@ -31,7 +31,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
|
||||
opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1;
|
||||
opt->zdrop = 400;
|
||||
opt->min_dp_max = opt->min_chain_score;
|
||||
opt->min_dp_max = opt->min_chain_score * opt->a;
|
||||
opt->min_ksw_len = 200;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ typedef struct {
|
|||
typedef struct {
|
||||
int32_t id;
|
||||
uint32_t cnt:31, rev:1;
|
||||
uint32_t rid:31, rep:1;
|
||||
uint32_t rid:31, inv:1;
|
||||
int32_t score;
|
||||
int32_t qs, qe, rs, re;
|
||||
int32_t parent, subsc;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
.TH minimap2 1 "28 July 2017" "minimap2-2.0-r219-dirty" "Bioinformatics tools"
|
||||
.TH minimap2 1 "29 July 2017" "minimap2-2.0-r224-dirty" "Bioinformatics tools"
|
||||
.SH NAME
|
||||
.PP
|
||||
minimap2 - mapping and alignment between collections of DNA sequences
|
||||
|
|
@ -329,6 +329,7 @@ cb | cb | cb
|
|||
r | c | l .
|
||||
Tag Type Description
|
||||
_
|
||||
tp A Type of aln: P/primary, S/secondary and I/inversion
|
||||
cm i Number of minimizers on the chain
|
||||
s1 i Chaining score
|
||||
s2 i Chaining score of the best secondary chain
|
||||
|
|
@ -343,7 +344,8 @@ cg Z CIGAR string (only in PAF)
|
|||
.TP 2
|
||||
*
|
||||
Minimap2 may produce suboptimal alignments through long low-complexity regions
|
||||
where seed positions may be inaccurate.
|
||||
where seed positions may be suboptimal. This should not be a big concern
|
||||
because even the optimal alignment may be wrong in such regions.
|
||||
.TP
|
||||
*
|
||||
Minimap2 may produce poor alignments that may need post-filtering. We are still
|
||||
|
|
|
|||
Loading…
Reference in New Issue