get the left-extension sequence correctly
This commit is contained in:
parent
990f7b0b71
commit
6c8368c24c
64
align.c
64
align.c
|
|
@ -2,31 +2,60 @@
|
||||||
#include "minimap.h"
|
#include "minimap.h"
|
||||||
#include "ksw2.h"
|
#include "ksw2.h"
|
||||||
|
|
||||||
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r_split, mm128_t *a)
|
/**
 * Reverse a byte sequence in place.
 *
 * @param len  number of bytes in seq
 * @param seq  buffer to reverse; its contents are overwritten
 *
 * Swaps seq[i] with seq[len - 1 - i] for the first len/2 positions, so the
 * middle byte of an odd-length sequence is left untouched and len == 0 or
 * len == 1 performs no writes.
 */
static inline void mm_seq_rev(uint32_t len, uint8_t *seq)
{
	uint32_t i;
	const uint32_t half = len >> 1; /* only the first half needs visiting */

	for (i = 0; i < half; ++i) {
		uint8_t t = seq[i];
		seq[i] = seq[len - 1 - i];
		seq[len - 1 - i] = t;
	}
}
|
||||||
|
|
||||||
|
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a)
|
||||||
{
|
{
|
||||||
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63;
|
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63;
|
||||||
uint8_t *tseq0, *tseq, *qseq;
|
uint8_t *tseq, *qseq;
|
||||||
int32_t i, l, rs0, re0;
|
int32_t i, k, l, rs0, re0, qs0, qe0;
|
||||||
int32_t rs, re, qs, qe, ret;
|
int32_t rs, re, qs, qe, ret;
|
||||||
|
mm_reg1_t r1;
|
||||||
|
|
||||||
l = r->qs < opt->max_gap? r->qs : opt->max_gap;
|
rs = (int32_t)a[r->as].x + 1; // NB: this is the same as r->{rs,re}
|
||||||
|
re = (int32_t)a[r->as + r->cnt - 1].x + 1;
|
||||||
|
qs = (int32_t)a[r->as].y + 1; // NB: this is the coordinate on the reverse strand; r->{qs,qe} are on the reverse strand
|
||||||
|
qe = (int32_t)a[r->as + r->cnt - 1].y + 1;
|
||||||
|
|
||||||
|
if (qs > 0 && rs > 0) {
|
||||||
|
l = qs < opt->max_gap? qs : opt->max_gap;
|
||||||
|
qs0 = qs - l;
|
||||||
l = (l * opt->a - opt->q) / opt->e;
|
l = (l * opt->a - opt->q) / opt->e;
|
||||||
l = l < opt->max_gap? l : opt->max_gap;
|
l = l < opt->max_gap? l : opt->max_gap;
|
||||||
l = l < r->rs? l : r->rs;
|
l = l < rs? l : rs;
|
||||||
rs0 = r->rs - l;
|
rs0 = rs - l;
|
||||||
|
} else rs0 = rs, qs0 = qs;
|
||||||
|
|
||||||
l = qlen - r->re < opt->max_gap? qlen - r->re : opt->max_gap;
|
if (qe < qlen && re < mi->seq[rid].len) {
|
||||||
|
l = qlen - re < opt->max_gap? qlen - re : opt->max_gap;
|
||||||
|
qe0 = qe + l;
|
||||||
l = (l * opt->a - opt->q) / opt->e;
|
l = (l * opt->a - opt->q) / opt->e;
|
||||||
l = l < opt->max_gap? l : opt->max_gap;
|
l = l < opt->max_gap? l : opt->max_gap;
|
||||||
l = l < mi->seq[rid].len - r->re? l : mi->seq[rid].len - r->re;
|
l = l < mi->seq[rid].len - re? l : mi->seq[rid].len - re;
|
||||||
re0 = r->re + l;
|
re0 = re + l;
|
||||||
|
} else re0 = re, qe0 = qe;
|
||||||
|
|
||||||
tseq0 = (uint8_t*)kmalloc(km, re0 - rs0);
|
tseq = (uint8_t*)kmalloc(km, re0 - rs0);
|
||||||
ret = mm_idx_getseq(mi, rid, rs0, re0, tseq0);
|
|
||||||
|
if (qs > 0 && rs > 0) { // left extension
|
||||||
|
uint32_t ql = qs - qs0, tl = rs - rs0;
|
||||||
|
qseq = &qseq0[rev][qs0];
|
||||||
|
ret = mm_idx_getseq(mi, rid, rs0, rs, tseq);
|
||||||
assert(ret > 0);
|
assert(ret > 0);
|
||||||
|
mm_seq_rev(ql, qseq);
|
||||||
rs = (int32_t)a[r->as].x + 1;
|
mm_seq_rev(tl, tseq);
|
||||||
qs = (int32_t)a[r->as].y + 1;
|
fprintf(stderr, "===> [-1] %d-%d %c (%s:%d-%d) <===\n", qs0, qs, "+-"[rev], mi->seq[rid].name, rs0, rs);
|
||||||
|
for (k = 0; k < tl; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr);
|
||||||
|
for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr);
|
||||||
|
mm_seq_rev(ql, qseq);
|
||||||
|
}
|
||||||
|
/*
|
||||||
for (i = 1; i < r->cnt; ++i) {
|
for (i = 1; i < r->cnt; ++i) {
|
||||||
re = (int32_t)a[r->as + i].x + 1;
|
re = (int32_t)a[r->as + i].x + 1;
|
||||||
qe = (int32_t)a[r->as + i].y + 1;
|
qe = (int32_t)a[r->as + i].y + 1;
|
||||||
|
|
@ -41,7 +70,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
||||||
rs = re, qs = qe;
|
rs = re, qs = qe;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
kfree(km, tseq0);
|
*/
|
||||||
|
kfree(km, tseq);
|
||||||
}
|
}
|
||||||
|
|
||||||
void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||||
|
|
@ -58,8 +88,8 @@ void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
||||||
}
|
}
|
||||||
|
|
||||||
for (reg = 0; reg < n_regs; ++reg) {
|
for (reg = 0; reg < n_regs; ++reg) {
|
||||||
mm_reg1_t r_split;
|
mm_reg1_t r2;
|
||||||
mm_align1(km, opt, mi, qlen, qseq0, ®s[reg], &r_split, a);
|
mm_align1(km, opt, mi, qlen, qseq0, ®s[reg], &r2, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
kfree(km, qseq0[0]); kfree(km, qseq0[1]);
|
kfree(km, qseq0[0]); kfree(km, qseq0[1]);
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,11 @@ typedef struct {
|
||||||
mm_idx_bucket_t *B; // index
|
mm_idx_bucket_t *B; // index
|
||||||
} mm_idx_t;
|
} mm_idx_t;
|
||||||
|
|
||||||
|
/*
 * Variable-length CIGAR container.
 *
 * The trailing cigar[] is a C99 flexible array member: it contributes no
 * storage to sizeof(mm_cigar_t), so an instance must be allocated with room
 * for the desired number of uint32_t entries after the header.
 *
 * NOTE(review): n_cigar / m_cigar are presumably the number of entries in use
 * and the allocated capacity respectively -- confirm against the code that
 * fills this structure.
 */
typedef struct {
	uint32_t n_cigar, m_cigar;
	uint32_t cigar[];
} mm_cigar_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t cnt:31, rev:1;
|
uint32_t cnt:31, rev:1;
|
||||||
uint32_t rid:31, rep:1;
|
uint32_t rid:31, rep:1;
|
||||||
|
|
@ -48,6 +53,7 @@ typedef struct {
|
||||||
int32_t qs, qe, rs, re;
|
int32_t qs, qe, rs, re;
|
||||||
int32_t parent, subsc;
|
int32_t parent, subsc;
|
||||||
int32_t as;
|
int32_t as;
|
||||||
|
mm_cigar_t *cigar;
|
||||||
} mm_reg1_t;
|
} mm_reg1_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue