backup
This commit is contained in:
parent
523a8832ad
commit
35b84f88c6
102
align.c
102
align.c
|
|
@ -29,26 +29,56 @@ static inline void mm_seq_rev(uint32_t len, uint8_t *seq)
|
|||
t = seq[i], seq[i] = seq[len - 1 - i], seq[len - 1 - i] = t;
|
||||
}
|
||||
|
||||
/*
 * Fold the statistics of one CIGAR into *p.
 *
 * Each cigar[] entry keeps the operation in its low 4 bits and the run length
 * in the bits above. Operations 0, 1 and 2 are handled (they are printed as
 * 'M', 'I' and 'D' by the "MID" lookups used elsewhere in this file); any
 * other operation is ignored and consumes neither sequence.
 *
 * qseq and tseq hold one base code per byte. A code greater than 3 is counted
 * as ambiguous (the debug output elsewhere in this file prints codes through
 * "ACGTN").
 *
 * p->blen, p->n_diff and p->n_ambi are only incremented here, never reset,
 * so the caller must start from initialized counters.
 */
static void mm_update_extra(mm_extra_t *p, const uint8_t *qseq, const uint8_t *tseq, uint32_t n_cigar, uint32_t *cigar)
{
	uint32_t idx, j, t_pos = 0, q_pos = 0;

	for (idx = 0; idx < n_cigar; ++idx) {
		const uint32_t op = cigar[idx] & 0xf;
		const uint32_t run = cigar[idx] >> 4;
		uint32_t ambiguous = 0;

		switch (op) {
		case 0: // consumes both query and target
			for (j = 0; j < run; ++j) {
				if (tseq[t_pos + j] > 3 || qseq[q_pos + j] > 3)
					++p->n_ambi;
				else if (tseq[t_pos + j] != qseq[q_pos + j])
					++p->n_diff;
			}
			t_pos += run;
			q_pos += run;
			p->blen += run;
			break;
		case 1: // consumes query only
			for (j = 0; j < run; ++j)
				if (qseq[q_pos + j] > 3)
					++ambiguous;
			q_pos += run;
			p->blen += run;
			p->n_ambi += ambiguous;
			p->n_diff += run - ambiguous; // every non-ambiguous base counts as a difference
			break;
		case 2: // consumes target only
			for (j = 0; j < run; ++j)
				if (tseq[t_pos + j] > 3)
					++ambiguous;
			t_pos += run;
			p->blen += run;
			p->n_ambi += ambiguous;
			p->n_diff += run - ambiguous; // every non-ambiguous base counts as a difference
			break;
		default: // other operations leave the counters and offsets untouched
			break;
		}
	}
}
|
||||
|
||||
/*
 * Append n_cigar CIGAR operations to r->p, creating or growing the
 * mm_extra_t object as required. Does nothing when n_cigar is 0.
 *
 * r       region whose r->p receives the operations; r->p may be NULL on entry
 * n_cigar number of entries in cigar[]
 * cigar   operations to append (low 4 bits: op, upper bits: length)
 *
 * If the last stored operation has the same op code as cigar[0], the two are
 * merged by adding the lengths instead of storing a second entry.
 *
 * A newly created object is zero-filled, so score, blen, n_diff, n_ambi and
 * n_cigar all start at 0; the callers accumulate into them with "+=".
 *
 * r->p is heap memory from the libc allocator. Allocation failure is not
 * handled here.
 */
static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc()
{
	mm_extra_t *p;
	if (n_cigar == 0) return;
	if (r->p == 0) {
		// capacity is counted in 4-byte words; sizeof() is in bytes, so the
		// header is over-reserved, which is wasteful but safe
		uint32_t capacity = n_cigar + sizeof(mm_extra_t);
		kroundup32(capacity);
		// calloc rather than malloc: score and blen must not start as garbage
		r->p = (mm_extra_t*)calloc(capacity, 4);
		r->p->capacity = capacity;
	} else if (r->p->n_cigar + n_cigar + sizeof(mm_extra_t) > r->p->capacity) {
		r->p->capacity = r->p->n_cigar + n_cigar + sizeof(mm_extra_t);
		kroundup32(r->p->capacity);
		r->p = (mm_extra_t*)realloc(r->p, r->p->capacity * 4);
	}
	p = r->p;
	if (p->n_cigar > 0 && (p->cigar[p->n_cigar-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary
		p->cigar[p->n_cigar-1] += cigar[0]>>4<<4; // add only the length bits, keep the op code
		if (n_cigar > 1) memcpy(p->cigar + p->n_cigar, cigar + 1, (n_cigar - 1) * 4);
		p->n_cigar += n_cigar - 1;
	} else {
		memcpy(p->cigar + p->n_cigar, cigar, n_cigar * 4);
		p->n_cigar += n_cigar;
	}
}
|
||||
|
||||
|
|
@ -56,12 +86,13 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
{
|
||||
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63;
|
||||
uint8_t *tseq, *qseq;
|
||||
int32_t i, k, l, rs0, re0, qs0, qe0;
|
||||
int32_t i, l, bw, rs0, re0, qs0, qe0;
|
||||
int32_t rs, re, qs, qe;
|
||||
int32_t rs1, qs1;
|
||||
int32_t rs1, qs1, re1, qe1;
|
||||
int8_t mat[25];
|
||||
|
||||
ksw_gen_simple_mat(5, mat, opt->a, opt->b);
|
||||
bw = (int)(opt->bw * 1.5 + 1.);
|
||||
|
||||
rs = (int32_t)a[r->as].x + 1; // NB: this is the same as r->{rs,re}
|
||||
re = (int32_t)a[r->as + r->cnt - 1].x + 1;
|
||||
|
|
@ -78,7 +109,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
} else rs0 = rs, qs0 = qs;
|
||||
|
||||
if (qe < qlen && re < mi->seq[rid].len) {
|
||||
l = qlen - re < opt->max_gap? qlen - re : opt->max_gap;
|
||||
l = qlen - qe < opt->max_gap? qlen - qe : opt->max_gap;
|
||||
qe0 = qe + l;
|
||||
l += (l * opt->a - opt->q) / opt->e;
|
||||
l = l < opt->max_gap? l : opt->max_gap;
|
||||
|
|
@ -86,7 +117,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
re0 = re + l;
|
||||
} else re0 = re, qe0 = qe;
|
||||
|
||||
tseq = (uint8_t*)kmalloc(km, re0 - rs0);
|
||||
tseq = (uint8_t*)kmalloc(km, re0 - rs0); // TODO: we can allocate a smaller size
|
||||
|
||||
if (qs > 0 && rs > 0) { // left extension
|
||||
uint32_t ql = qs - qs0, tl = rs - rs0;
|
||||
|
|
@ -99,12 +130,15 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
for (k = 0; k < tl; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr);
|
||||
for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr);
|
||||
#endif
|
||||
ksw_extz2_sse(km, ql, qseq, tl, tseq, 5, mat, opt->q, opt->e, (int)(opt->bw * 1.5 + .499), opt->zdrop, KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
|
||||
mm_seq_rev(ql, qseq);
|
||||
ksw_extz2_sse(km, ql, qseq, tl, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
|
||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||
mm_update_extra(r->p, qseq, tseq, ez->n_cigar, ez->cigar);
|
||||
r->p->score += ez->score;
|
||||
rs1 = rs - (ez->max_t + 1);
|
||||
qs1 = qs - (ez->max_q + 1);
|
||||
mm_seq_rev(ql, qseq);
|
||||
} else rs1 = rs, qs1 = qs;
|
||||
assert(qs1 >= 0 && rs1 >= 0);
|
||||
|
||||
for (i = 1; i < r->cnt; ++i) { // gap filling
|
||||
re = (int32_t)a[r->as + i].x + 1;
|
||||
|
|
@ -117,21 +151,37 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
for (k = 0; k < re - rs; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr);
|
||||
for (k = 0; k < qe - qs; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr);
|
||||
#endif
|
||||
ksw_extz2_sse(km, qe-qs, qseq, re-rs, tseq, 5, mat, opt->q, opt->e, (int)(opt->bw * 1.5 + .499), opt->zdrop, KSW_EZ_DYN_BAND, ez);
|
||||
ksw_extz2_sse(km, qe-qs, qseq, re-rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_DYN_BAND, ez);
|
||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||
mm_update_extra(r->p, qseq, tseq, ez->n_cigar, ez->cigar);
|
||||
if (ez->score == KSW_NEG_INF) { // truncated by Z-drop
|
||||
r->p->score += ez->max;
|
||||
abort();
|
||||
} else {
|
||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||
r->p->score += ez->score;
|
||||
}
|
||||
//for (k = 0; k < r->cigar->n; ++k) fprintf(stderr, "%d%c", r->cigar->cigar[k]>>4, "MID"[r->cigar->cigar[k]&0xf]); fputc('\n', stderr);
|
||||
rs = re, qs = qe;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == r->cnt) { // right extension
|
||||
}
|
||||
if (i == r->cnt && qe < qe0 && re < re0) { // right extension
|
||||
qseq = &qseq0[rev][qe];
|
||||
mm_idx_getseq(mi, rid, re, re0, tseq);
|
||||
ksw_extz2_sse(km, qe0-qe, qseq, re0-re, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_EXTZ_ONLY, ez);
|
||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||
mm_update_extra(r->p, qseq, tseq, ez->n_cigar, ez->cigar);
|
||||
r->p->score += ez->score;
|
||||
re1 = re + (ez->max_t + 1);
|
||||
qe1 = qe + (ez->max_q + 1);
|
||||
} else re1 = re, qe1 = qe;
|
||||
assert(qe1 <= qlen);
|
||||
|
||||
for (i = 0; i < r->cigar->n; ++i) fprintf(stderr, "%d%c", r->cigar->cigar[i]>>4, "MID"[r->cigar->cigar[i]&0xf]); fputc('\n', stderr);
|
||||
r->rs = rs1, r->re = re1;
|
||||
if (rev) r->qs = qlen - qe1, r->qe = qlen - qs1;
|
||||
else r->qs = qs1, r->qe = qe1;
|
||||
|
||||
// for (i = 0; i < r->p->n_cigar; ++i) fprintf(stderr, "%d%c", r->p->cigar[i]>>4, "MID"[r->p->cigar[i]&0xf]); fputc('\n', stderr);
|
||||
kfree(km, tseq);
|
||||
}
|
||||
|
||||
|
|
|
|||
11
map.c
11
map.c
|
|
@ -16,7 +16,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
opt->sdust_thres = 0;
|
||||
|
||||
opt->min_score = 40;
|
||||
opt->bw = 500;
|
||||
opt->bw = 1000;
|
||||
opt->max_gap = 10000;
|
||||
opt->max_skip = 15;
|
||||
|
||||
|
|
@ -381,11 +381,14 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
printf("%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]);
|
||||
if (mi->seq[r->rid].name) fputs(mi->seq[r->rid].name, stdout);
|
||||
else printf("%d", r->rid + 1);
|
||||
printf("\t%d\t%d\t%d\t%d\t%d\t255\tcm:i:%d", mi->seq[r->rid].len, r->rs, r->re, r->score,
|
||||
r->re - r->rs > r->qe - r->qs? r->re - r->rs : r->qe - r->qs, r->cnt);
|
||||
printf("\t%d\t%d\t%d", mi->seq[r->rid].len, r->rs, r->re);
|
||||
if (r->p) printf("\t%d\t%d\t255", r->p->blen - r->p->n_ambi - r->p->n_diff, r->p->blen);
|
||||
else printf("\t%d\t%d\t255", r->score, r->re - r->rs > r->qe - r->qs? r->re - r->rs : r->qe - r->qs);
|
||||
printf("\tcm:i:%d", r->cnt);
|
||||
if (r->parent == j) printf("\tss:i:%d", r->subsc);
|
||||
if (r->p) printf("\tNM:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->score, r->p->n_ambi);
|
||||
putchar('\n');
|
||||
free(r->cigar);
|
||||
free(r->p);
|
||||
}
|
||||
free(s->reg[i]);
|
||||
free(s->seq[i].seq); free(s->seq[i].name);
|
||||
|
|
|
|||
10
minimap.h
10
minimap.h
|
|
@ -42,9 +42,13 @@ typedef struct {
|
|||
} mm_idx_t;
|
||||
|
||||
/*
 * CIGAR-only container referenced through mm_reg1_t::cigar.
 * Allocated as one block with cigar[] as its trailing array.
 */
typedef struct {
	uint32_t n, m;    // n: operations stored in cigar[]; m: allocated size in 4-byte words
	uint32_t cigar[]; // low 4 bits: operation, upper bits: length
} mm_cigar_t;

/*
 * Per-alignment data referenced through mm_reg1_t::p.
 * Allocated as one block with cigar[] as its trailing array.
 */
typedef struct {
	uint32_t capacity;       // allocated size of the whole object, in 4-byte words
	int32_t score;           // sum of the alignment scores added by the aligner; printed as AS:i
	uint32_t blen;           // total length of the M/I/D operations seen so far
	uint32_t n_diff, n_ambi; // differing positions (printed as NM:i) and ambiguous positions (printed as nn:i)
	uint32_t n_cigar;        // number of operations stored in cigar[]
	uint32_t cigar[];        // low 4 bits: operation, upper bits: length
} mm_extra_t;
|
||||
|
||||
typedef struct {
|
||||
uint32_t cnt:31, rev:1;
|
||||
|
|
@ -53,7 +57,7 @@ typedef struct {
|
|||
int32_t qs, qe, rs, re;
|
||||
int32_t parent, subsc;
|
||||
int32_t as;
|
||||
mm_cigar_t *cigar;
|
||||
mm_extra_t *p;
|
||||
} mm_reg1_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
|||
Loading…
Reference in New Issue