From d75b5b4e8a74e4f13c71e35cb8458ccb814afc59 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 23 Jun 2017 19:21:47 -0400 Subject: [PATCH] backup; NOT working yet --- align.c | 46 +++++++++++++++++++++++++++++++++------------- map.c | 1 + 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/align.c b/align.c index 14b4f3d..6a76f50 100644 --- a/align.c +++ b/align.c @@ -31,18 +31,25 @@ static inline void mm_seq_rev(uint32_t len, uint8_t *seq) static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc() { + if (n_cigar == 0) return; if (r->cigar == 0) { uint32_t m_cigar = n_cigar + 2; kroundup32(m_cigar); r->cigar = (mm_cigar_t*)malloc(m_cigar * 4); r->cigar->n = 0, r->cigar->m = m_cigar; } else if (r->cigar->n + n_cigar > r->cigar->m - 2) { + r->cigar->m = r->cigar->n + n_cigar + 2; kroundup32(r->cigar->m); - r->cigar->m = r->cigar->n + n_cigar; r->cigar = (mm_cigar_t*)realloc(r->cigar, r->cigar->m * 4); } - memcpy(r->cigar->cigar + r->cigar->n, cigar, n_cigar * 4); - r->cigar->n += n_cigar; + if (r->cigar->n > 0 && (r->cigar->cigar[r->cigar->n-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary + r->cigar->cigar[r->cigar->n-1] += cigar[0]>>4<<4; + if (n_cigar > 1) memcpy(r->cigar->cigar + r->cigar->n, cigar + 1, (n_cigar - 1) * 4); + r->cigar->n += n_cigar - 1; + } else { + memcpy(r->cigar->cigar + r->cigar->n, cigar, n_cigar * 4); + r->cigar->n += n_cigar; + } } static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez) @@ -50,7 +57,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63; uint8_t *tseq, *qseq; int32_t i, k, l, rs0, re0, qs0, qe0; - int32_t rs, re, qs, qe, ret; + int32_t rs, re, qs, qe; + int32_t rs1, qs1; int8_t mat[25]; ksw_gen_simple_mat(5, mat, opt->a, opt->b); @@ -83,34 +91,46 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int if (qs > 0 && rs > 0) { // left extension uint32_t ql = qs - qs0, tl = rs - rs0; qseq = &qseq0[rev][qs0]; - ret = mm_idx_getseq(mi, rid, rs0, rs, tseq); - assert(ret > 0); + mm_idx_getseq(mi, rid, rs0, rs, tseq); mm_seq_rev(ql, qseq); mm_seq_rev(tl, tseq); + #if 0 fprintf(stderr, "===> [-1] %d-%d %c (%s:%d-%d) <===\n", qs0, qs, "+-"[rev], mi->seq[rid].name, rs0, rs); for (k = 0; k < tl; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr); for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr); + #endif ksw_extz2_sse(km, ql, qseq, tl, tseq, 5, mat, opt->q, opt->e, (int)(opt->bw * 1.5 + .499), opt->zdrop, KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez); mm_seq_rev(ql, qseq); mm_append_cigar(r, ez->n_cigar, ez->cigar); - } - for (i = 0; i < r->cigar->n; ++i) fprintf(stderr, "%d%c", r->cigar->cigar[i]>>4, "MID"[r->cigar->cigar[i]&0xf]); fputc('\n', stderr); -/* - for (i = 1; i < r->cnt; ++i) { + rs1 = rs - (ez->max_t + 1); + qs1 = qs - (ez->max_q + 1); + } else rs1 = rs, qs1 = qs; + fprintf(stderr, "%d,%d\n", rs1, qs1); for (i = 0; i < r->cigar->n; ++i) fprintf(stderr, "%d%c", r->cigar->cigar[i]>>4, "MID"[r->cigar->cigar[i]&0xf]); fputc('\n', stderr); + + for (i = 1; i < r->cnt; ++i) { // gap filling re = (int32_t)a[r->as + i].x + 1; qe = (int32_t)a[r->as + i].y + 1; if (i == r->cnt - 1 || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) { qseq = &qseq0[rev][qs]; - tseq = &tseq0[rs - rs0]; - #if 0 + mm_idx_getseq(mi, rid, rs, re, tseq); + #if 1 fprintf(stderr, "===> [%d] %d-%d %c (%s:%d-%d) <===\n", i, qs, qe, "+-"[rev], mi->seq[rid].name, rs, re); for (k = 0; k < re - rs; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr); for (k = 0; k < qe - qs; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr); #endif + ksw_extz2_sse(km, qe-qs, qseq, re-rs, tseq, 5, mat, opt->q, opt->e, (int)(opt->bw * 1.5 + .499), opt->zdrop, KSW_EZ_DYN_BAND, ez); + if (ez->score == KSW_NEG_INF) { // truncated by Z-drop + } else { + mm_append_cigar(r, ez->n_cigar, ez->cigar); + } + for (i = 0; i < r->cigar->n; ++i) fprintf(stderr, "%d%c", r->cigar->cigar[i]>>4, "MID"[r->cigar->cigar[i]&0xf]); fputc('\n', stderr); rs = re, qs = qe; } } -*/ + + if (i == r->cnt) { + } +// fprintf(stderr, "%d,%d\n", rs1, qs1); for (i = 0; i < r->cigar->n; ++i) fprintf(stderr, "%d%c", r->cigar->cigar[i]>>4, "MID"[r->cigar->cigar[i]&0xf]); fputc('\n', stderr); kfree(km, tseq); } diff --git a/map.c b/map.c index e86143c..7839f86 100644 --- a/map.c +++ b/map.c @@ -385,6 +385,7 @@ static void *worker_pipeline(void *shared, int step, void *in) r->re - r->rs > r->qe - r->qs? r->re - r->rs : r->qe - r->qs, r->cnt); if (r->parent == j) printf("\tss:i:%d", r->subsc); putchar('\n'); + free(r->cigar); } free(s->reg[i]); free(s->seq[i].seq); free(s->seq[i].name);