diff --git a/format.c b/format.c index 5b293d8..16a120c 100644 --- a/format.c +++ b/format.c @@ -240,13 +240,38 @@ static void sam_write_sq(kstring_t *s, char *seq, int l, int rev, int comp) } else str_copy(s, seq, seq + l); } +static inline const mm_reg1_t *get_sam_pri(int n_regs, const mm_reg1_t *regs) +{ + int i; + for (i = 0; i < n_regs; ++i) + if (regs[i].sam_pri) + return ®s[i]; + assert(n_regs == 0); + return NULL; +} + void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regss, const mm_reg1_t *const* regss, void *km, int opt_flag) { int flag, n_regs = n_regss[seg_idx]; - int next_sid = n_seg > 1? (seg_idx + 1) % n_seg : -1; - const mm_reg1_t *regs = regss[seg_idx]; + int this_rid = -1, this_pos = -1, this_rev = 0; + const mm_reg1_t *regs = regss[seg_idx], *r_prev, *r_next; const mm_reg1_t *r = n_regs > 0 && reg_idx < n_regs && reg_idx >= 0? ®s[reg_idx] : NULL; + // find the primary of the previous and the next segments, if they are mapped + if (n_seg > 1) { + int i, next_sid = (seg_idx + 1) % n_seg; + r_next = get_sam_pri(n_regss[next_sid], regss[next_sid]); + if (n_seg > 2) { + for (i = 1; i <= n_seg - 1; ++i) { + int prev_sid = (seg_idx + n_seg - i) % n_seg; + if (n_regss[prev_sid] > 0) { + r_prev = get_sam_pri(n_regss[prev_sid], regss[prev_sid]); + break; + } + } + } else r_prev = r_next; + } else r_prev = r_next = NULL; + // write QNAME s->l = 0; mm_sprintf_lite(s, "%s", t->name); @@ -254,12 +279,6 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se // write flag flag = n_seg > 1? 0x1 : 0x0; - if (n_seg > 1) { - if (r && r->proper_frag) flag |= 0x2; // FIXME: this doesn't work when there are more than 2 segments - if (seg_idx == 0) flag |= 0x40; - else if (seg_idx == n_seg - 1) flag |= 0x80; - if (n_regss[next_sid] == 0) flag |= 0x8; - } if (r == 0) { flag |= 0x4; } else { @@ -267,12 +286,23 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se if (r->parent != r->id) flag |= 0x100; else if (!r->sam_pri) flag |= 0x800; } + if (n_seg > 1) { + if (r && r->proper_frag) flag |= 0x2; // TODO: this doesn't work when there are more than 2 segments + if (seg_idx == 0) flag |= 0x40; + else if (seg_idx == n_seg - 1) flag |= 0x80; + if (r_next == NULL) flag |= 0x8; + else if (r_next->rev) flag |= 0x20; + } mm_sprintf_lite(s, "\t%d", flag); - // write up to CIGAR + // write coordinate, MAPQ and CIGAR if (r == 0) { - mm_sprintf_lite(s, "\t*\t0\t0\t*\t"); + if (r_prev) { + this_rid = r_prev->rid, this_pos = r_prev->rs; + mm_sprintf_lite(s, "\t%s\t%d", mi->seq[this_rid].name, this_pos); + } else mm_sprintf_lite(s, "\t0\t*"); } else { + this_rid = r->rid, this_pos = r->rs, this_rev = r->rev; mm_sprintf_lite(s, "\t%s\t%d\t%d\t", mi->seq[r->rid].name, r->rs+1, r->mapq); if (r->p) { // actually this should always be true for SAM output uint32_t k, clip_len = r->rev? t->l_seq - r->qe : r->qs; @@ -287,7 +317,25 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se // write mate positions if (n_seg > 1) { - mm_sprintf_lite(s, "\t*\t0\t0\t"); + int tlen = 0; + if (this_rid >= 0 && r_next) { + if (this_rid == r_next->rid) { + int this_pos5 = r && r->rev? r->re - 1 : this_pos; + int next_pos5 = r_next->rev? r_next->re - 1 : r_next->rs; + tlen = next_pos5 - this_pos5; + mm_sprintf_lite(s, "\t=\t"); + } else mm_sprintf_lite(s, "\t%s\t", mi->seq[r_next->rid].name); + mm_sprintf_lite(s, "%d\t", r_next->rs + 1); + } else if (r_next) { // && this_rid < 0 + mm_sprintf_lite(s, "\t%s\t%d\t", mi->seq[r_next->rid].name, r_next->rs + 1); + } else if (this_rid >= 0) { // && r_next == NULL + int this_pos5 = this_rev? r->re - 1 : this_pos; // this_rev is only true when r != NULL + tlen = this_pos - this_pos5; // next_pos5 will be this_pos + mm_sprintf_lite(s, "\t=\t%d\t", this_pos + 1); // next segment will take r's coordinate + } else mm_sprintf_lite(s, "\t*\t0\t"); // neither has coordinates + if (tlen > 0) ++tlen; + else if (tlen < 0) --tlen; + mm_sprintf_lite(s, "%d\t", tlen); } else mm_sprintf_lite(s, "\t*\t0\t0\t"); // write SEQ and QUAL diff --git a/main.c b/main.c index aefa7f4..b210c8e 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.2-r469-dirty" +#define MM_VERSION "2.2-r471-dirty" #ifdef __linux__ #include