r467: added : (equal length) and ^ (intron) ops

This commit is contained in:
Heng Li 2017-10-04 21:55:37 -04:00
parent 7d50e646dd
commit 9aba11769c
3 changed files with 19 additions and 8 deletions

View File

@ -127,7 +127,7 @@ void mm_write_sam_hdr(const mm_idx_t *idx, const char *rg, const char *ver, int
free(str.s);
}
static void write_cs(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r)
static void write_cs(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int no_iden)
{
extern unsigned char seq_nt4_table[256];
int i, q_off, t_off;
@ -150,22 +150,26 @@ static void write_cs(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_
}
for (i = q_off = t_off = 0; i < r->p->n_cigar; ++i) {
int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4;
assert(op >= 0 && op <= 2);
assert(op >= 0 && op <= 3);
if (op == 0) {
int l_tmp = 0;
for (j = 0; j < len; ++j) {
if (qseq[q_off + j] != tseq[t_off + j]) {
if (l_tmp > 0) {
tmp[l_tmp] = 0;
mm_sprintf_lite(s, "=%s", tmp);
if (!no_iden) {
tmp[l_tmp] = 0;
mm_sprintf_lite(s, "=%s", tmp);
} else mm_sprintf_lite(s, ":%d", l_tmp);
l_tmp = 0;
}
mm_sprintf_lite(s, "*%c%c", "acgtn"[tseq[t_off + j]], "acgtn"[qseq[q_off + j]]);
} else tmp[l_tmp++] = "ACGTN"[qseq[q_off + j]];
}
if (l_tmp > 0) {
tmp[l_tmp] = 0;
mm_sprintf_lite(s, "=%s", tmp);
if (!no_iden) {
tmp[l_tmp] = 0;
mm_sprintf_lite(s, "=%s", tmp);
} else mm_sprintf_lite(s, ":%d", l_tmp);
}
q_off += len, t_off += len;
} else if (op == 1) {
@ -173,11 +177,15 @@ static void write_cs(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_
tmp[j] = "acgtn"[qseq[q_off + j]];
mm_sprintf_lite(s, "+%s", tmp);
q_off += len;
} else if (op == 2) {
} else if (op == 2 || (op == 3 && len < 4)) {
for (j = 0, tmp[len] = 0; j < len; ++j)
tmp[j] = "acgtn"[tseq[t_off + j]];
mm_sprintf_lite(s, "-%s", tmp);
t_off += len;
} else { // op == 3 && len >= 4
mm_sprintf_lite(s, "^%c%c%d%c%c", "acgtn"[tseq[t_off]], "acgtn"[tseq[t_off+1]],
len, "acgtn"[tseq[t_off+len-2]], "acgtn"[tseq[t_off+len-1]]);
t_off += len;
}
}
assert(t_off == r->re - r->rs && q_off == r->qe - r->qs);
@ -215,7 +223,7 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDN"[r->p->cigar[k]&0xf]);
}
if (r->p && (opt_flag & MM_F_OUT_CS))
write_cs(km, s, mi, t, r);
write_cs(km, s, mi, t, r, opt_flag&MM_F_CS_NO_EQUAL);
}
static void sam_write_sq(kstring_t *s, char *seq, int l, int rev, int comp)

2
main.c
View File

@ -39,6 +39,7 @@ static struct option long_options[] = {
{ "sr", no_argument, 0, 0 },
{ "multi", optional_argument, 0, 0 },
{ "print-2nd", optional_argument, 0, 0 },
{ "cs-no-equal", no_argument, 0, 0 },
{ "help", no_argument, 0, 'h' },
{ "max-intron-len", required_argument, 0, 'G' },
{ "version", no_argument, 0, 'V' },
@ -129,6 +130,7 @@ int main(int argc, char *argv[])
else if (c == 0 && long_idx ==11) opt.noncan = atoi(optarg); // --cost-non-gt-ag
else if (c == 0 && long_idx ==12) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
else if (c == 0 && long_idx ==13) opt.flag |= MM_F_SR; // --sr
else if (c == 0 && long_idx ==16) opt.flag |= MM_F_CS_NO_EQUAL | MM_F_OUT_CS | MM_F_CIGAR; // --cs-no-equal
else if (c == 0 && long_idx ==14) { // --multi
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
opt.flag |= MM_F_MULTI_SEG;

View File

@ -16,6 +16,7 @@
#define MM_F_SPLICE_FOR 0x100 // match GT-AG
#define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG
#define MM_F_NO_LJOIN 0x400
#define MM_F_CS_NO_EQUAL 0x800
#define MM_F_SR 0x1000
#define MM_F_MULTI_SEG 0x2000
#define MM_F_NO_PRINT_2ND 0x4000