r784: support the =/X CIGAR operators (#156)
This commit is contained in:
parent
a3afeec0b2
commit
154d2caf5b
72
align.c
72
align.c
|
|
@ -198,7 +198,7 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
|
|||
mm_extra_t *p;
|
||||
if (n_cigar == 0) return;
|
||||
if (r->p == 0) {
|
||||
uint32_t capacity = n_cigar + sizeof(mm_extra_t);
|
||||
uint32_t capacity = n_cigar + sizeof(mm_extra_t); // TODO: should this be "n_cigar + sizeof(mm_extra_t)/4" instead?
|
||||
kroundup32(capacity);
|
||||
r->p = (mm_extra_t*)calloc(capacity, 4);
|
||||
r->p->capacity = capacity;
|
||||
|
|
@ -218,6 +218,74 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
|
|||
}
|
||||
}
|
||||
|
||||
static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq) // written by @armintoepfer
|
||||
{
|
||||
int n_diff = 0;
|
||||
uint32_t k, l, m, cap, toff = 0, qoff = 0, n_M = 0;
|
||||
mm_extra_t *p;
|
||||
if (r->p == 0) return;
|
||||
for (k = 0; k < r->p->n_cigar; ++k) {
|
||||
uint32_t op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
|
||||
if (op == 0) {
|
||||
for (l = 0; l < len; ++l)
|
||||
if (qseq[qoff + l] != tseq[toff + l]) // TODO: N<=>N is converted to "="
|
||||
++n_diff;
|
||||
++n_M;
|
||||
toff += len, qoff += len;
|
||||
} else if (op == 1) { // insertion
|
||||
qoff += len;
|
||||
} else if (op == 2) { // deletion
|
||||
toff += len;
|
||||
} else if (op == 3) { // intron
|
||||
toff += len;
|
||||
}
|
||||
}
|
||||
// update in-place if we can
|
||||
if (n_diff == 0) {
|
||||
for (k = 0; k < r->p->n_cigar; ++k) {
|
||||
uint32_t op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
|
||||
if (op == 0) r->p->cigar[k] = len << 4 | 7;
|
||||
}
|
||||
return;
|
||||
}
|
||||
// allocate new storage
|
||||
cap = r->p->n_cigar + (2 * n_diff - n_M) + sizeof(mm_extra_t);
|
||||
kroundup32(cap);
|
||||
p = (mm_extra_t*)calloc(cap, 4);
|
||||
memcpy(p, r->p, sizeof(mm_extra_t));
|
||||
p->capacity = cap;
|
||||
// update cigar while copying
|
||||
toff = qoff = m = 0;
|
||||
for (k = 0; k < r->p->n_cigar; ++k) {
|
||||
uint32_t op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
|
||||
if (op == 0) { // match/mismatch
|
||||
while (len > 0) {
|
||||
// match
|
||||
for (l = 0; l < len && qseq[qoff + l] == tseq[toff + l]; ++l) {}
|
||||
if (l > 0) p->cigar[m++] = l << 4 | 7;
|
||||
len -= l;
|
||||
toff += l, qoff += l;
|
||||
// mismatch
|
||||
for (l = 0; l < len && qseq[qoff + l] != tseq[toff + l]; ++l) {}
|
||||
if (l > 0) p->cigar[m++] = l << 4 | 8;
|
||||
len -= l;
|
||||
toff += l, qoff += l;
|
||||
}
|
||||
continue;
|
||||
} else if (op == 1) { // insertion
|
||||
qoff += len;
|
||||
} else if (op == 2) { // deletion
|
||||
toff += len;
|
||||
} else if (op == 3) { // intron
|
||||
toff += len;
|
||||
}
|
||||
p->cigar[m++] = r->p->cigar[k];
|
||||
}
|
||||
p->n_cigar = m;
|
||||
free(r->p);
|
||||
r->p = p;
|
||||
}
|
||||
|
||||
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int end_bonus, int zdrop, int flag, ksw_extz_t *ez)
|
||||
{
|
||||
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
|
||||
|
|
@ -675,6 +743,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
if (r->p) {
|
||||
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
||||
mm_update_extra(r, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e);
|
||||
if (opt->flag & MM_F_EQX) mm_update_cigar_eqx(r, &qseq0[r->rev][qs1], tseq);
|
||||
if (rev && r->p->trans_strand)
|
||||
r->p->trans_strand ^= 3; // flip to the read strand
|
||||
}
|
||||
|
|
@ -733,6 +802,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
|||
r_inv->rs = r1->re + t_off;
|
||||
r_inv->re = r_inv->rs + ez->max_t + 1;
|
||||
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e);
|
||||
if (opt->flag & MM_F_EQX) mm_update_cigar_eqx(r_inv, &qseq[q_off], &tseq[t_off]);
|
||||
ret = 1;
|
||||
end_align1_inv:
|
||||
kfree(km, tseq);
|
||||
|
|
|
|||
4
format.c
4
format.c
|
|
@ -270,7 +270,7 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m
|
|||
uint32_t k;
|
||||
mm_sprintf_lite(s, "\tcg:Z:");
|
||||
for (k = 0; k < r->p->n_cigar; ++k)
|
||||
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDN"[r->p->cigar[k]&0xf]);
|
||||
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDNSHP=XB"[r->p->cigar[k]&0xf]);
|
||||
}
|
||||
if (r->p && (opt_flag & (MM_F_OUT_CS|MM_F_OUT_MD)))
|
||||
write_cs_or_MD(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG), opt_flag&MM_F_OUT_MD);
|
||||
|
|
@ -321,7 +321,7 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co
|
|||
int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 'H' : 'S';
|
||||
if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char);
|
||||
for (k = 0; k < r->p->n_cigar; ++k)
|
||||
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDN"[r->p->cigar[k]&0xf]);
|
||||
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDNSHP=XB"[r->p->cigar[k]&0xf]);
|
||||
if (clip_len[1]) mm_sprintf_lite(s, "%d%c", clip_len[1], clip_char);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
5
main.c
5
main.c
|
|
@ -10,7 +10,7 @@
|
|||
#include "getopt.h"
|
||||
#endif
|
||||
|
||||
#define MM_VERSION "2.10-r783-dirty"
|
||||
#define MM_VERSION "2.10-r784-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -59,6 +59,7 @@ static struct option long_options[] = {
|
|||
{ "MD", no_argument, 0, 0 }, // 29
|
||||
{ "lj-min-ratio", required_argument, 0, 0 }, // 30
|
||||
{ "score-N", required_argument, 0, 0 }, // 31
|
||||
{ "eqx", no_argument, 0, 0 }, // 32
|
||||
{ "help", no_argument, 0, 'h' },
|
||||
{ "max-intron-len", required_argument, 0, 'G' },
|
||||
{ "version", no_argument, 0, 'V' },
|
||||
|
|
@ -177,6 +178,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 0 && long_idx ==29) opt.flag |= MM_F_OUT_MD; // --MD
|
||||
else if (c == 0 && long_idx ==30) opt.min_join_flank_ratio = atof(optarg); // --lj-min-ratio
|
||||
else if (c == 0 && long_idx ==31) opt.sc_ambi = atoi(optarg); // --score-N
|
||||
else if (c == 0 && long_idx ==32) opt.flag |= MM_F_EQX; // --eqx
|
||||
else if (c == 0 && long_idx == 14) { // --frag
|
||||
yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1);
|
||||
} else if (c == 0 && long_idx == 15) { // --secondary
|
||||
|
|
@ -278,6 +280,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(fp_help, " -c output CIGAR in PAF\n");
|
||||
fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n");
|
||||
fprintf(fp_help, " --MD output the MD tag\n");
|
||||
fprintf(fp_help, " --eqx write =/X CIGAR operators\n");
|
||||
fprintf(fp_help, " -Y use soft clipping for supplementary alignments\n");
|
||||
fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
|
||||
fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n");
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
#define MM_F_ALL_CHAINS 0x800000
|
||||
#define MM_F_OUT_MD 0x1000000
|
||||
#define MM_F_COPY_COMMENT 0x2000000
|
||||
#define MM_F_EQX 0x4000000 // use =/X instead of M
|
||||
|
||||
#define MM_I_HPC 0x1
|
||||
#define MM_I_NO_SEQ 0x2
|
||||
|
|
|
|||
|
|
@ -397,6 +397,9 @@ is assumed. [none]
|
|||
.B --MD
|
||||
Output the MD tag (see the SAM spec).
|
||||
.TP
|
||||
.B --eqx
|
||||
Output =/X CIGAR operators for sequence match/mismatch.
|
||||
.TP
|
||||
.B -Y
|
||||
In SAM output, use soft clipping for supplementary alignments.
|
||||
.TP
|
||||
|
|
|
|||
Loading…
Reference in New Issue