r520: added option -L to write long cigar to CG
This commit is contained in:
parent
ffd953029f
commit
4683da2455
55
format.c
55
format.c
|
|
@ -43,6 +43,13 @@ static void mm_sprintf_lite(kstring_t *s, const char *fmt, ...)
|
|||
if (c < 0) buf[l++] = '-';
|
||||
str_enlarge(s, l);
|
||||
for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
|
||||
} else if (*p == 'u') {
|
||||
int i, l = 0;
|
||||
uint32_t x;
|
||||
x = va_arg(ap, uint32_t);
|
||||
do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
|
||||
str_enlarge(s, l);
|
||||
for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
|
||||
} else if (*p == 's') {
|
||||
char *r = va_arg(ap, char*);
|
||||
str_copy(s, r, r + strlen(r));
|
||||
|
|
@ -251,9 +258,35 @@ static inline const mm_reg1_t *get_sam_pri(int n_regs, const mm_reg1_t *regs)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, const mm_reg1_t *r)
|
||||
{
|
||||
if (r->p == 0) {
|
||||
mm_sprintf_lite(s, "*");
|
||||
} else {
|
||||
uint32_t k, clip_len[2];
|
||||
clip_len[0] = r->rev? qlen - r->qe : r->qs;
|
||||
clip_len[1] = r->rev? r->qs : qlen - r->qe;
|
||||
if (in_tag) {
|
||||
int clip_char = (sam_flag&0x800)? 5 : 4;
|
||||
mm_sprintf_lite(s, "\tCG:B:I");
|
||||
if (clip_len[0]) mm_sprintf_lite(s, ",%u", clip_len[0]<<4|clip_char);
|
||||
for (k = 0; k < r->p->n_cigar; ++k)
|
||||
mm_sprintf_lite(s, ",%u", r->p->cigar[k]);
|
||||
if (clip_len[1]) mm_sprintf_lite(s, ",%u", clip_len[1]<<4|clip_char);
|
||||
} else {
|
||||
int clip_char = (sam_flag&0x800)? 'H' : 'S';
|
||||
if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char);
|
||||
for (k = 0; k < r->p->n_cigar; ++k)
|
||||
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDN"[r->p->cigar[k]&0xf]);
|
||||
if (clip_len[1]) mm_sprintf_lite(s, "%d%c", clip_len[1], clip_char);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regss, const mm_reg1_t *const* regss, void *km, int opt_flag)
|
||||
{
|
||||
int flag, n_regs = n_regss[seg_idx];
|
||||
const int max_bam_cigar_op = 65535;
|
||||
int flag, n_regs = n_regss[seg_idx], cigar_in_tag = 0;
|
||||
int this_rid = -1, this_pos = -1, this_rev = 0;
|
||||
const mm_reg1_t *regs = regss[seg_idx], *r_prev = NULL, *r_next;
|
||||
const mm_reg1_t *r = n_regs > 0 && reg_idx < n_regs && reg_idx >= 0? ®s[reg_idx] : NULL;
|
||||
|
|
@ -306,15 +339,15 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
|
|||
int mapq = !r->iden_flt? r->mapq : r->mapq < 3? r->mapq : 3;
|
||||
this_rid = r->rid, this_pos = r->rs, this_rev = r->rev;
|
||||
mm_sprintf_lite(s, "\t%s\t%d\t%d\t", mi->seq[r->rid].name, r->rs+1, mapq);
|
||||
if (r->p) { // actually this should always be true for SAM output
|
||||
uint32_t k, clip_len = r->rev? t->l_seq - r->qe : r->qs;
|
||||
int clip_char = (flag&0x800)? 'H' : 'S';
|
||||
if (clip_len) mm_sprintf_lite(s, "%d%c", clip_len, clip_char);
|
||||
for (k = 0; k < r->p->n_cigar; ++k)
|
||||
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDN"[r->p->cigar[k]&0xf]);
|
||||
clip_len = r->rev? r->qs : t->l_seq - r->qe;
|
||||
if (clip_len) mm_sprintf_lite(s, "%d%c", clip_len, clip_char);
|
||||
} else mm_sprintf_lite(s, "*");
|
||||
if ((opt_flag & MM_F_LONG_CIGAR) && r->p && r->p->n_cigar > max_bam_cigar_op - 2) {
|
||||
int n_cigar = r->p->n_cigar;
|
||||
if (r->qs != 0) ++n_cigar;
|
||||
if (r->qe != t->l_seq) ++n_cigar;
|
||||
if (n_cigar > max_bam_cigar_op)
|
||||
cigar_in_tag = 1;
|
||||
}
|
||||
if (cigar_in_tag) mm_sprintf_lite(s, "%dS", t->l_seq);
|
||||
else write_sam_cigar(s, flag, 0, t->l_seq, r);
|
||||
}
|
||||
|
||||
// write mate positions
|
||||
|
|
@ -394,6 +427,8 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
|
|||
}
|
||||
if (r->p && (opt_flag & MM_F_OUT_CS))
|
||||
write_cs(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG));
|
||||
if (cigar_in_tag)
|
||||
write_sam_cigar(s, flag, 1, t->l_seq, r);
|
||||
}
|
||||
|
||||
s->s[s->l] = 0; // we always have room for an extra byte (see str_enlarge)
|
||||
|
|
|
|||
8
main.c
8
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.2-r519-dirty"
|
||||
#define MM_VERSION "2.2-r520-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -65,7 +65,7 @@ static inline int64_t mm_parse_num(const char *str)
|
|||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
const char *opt_str = "2aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:i:";
|
||||
const char *opt_str = "2aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:i:L";
|
||||
mm_mapopt_t opt;
|
||||
mm_idxopt_t ipt;
|
||||
int i, c, n_threads = 3, long_idx, max_gap_ref = 0;
|
||||
|
|
@ -108,6 +108,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR;
|
||||
else if (c == 'Q') opt.flag |= MM_F_NO_QUAL;
|
||||
else if (c == 'L') opt.flag |= MM_F_LONG_CIGAR;
|
||||
else if (c == 'T') opt.sdust_thres = atoi(optarg);
|
||||
else if (c == 'n') opt.min_cnt = atoi(optarg);
|
||||
else if (c == 'm') opt.min_chain_score = atoi(optarg);
|
||||
|
|
@ -206,7 +207,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(fp_help, " Mapping:\n");
|
||||
fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
||||
fprintf(fp_help, " -g NUM stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
|
||||
fprintf(fp_help, " -G NUM max reference skip length [-xsplice:200k]\n");
|
||||
fprintf(fp_help, " -G NUM max reference skip/intron length [-xsplice:200k]\n");
|
||||
fprintf(fp_help, " -F NUM max fragment length in the fragment mode [-xsr:800]\n");
|
||||
fprintf(fp_help, " -r NUM bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw);
|
||||
fprintf(fp_help, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
|
||||
|
|
@ -227,6 +228,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(fp_help, " Input/Output:\n");
|
||||
fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
|
||||
fprintf(fp_help, " -Q don't output base quality in SAM\n");
|
||||
fprintf(fp_help, " -L write CIGAR with >65535 ops in the CG tag (compatible with future tools)\n");
|
||||
fprintf(fp_help, " -R STR SAM read group line in a format like '@RG\\tID:foo\\tSM:bar' []\n");
|
||||
fprintf(fp_help, " -c output CIGAR in PAF\n");
|
||||
fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n");
|
||||
|
|
|
|||
Loading…
Reference in New Issue