From a349d85280931e3b4cb39329655e6b95db151304 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 27 Sep 2017 12:33:10 -0400 Subject: [PATCH] r444: changed the way orientation is specified The old model doesn't work with RF or RR orientation. The new model only works with paired-end reads. For >2 segments, only FF is supported. --- main.c | 7 +------ map.c | 14 ++++++++------ minimap.h | 2 +- pe.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/main.c b/main.c index 014c57b..cb85276 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.2-r443-dirty" +#define MM_VERSION "2.2-r444-dirty" #ifdef __linux__ #include @@ -38,7 +38,6 @@ static struct option long_options[] = { { "no-sam-sq", no_argument, 0, 0 }, { "sr", no_argument, 0, 0 }, { "multi-seg", optional_argument, 0, 0 }, - { "2nd-seg-rev", optional_argument, 0, 0 }, { "help", no_argument, 0, 'h' }, { "max-intron-len", required_argument, 0, 'G' }, { "version", no_argument, 0, 'V' }, @@ -121,10 +120,6 @@ int main(int argc, char *argv[]) if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0) opt.flag |= MM_F_MULTI_SEG; else opt.flag &= ~MM_F_MULTI_SEG; - } else if (c == 0 && long_idx ==15) { // --2nd-seg-rev - if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0) - opt.flag |= MM_F_SEG_REV; - else opt.flag &= ~MM_F_SEG_REV; } else if (c == 'V') { puts(MM_VERSION); return 0; diff --git a/map.c b/map.c index 10e5054..a997f43 100644 --- a/map.c +++ b/map.c @@ -12,7 +12,8 @@ void mm_mapopt_init(mm_mapopt_t *opt) { memset(opt, 0, sizeof(mm_mapopt_t)); opt->mid_occ_frac = 2e-4f; - opt->sdust_thres = 0; + opt->sdust_thres = 0; // no SDUST masking + opt->pe_ori = 0; // FF opt->min_cnt = 3; opt->min_chain_score = 40; @@ -75,7 +76,8 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) mo->min_dp_max = 200; } else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) { io->is_hpc = 0, io->k = 21, io->w = 11; - mo->flag |= MM_F_SR | MM_F_MULTI_SEG | MM_F_SEG_REV; + mo->flag |= MM_F_SR | MM_F_MULTI_SEG; + mo->pe_ori = 0<<1|1; // FR mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 32, mo->e2 = 1; mo->max_gap = 200; mo->max_gap_ref = 1000; @@ -339,7 +341,7 @@ typedef struct { static void worker_for(void *_data, long i, int tid) // kt_for() callback { step_t *s = (step_t*)_data; - int *qlens, j, off = s->seg_off[i]; + int *qlens, j, off = s->seg_off[i], pe_ori = s->p->opt->pe_ori; const char **qseqs; mm_tbuf_t *b = s->buf[tid]; if (mm_dbg_flag & MM_DBG_PRINT_QNAME) @@ -347,14 +349,14 @@ static void worker_for(void *_data, long i, int tid) // kt_for() callback qlens = (int*)kmalloc(b->km, s->n_seg[i] * sizeof(int)); qseqs = (const char**)kmalloc(b->km, s->n_seg[i] * sizeof(const char**)); for (j = 0; j < s->n_seg[i]; ++j) { - if (j > 0 && (s->p->opt->flag & MM_F_SEG_REV)) + if (s->n_seg[i] == 2 && ((j == 0 && (pe_ori>>1&1)) || (j == 1 && (pe_ori&1)))) mm_revcomp_bseq(&s->seq[off + j]); qlens[j] = s->seq[off + j].l_seq; qseqs[j] = s->seq[off + j].seq; } mm_map_multi(s->p->mi, s->n_seg[i], qlens, qseqs, &s->n_reg[off], &s->reg[off], b, s->p->opt, s->seq[off].name); - if (s->n_seg[i] > 1 && (s->p->opt->flag & MM_F_SEG_REV)) - for (j = 1; j < s->n_seg[i]; ++j) { // flip the query strand and coordinate to the original read strand + for (j = 0; j < s->n_seg[i]; ++j) // flip the query strand and coordinate to the original read strand + if (s->n_seg[i] == 2 && ((j == 0 && (pe_ori>>1&1)) || (j == 1 && (pe_ori&1)))) { int k, t; mm_revcomp_bseq(&s->seq[off + j]); for (k = 0; k < s->n_reg[off + j]; ++k) { diff --git a/minimap.h b/minimap.h index 4d7f393..4b2d9a6 100644 --- a/minimap.h +++ b/minimap.h @@ -18,7 +18,6 @@ #define MM_F_NO_SAM_SQ 0x800 #define MM_F_SR 0x1000 #define MM_F_MULTI_SEG 0x2000 -#define MM_F_SEG_REV 0x4000 #define MM_IDX_MAGIC "MMI\2" @@ -82,6 +81,7 @@ typedef struct { typedef struct { int sdust_thres; // score threshold for SDUST; 0 to disable int flag; // see MM_F_* macros + int pe_ori; int bw; // bandwidth int max_gap, max_gap_ref; // break a chain if there are no minimizers in a max_gap window diff --git a/pe.c b/pe.c index eb21a14..2c79106 100644 --- a/pe.c +++ b/pe.c @@ -40,3 +40,32 @@ void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int } } +#include "ksort.h" + +typedef struct { + int s; + uint64_t key; + mm_reg1_t *r; +} pair_arr_t; + +#define sort_key_pair(a) ((a).key) +KRADIX_SORT_INIT(pair, pair_arr_t, sort_key_pair, 8) +/* +void mm_pair(void *km, int max_gap_ref, int *n_regs, mm_reg1_t **regs) +{ + int i, j, s, n, last = -1; + pair_arr_t *a; + a = (pair_arr_t*)kmalloc(km, (n_regs[0] + n_regs[1]) * sizeof(pair_arr_t)); + for (s = n = 0; s < 2; ++s) + for (i = 0; i < n_regs[s]; ++i) { + a[n].s = s; + a[n].r = ®s[s][i]; + a[n++].key = (uint64_t)a[n].r->rid << 32 | a[n].r->rs; + } + radix_sort_pair(n, a); + for (i = 0; i < n; ++i) { + int pre_dir = seg_rev? !a[i].r->rev : a[i].r->rev; + } + kfree(km, a); +} +*/