From d7f2ac1d4fd12d18ce938a4d4bf736755fa38670 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 12 Sep 2017 16:11:23 -0400 Subject: [PATCH] better parameters for short reads It turns out the key problem is not the minimizer density. It is the max occurrence that tends to affect results more, especially sensitivity. There is still a lot of work to do, but for now it seems like a good start. --- align.c | 4 ++++ main.c | 14 ++++++++------ minimap.h | 1 + 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/align.c b/align.c index 163a299..1ed42e5 100644 --- a/align.c +++ b/align.c @@ -143,6 +143,10 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr); fputc('\n', stderr); } + if (opt->flag & MM_F_SR) { + flag |= KSW_EZ_APPROX_MAX; + if (flag & KSW_EZ_EXTZ_ONLY) flag |= KSW_EZ_APPROX_DROP; + } if (opt->flag & MM_F_SPLICE) ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, opt->zdrop, flag, ez); else if (opt->q == opt->q2 && opt->e == opt->e2) diff --git a/main.c b/main.c index b574782..d0ac576 100644 --- a/main.c +++ b/main.c @@ -156,17 +156,19 @@ int main(int argc, char *argv[]) k = 19, w = 19; opt.a = 1, opt.b = 9, opt.q = 16, opt.q2 = 41, opt.e = 2, opt.e2 = 1, opt.zdrop = 200; opt.min_dp_max = 200; - } else if (strcmp(optarg, "short") == 0) { - k = 17, w = 7, is_hpc = 0; + } else if (strcmp(optarg, "short") == 0 || strcmp(optarg, "sr") == 0) { + k = 21, w = 11, is_hpc = 0; + minibatch_size = 50000000; + opt.flag |= MM_F_SR; opt.a = 2, opt.b = 8, opt.q = 12, opt.e = 2, opt.q2 = 32, opt.e2 = 1; opt.max_gap = 100; opt.pri_ratio = 0.5f; opt.min_cnt = 2; opt.min_chain_score = 20; - opt.min_dp_max = 50; - opt.best_n = 10; - opt.bw = 100; - opt.mid_occ_frac = 1e-4f; + opt.min_dp_max = 40; + opt.best_n = 20; + opt.bw = 50; + opt.mid_occ_frac = 2e-5f; } else if (strcmp(optarg, "splice") == 0 || strcmp(optarg, "cdna") == 0) { k = 15, w = 5; 
opt.flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV; diff --git a/minimap.h b/minimap.h index e345915..def9640 100644 --- a/minimap.h +++ b/minimap.h @@ -19,6 +19,7 @@ #define MM_F_SPLICE_REV 0x200 #define MM_F_SPLICE_BOTH 0x400 #define MM_F_NO_SAM_SQ 0x800 +#define MM_F_SR 0x1000 #define MM_IDX_MAGIC "MMI\2"