better parameters for short reads

It turns out the key problem is not the minimizer density. Rather, it is the
max occurrence that tends to affect results more, especially sensitivity.
There is still a lot of work to do, but for now this seems like a good start.
This commit is contained in:
Heng Li 2017-09-12 16:11:23 -04:00
parent eea9e851d8
commit d7f2ac1d4f
3 changed files with 13 additions and 6 deletions

View File

@@ -143,6 +143,10 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr);
fputc('\n', stderr);
}
if (opt->flag & MM_F_SR) {
flag |= KSW_EZ_APPROX_MAX;
if (flag & KSW_EZ_EXTZ_ONLY) flag |= KSW_EZ_APPROX_DROP;
}
if (opt->flag & MM_F_SPLICE)
ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, opt->zdrop, flag, ez);
else if (opt->q == opt->q2 && opt->e == opt->e2)

14
main.c
View File

@@ -156,17 +156,19 @@ int main(int argc, char *argv[])
k = 19, w = 19;
opt.a = 1, opt.b = 9, opt.q = 16, opt.q2 = 41, opt.e = 2, opt.e2 = 1, opt.zdrop = 200;
opt.min_dp_max = 200;
} else if (strcmp(optarg, "short") == 0) {
k = 17, w = 7, is_hpc = 0;
} else if (strcmp(optarg, "short") == 0 || strcmp(optarg, "sr") == 0) {
k = 21, w = 11, is_hpc = 0;
minibatch_size = 50000000;
opt.flag |= MM_F_SR;
opt.a = 2, opt.b = 8, opt.q = 12, opt.e = 2, opt.q2 = 32, opt.e2 = 1;
opt.max_gap = 100;
opt.pri_ratio = 0.5f;
opt.min_cnt = 2;
opt.min_chain_score = 20;
opt.min_dp_max = 50;
opt.best_n = 10;
opt.bw = 100;
opt.mid_occ_frac = 1e-4f;
opt.min_dp_max = 40;
opt.best_n = 20;
opt.bw = 50;
opt.mid_occ_frac = 2e-5f;
} else if (strcmp(optarg, "splice") == 0 || strcmp(optarg, "cdna") == 0) {
k = 15, w = 5;
opt.flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV;

View File

@@ -19,6 +19,7 @@
#define MM_F_SPLICE_REV 0x200
#define MM_F_SPLICE_BOTH 0x400
#define MM_F_NO_SAM_SQ 0x800
#define MM_F_SR 0x1000
#define MM_IDX_MAGIC "MMI\2"