put option operations in another file
This commit is contained in:
parent
dd18307e66
commit
123bc1d91d
9
Makefile
9
Makefile
|
|
@ -1,7 +1,7 @@
|
||||||
CFLAGS= -g -Wall -O2 -Wc++-compat
|
CFLAGS= -g -Wall -O2 -Wc++-compat
|
||||||
CPPFLAGS= -DHAVE_KALLOC
|
CPPFLAGS= -DHAVE_KALLOC
|
||||||
INCLUDES=
|
INCLUDES=
|
||||||
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o pe.o esterr.o ksw2_ll_sse.o
|
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o chain.o align.o hit.o map.o format.o pe.o esterr.o ksw2_ll_sse.o
|
||||||
PROG= minimap2
|
PROG= minimap2
|
||||||
PROG_EXTRA= sdust minimap2-lite
|
PROG_EXTRA= sdust minimap2-lite
|
||||||
LIBS= -lm -lz -lpthread
|
LIBS= -lm -lz -lpthread
|
||||||
|
|
@ -98,9 +98,12 @@ ksw2_extd2_sse.o: ksw2.h kalloc.h
|
||||||
ksw2_exts2_sse.o: ksw2.h kalloc.h
|
ksw2_exts2_sse.o: ksw2.h kalloc.h
|
||||||
ksw2_extz2_sse.o: ksw2.h kalloc.h
|
ksw2_extz2_sse.o: ksw2.h kalloc.h
|
||||||
ksw2_ll_sse.o: ksw2.h kalloc.h
|
ksw2_ll_sse.o: ksw2.h kalloc.h
|
||||||
|
kthread.o: kthread.h
|
||||||
main.o: bseq.h minimap.h mmpriv.h getopt.h
|
main.o: bseq.h minimap.h mmpriv.h getopt.h
|
||||||
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h khash.h
|
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h khash.h
|
||||||
misc.o: minimap.h ksort.h
|
map.o: ksort.h
|
||||||
|
misc.o: mmpriv.h minimap.h bseq.h ksort.h
|
||||||
|
options.o: mmpriv.h minimap.h bseq.h
|
||||||
pe.o: mmpriv.h minimap.h bseq.h kvec.h kalloc.h ksort.h
|
pe.o: mmpriv.h minimap.h bseq.h kvec.h kalloc.h ksort.h
|
||||||
sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
||||||
sketch.o: kvec.h kalloc.h minimap.h
|
sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h
|
||||||
|
|
|
||||||
5
main.c
5
main.c
|
|
@ -47,6 +47,7 @@ static struct option long_options[] = {
|
||||||
{ "end-seed-pen", required_argument, 0, 0 }, // 21
|
{ "end-seed-pen", required_argument, 0, 0 }, // 21
|
||||||
{ "for-only", no_argument, 0, 0 }, // 22
|
{ "for-only", no_argument, 0, 0 }, // 22
|
||||||
{ "rev-only", no_argument, 0, 0 }, // 23
|
{ "rev-only", no_argument, 0, 0 }, // 23
|
||||||
|
{ "heap-sort", optional_argument, 0, 0 }, // 24
|
||||||
{ "help", no_argument, 0, 'h' },
|
{ "help", no_argument, 0, 'h' },
|
||||||
{ "max-intron-len", required_argument, 0, 'G' },
|
{ "max-intron-len", required_argument, 0, 'G' },
|
||||||
{ "version", no_argument, 0, 'V' },
|
{ "version", no_argument, 0, 'V' },
|
||||||
|
|
@ -169,6 +170,10 @@ int main(int argc, char *argv[])
|
||||||
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
|
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
|
||||||
opt.flag |= MM_F_SPLICE_FLANK;
|
opt.flag |= MM_F_SPLICE_FLANK;
|
||||||
else opt.flag &= ~MM_F_SPLICE_FLANK;
|
else opt.flag &= ~MM_F_SPLICE_FLANK;
|
||||||
|
} else if (c == 0 && long_idx == 24) { // --heap-sort
|
||||||
|
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
|
||||||
|
opt.flag |= MM_F_HEAP_SORT;
|
||||||
|
else opt.flag &= ~MM_F_HEAP_SORT;
|
||||||
} else if (c == 'S') {
|
} else if (c == 'S') {
|
||||||
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
||||||
if (mm_verbose >= 2)
|
if (mm_verbose >= 2)
|
||||||
|
|
|
||||||
144
map.c
144
map.c
|
|
@ -9,144 +9,6 @@
|
||||||
#include "bseq.h"
|
#include "bseq.h"
|
||||||
#include "khash.h"
|
#include "khash.h"
|
||||||
|
|
||||||
void mm_mapopt_init(mm_mapopt_t *opt)
|
|
||||||
{
|
|
||||||
memset(opt, 0, sizeof(mm_mapopt_t));
|
|
||||||
opt->seed = 11;
|
|
||||||
opt->mid_occ_frac = 2e-4f;
|
|
||||||
opt->sdust_thres = 0; // no SDUST masking
|
|
||||||
|
|
||||||
opt->min_cnt = 3;
|
|
||||||
opt->min_chain_score = 40;
|
|
||||||
opt->bw = 500;
|
|
||||||
opt->max_gap = 5000;
|
|
||||||
opt->max_gap_ref = -1;
|
|
||||||
opt->max_chain_skip = 25;
|
|
||||||
|
|
||||||
opt->mask_level = 0.5f;
|
|
||||||
opt->pri_ratio = 0.8f;
|
|
||||||
opt->best_n = 5;
|
|
||||||
|
|
||||||
opt->max_join_long = 20000;
|
|
||||||
opt->max_join_short = 2000;
|
|
||||||
opt->min_join_flank_sc = 1000;
|
|
||||||
|
|
||||||
opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1;
|
|
||||||
opt->zdrop = 400;
|
|
||||||
opt->end_bonus = -1;
|
|
||||||
opt->min_dp_max = opt->min_chain_score * opt->a;
|
|
||||||
opt->min_ksw_len = 200;
|
|
||||||
opt->anchor_ext_len = 20, opt->anchor_ext_shift = 6;
|
|
||||||
opt->mini_batch_size = 500000000;
|
|
||||||
|
|
||||||
opt->pe_ori = 0; // FF
|
|
||||||
opt->pe_bonus = 33;
|
|
||||||
}
|
|
||||||
|
|
||||||
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
|
||||||
{
|
|
||||||
if ((opt->flag & MM_F_SPLICE_FOR) && (opt->flag & MM_F_SPLICE_REV))
|
|
||||||
opt->flag |= MM_F_SPLICE;
|
|
||||||
if (opt->mid_occ <= 0)
|
|
||||||
opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
|
|
||||||
if (mm_verbose >= 3)
|
|
||||||
fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
|
|
||||||
}
|
|
||||||
|
|
||||||
void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len)
|
|
||||||
{
|
|
||||||
if ((opt->flag & MM_F_SPLICE) && max_intron_len > 0)
|
|
||||||
opt->max_gap_ref = opt->bw = max_intron_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
|
||||||
{
|
|
||||||
if (preset == 0) {
|
|
||||||
mm_idxopt_init(io);
|
|
||||||
mm_mapopt_init(mo);
|
|
||||||
} else if (strcmp(preset, "ava-ont") == 0) {
|
|
||||||
io->flag = 0, io->k = 15, io->w = 5;
|
|
||||||
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
|
|
||||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
|
||||||
} else if (strcmp(preset, "ava-pb") == 0) {
|
|
||||||
io->flag |= MM_I_HPC, io->k = 19, io->w = 5;
|
|
||||||
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
|
|
||||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
|
||||||
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
|
|
||||||
io->flag |= MM_I_HPC, io->k = 19;
|
|
||||||
} else if (strcmp(preset, "map-ont") == 0) {
|
|
||||||
io->flag = 0, io->k = 15;
|
|
||||||
} else if (strcmp(preset, "asm5") == 0) {
|
|
||||||
io->flag = 0, io->k = 19, io->w = 19;
|
|
||||||
mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = 200;
|
|
||||||
mo->min_dp_max = 200;
|
|
||||||
mo->best_n = 50;
|
|
||||||
} else if (strcmp(preset, "asm10") == 0) {
|
|
||||||
io->flag = 0, io->k = 19, io->w = 19;
|
|
||||||
mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = 200;
|
|
||||||
mo->min_dp_max = 200;
|
|
||||||
mo->best_n = 50;
|
|
||||||
} else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) {
|
|
||||||
io->flag = 0, io->k = 21, io->w = 11;
|
|
||||||
mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND | MM_F_2_IO_THREADS;
|
|
||||||
mo->pe_ori = 0<<1|1; // FR
|
|
||||||
mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 24, mo->e2 = 1;
|
|
||||||
mo->zdrop = 100;
|
|
||||||
mo->end_bonus = 10;
|
|
||||||
mo->max_frag_len = 800;
|
|
||||||
mo->max_gap = 100;
|
|
||||||
mo->bw = 100;
|
|
||||||
mo->pri_ratio = 0.5f;
|
|
||||||
mo->min_cnt = 2;
|
|
||||||
mo->min_chain_score = 25;
|
|
||||||
mo->min_dp_max = 40;
|
|
||||||
mo->best_n = 20;
|
|
||||||
mo->mid_occ = 1000;
|
|
||||||
mo->max_occ = 5000;
|
|
||||||
mo->mini_batch_size = 50000000;
|
|
||||||
} else if (strcmp(preset, "splice") == 0 || strcmp(preset, "cdna") == 0) {
|
|
||||||
io->flag = 0, io->k = 15, io->w = 5;
|
|
||||||
mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV | MM_F_SPLICE_FLANK;
|
|
||||||
mo->max_gap = 2000, mo->max_gap_ref = mo->bw = 200000;
|
|
||||||
mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0;
|
|
||||||
mo->noncan = 9;
|
|
||||||
mo->zdrop = 200;
|
|
||||||
} else return -1;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo)
|
|
||||||
{
|
|
||||||
if (mo->best_n < 0) {
|
|
||||||
if (mm_verbose >= 1)
|
|
||||||
fprintf(stderr, "[ERROR]\033[1;31m -N must be no less than 0\033[0m\n");
|
|
||||||
return -4;
|
|
||||||
}
|
|
||||||
if (mo->best_n == 0 && mm_verbose >= 2)
|
|
||||||
fprintf(stderr, "[WARNING]\033[1;31m '-N 0' reduces mapping accuracy. Please use '--secondary=no' instead.\033[0m\n");
|
|
||||||
if (mo->pri_ratio < 0.0f || mo->pri_ratio > 1.0f) {
|
|
||||||
if (mm_verbose >= 1)
|
|
||||||
fprintf(stderr, "[ERROR]\033[1;31m -p must be within 0 and 1 (including 0 and 1)\033[0m\n");
|
|
||||||
return -4;
|
|
||||||
}
|
|
||||||
if ((mo->flag & MM_F_FOR_ONLY) && (mo->flag & MM_F_REV_ONLY)) {
|
|
||||||
if (mm_verbose >= 1)
|
|
||||||
fprintf(stderr, "[ERROR]\033[1;31m --for-only and --rev-only can't be applied at the same time\033[0m\n");
|
|
||||||
return -3;
|
|
||||||
}
|
|
||||||
if ((mo->q != mo->q2 || mo->e != mo->e2) && !(mo->e > mo->e2 && mo->q + mo->e < mo->q2 + mo->e2)) {
|
|
||||||
if (mm_verbose >= 1)
|
|
||||||
fprintf(stderr, "[ERROR]\033[1;31m dual gap penalties violating E1>E2 and O1+E1<O2+E2\033[0m\n");
|
|
||||||
return -2;
|
|
||||||
}
|
|
||||||
if ((mo->q + mo->e) + (mo->q2 + mo->e2) > 127) {
|
|
||||||
if (mm_verbose >= 1)
|
|
||||||
fprintf(stderr, "[ERROR]\033[1;31m scoring system violating ({-O}+{-E})+({-O2}+{-E2}) <= 127\033[0m\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct mm_tbuf_s {
|
struct mm_tbuf_s {
|
||||||
void *km;
|
void *km;
|
||||||
};
|
};
|
||||||
|
|
@ -436,7 +298,8 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
||||||
hash = __ac_Wang_hash(hash);
|
hash = __ac_Wang_hash(hash);
|
||||||
|
|
||||||
collect_minimizers(b->km, opt, mi, n_segs, qlens, seqs, &mv);
|
collect_minimizers(b->km, opt, mi, n_segs, qlens, seqs, &mv);
|
||||||
a = collect_seed_hits(b->km, opt, opt->mid_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
if (opt->flag & MM_F_HEAP_SORT) a = collect_seed_hits_heap(b->km, opt, opt->mid_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
||||||
|
else a = collect_seed_hits(b->km, opt, opt->mid_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
||||||
|
|
||||||
if (mm_dbg_flag & MM_DBG_PRINT_SEED) {
|
if (mm_dbg_flag & MM_DBG_PRINT_SEED) {
|
||||||
fprintf(stderr, "RS\t%d\n", rep_len);
|
fprintf(stderr, "RS\t%d\n", rep_len);
|
||||||
|
|
@ -476,7 +339,8 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
||||||
kfree(b->km, a);
|
kfree(b->km, a);
|
||||||
kfree(b->km, u);
|
kfree(b->km, u);
|
||||||
kfree(b->km, mini_pos);
|
kfree(b->km, mini_pos);
|
||||||
a = collect_seed_hits(b->km, opt, opt->max_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
if (opt->flag & MM_F_HEAP_SORT) a = collect_seed_hits_heap(b->km, opt, opt->max_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
||||||
|
else a = collect_seed_hits(b->km, opt, opt->max_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
||||||
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@
|
||||||
#define MM_F_SOFTCLIP 0x80000
|
#define MM_F_SOFTCLIP 0x80000
|
||||||
#define MM_F_FOR_ONLY 0x100000
|
#define MM_F_FOR_ONLY 0x100000
|
||||||
#define MM_F_REV_ONLY 0x200000
|
#define MM_F_REV_ONLY 0x200000
|
||||||
|
#define MM_F_HEAP_SORT 0x400000
|
||||||
|
|
||||||
#define MM_I_HPC 0x1
|
#define MM_I_HPC 0x1
|
||||||
#define MM_I_NO_SEQ 0x2
|
#define MM_I_NO_SEQ 0x2
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,140 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "mmpriv.h"
|
||||||
|
|
||||||
|
void mm_mapopt_init(mm_mapopt_t *opt)
|
||||||
|
{
|
||||||
|
memset(opt, 0, sizeof(mm_mapopt_t));
|
||||||
|
opt->seed = 11;
|
||||||
|
opt->mid_occ_frac = 2e-4f;
|
||||||
|
opt->sdust_thres = 0; // no SDUST masking
|
||||||
|
|
||||||
|
opt->min_cnt = 3;
|
||||||
|
opt->min_chain_score = 40;
|
||||||
|
opt->bw = 500;
|
||||||
|
opt->max_gap = 5000;
|
||||||
|
opt->max_gap_ref = -1;
|
||||||
|
opt->max_chain_skip = 25;
|
||||||
|
|
||||||
|
opt->mask_level = 0.5f;
|
||||||
|
opt->pri_ratio = 0.8f;
|
||||||
|
opt->best_n = 5;
|
||||||
|
|
||||||
|
opt->max_join_long = 20000;
|
||||||
|
opt->max_join_short = 2000;
|
||||||
|
opt->min_join_flank_sc = 1000;
|
||||||
|
|
||||||
|
opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1;
|
||||||
|
opt->zdrop = 400;
|
||||||
|
opt->end_bonus = -1;
|
||||||
|
opt->min_dp_max = opt->min_chain_score * opt->a;
|
||||||
|
opt->min_ksw_len = 200;
|
||||||
|
opt->anchor_ext_len = 20, opt->anchor_ext_shift = 6;
|
||||||
|
opt->mini_batch_size = 500000000;
|
||||||
|
|
||||||
|
opt->pe_ori = 0; // FF
|
||||||
|
opt->pe_bonus = 33;
|
||||||
|
}
|
||||||
|
|
||||||
|
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
||||||
|
{
|
||||||
|
if ((opt->flag & MM_F_SPLICE_FOR) && (opt->flag & MM_F_SPLICE_REV))
|
||||||
|
opt->flag |= MM_F_SPLICE;
|
||||||
|
if (opt->mid_occ <= 0)
|
||||||
|
opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
|
||||||
|
if (mm_verbose >= 3)
|
||||||
|
fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
|
||||||
|
}
|
||||||
|
|
||||||
|
void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len)
|
||||||
|
{
|
||||||
|
if ((opt->flag & MM_F_SPLICE) && max_intron_len > 0)
|
||||||
|
opt->max_gap_ref = opt->bw = max_intron_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
||||||
|
{
|
||||||
|
if (preset == 0) {
|
||||||
|
mm_idxopt_init(io);
|
||||||
|
mm_mapopt_init(mo);
|
||||||
|
} else if (strcmp(preset, "ava-ont") == 0) {
|
||||||
|
io->flag = 0, io->k = 15, io->w = 5;
|
||||||
|
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||||
|
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
||||||
|
} else if (strcmp(preset, "ava-pb") == 0) {
|
||||||
|
io->flag |= MM_I_HPC, io->k = 19, io->w = 5;
|
||||||
|
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||||
|
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
||||||
|
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
|
||||||
|
io->flag |= MM_I_HPC, io->k = 19;
|
||||||
|
} else if (strcmp(preset, "map-ont") == 0) {
|
||||||
|
io->flag = 0, io->k = 15;
|
||||||
|
} else if (strcmp(preset, "asm5") == 0) {
|
||||||
|
io->flag = 0, io->k = 19, io->w = 19;
|
||||||
|
mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = 200;
|
||||||
|
mo->min_dp_max = 200;
|
||||||
|
mo->best_n = 50;
|
||||||
|
} else if (strcmp(preset, "asm10") == 0) {
|
||||||
|
io->flag = 0, io->k = 19, io->w = 19;
|
||||||
|
mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = 200;
|
||||||
|
mo->min_dp_max = 200;
|
||||||
|
mo->best_n = 50;
|
||||||
|
} else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) {
|
||||||
|
io->flag = 0, io->k = 21, io->w = 11;
|
||||||
|
mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND | MM_F_2_IO_THREADS;
|
||||||
|
mo->pe_ori = 0<<1|1; // FR
|
||||||
|
mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 24, mo->e2 = 1;
|
||||||
|
mo->zdrop = 100;
|
||||||
|
mo->end_bonus = 10;
|
||||||
|
mo->max_frag_len = 800;
|
||||||
|
mo->max_gap = 100;
|
||||||
|
mo->bw = 100;
|
||||||
|
mo->pri_ratio = 0.5f;
|
||||||
|
mo->min_cnt = 2;
|
||||||
|
mo->min_chain_score = 25;
|
||||||
|
mo->min_dp_max = 40;
|
||||||
|
mo->best_n = 20;
|
||||||
|
mo->mid_occ = 1000;
|
||||||
|
mo->max_occ = 5000;
|
||||||
|
mo->mini_batch_size = 50000000;
|
||||||
|
} else if (strcmp(preset, "splice") == 0 || strcmp(preset, "cdna") == 0) {
|
||||||
|
io->flag = 0, io->k = 15, io->w = 5;
|
||||||
|
mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV | MM_F_SPLICE_FLANK;
|
||||||
|
mo->max_gap = 2000, mo->max_gap_ref = mo->bw = 200000;
|
||||||
|
mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0;
|
||||||
|
mo->noncan = 9;
|
||||||
|
mo->zdrop = 200;
|
||||||
|
} else return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo)
|
||||||
|
{
|
||||||
|
if (mo->best_n < 0) {
|
||||||
|
if (mm_verbose >= 1)
|
||||||
|
fprintf(stderr, "[ERROR]\033[1;31m -N must be no less than 0\033[0m\n");
|
||||||
|
return -4;
|
||||||
|
}
|
||||||
|
if (mo->best_n == 0 && mm_verbose >= 2)
|
||||||
|
fprintf(stderr, "[WARNING]\033[1;31m '-N 0' reduces mapping accuracy. Please use '--secondary=no' instead.\033[0m\n");
|
||||||
|
if (mo->pri_ratio < 0.0f || mo->pri_ratio > 1.0f) {
|
||||||
|
if (mm_verbose >= 1)
|
||||||
|
fprintf(stderr, "[ERROR]\033[1;31m -p must be within 0 and 1 (including 0 and 1)\033[0m\n");
|
||||||
|
return -4;
|
||||||
|
}
|
||||||
|
if ((mo->flag & MM_F_FOR_ONLY) && (mo->flag & MM_F_REV_ONLY)) {
|
||||||
|
if (mm_verbose >= 1)
|
||||||
|
fprintf(stderr, "[ERROR]\033[1;31m --for-only and --rev-only can't be applied at the same time\033[0m\n");
|
||||||
|
return -3;
|
||||||
|
}
|
||||||
|
if ((mo->q != mo->q2 || mo->e != mo->e2) && !(mo->e > mo->e2 && mo->q + mo->e < mo->q2 + mo->e2)) {
|
||||||
|
if (mm_verbose >= 1)
|
||||||
|
fprintf(stderr, "[ERROR]\033[1;31m dual gap penalties violating E1>E2 and O1+E1<O2+E2\033[0m\n");
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
if ((mo->q + mo->e) + (mo->q2 + mo->e2) > 127) {
|
||||||
|
if (mm_verbose >= 1)
|
||||||
|
fprintf(stderr, "[ERROR]\033[1;31m scoring system violating ({-O}+{-E})+({-O2}+{-E2}) <= 127\033[0m\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
2
setup.py
2
setup.py
|
|
@ -33,7 +33,7 @@ setup(
|
||||||
keywords = 'sequence-alignment',
|
keywords = 'sequence-alignment',
|
||||||
scripts = ['python/minimap2.py'],
|
scripts = ['python/minimap2.py'],
|
||||||
ext_modules = [Extension('mappy',
|
ext_modules = [Extension('mappy',
|
||||||
sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', 'pe.c',
|
sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', 'pe.c', 'options.c',
|
||||||
'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c',
|
'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c',
|
||||||
'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c', 'esterr.c'],
|
'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c', 'esterr.c'],
|
||||||
depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h',
|
depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h',
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue