From 7998fe9906819313e95198771b26c6b9c4958b32 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 1 Sep 2018 21:15:43 -0400 Subject: [PATCH] r829: replaced musl's getopt with ketopt --- Makefile | 15 +-- getopt.c | 216 ------------------------------------------ getopt.h | 53 ----------- ketopt.h | 116 +++++++++++++++++++++++ main.c | 279 ++++++++++++++++++++++++++++--------------------------- sdust.c | 13 +-- 6 files changed, 272 insertions(+), 420 deletions(-) delete mode 100644 getopt.c delete mode 100644 getopt.h create mode 100644 ketopt.h diff --git a/Makefile b/Makefile index f3b96dc..81412cf 100644 --- a/Makefile +++ b/Makefile @@ -32,8 +32,8 @@ all:$(PROG) extra:all $(PROG_EXTRA) -minimap2:main.o getopt.o libminimap2.a - $(CC) $(CFLAGS) main.o getopt.o -o $@ -L. -lminimap2 $(LIBS) +minimap2:main.o libminimap2.a + $(CC) $(CFLAGS) main.o -o $@ -L. -lminimap2 $(LIBS) minimap2-lite:example.o libminimap2.a $(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS) @@ -41,8 +41,8 @@ minimap2-lite:example.o libminimap2.a libminimap2.a:$(OBJS) $(AR) -csru $@ $(OBJS) -sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h - $(CC) -D_SDUST_MAIN $(CFLAGS) $< getopt.o kalloc.o -o $@ -lz +sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h + $(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz # SSE-specific targets on x86/x86_64 @@ -99,7 +99,6 @@ chain.o: minimap.h mmpriv.h bseq.h kalloc.h esterr.o: mmpriv.h minimap.h bseq.h example.o: minimap.h kseq.h format.o: kalloc.h mmpriv.h minimap.h bseq.h -getopt.o: getopt.h hit.o: mmpriv.h minimap.h bseq.h kalloc.h khash.h index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h kalloc.o: kalloc.h @@ -108,11 +107,13 @@ ksw2_exts2_sse.o: ksw2.h kalloc.h ksw2_extz2_sse.o: ksw2.h kalloc.h ksw2_ll_sse.o: ksw2.h kalloc.h kthread.o: kthread.h -main.o: bseq.h minimap.h mmpriv.h getopt.h +main.o: bseq.h minimap.h mmpriv.h ketopt.h map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h khash.h 
map.o: ksort.h misc.o: mmpriv.h minimap.h bseq.h ksort.h options.o: mmpriv.h minimap.h bseq.h pe.o: mmpriv.h minimap.h bseq.h kvec.h kalloc.h ksort.h -sdust.o: kalloc.h kdq.h kvec.h sdust.h +rl.o: kseq.h +sdust.o: kalloc.h kdq.h kvec.h ketopt.h sdust.h sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h +splitidx.o: mmpriv.h minimap.h bseq.h diff --git a/getopt.c b/getopt.c deleted file mode 100644 index 756b31c..0000000 --- a/getopt.c +++ /dev/null @@ -1,216 +0,0 @@ -#include -#include -#include -#include "getopt.h" - -char *optarg; -int optind=1, opterr=1, optopt, __optpos, optreset=0; - -#define optpos __optpos - -static void __getopt_msg(const char *a, const char *b, const char *c, size_t l) -{ - FILE *f = stderr; -#if !defined(WIN32) && !defined(_WIN32) - flockfile(f); -#endif - fputs(a, f); - fwrite(b, strlen(b), 1, f); - fwrite(c, 1, l, f); - fputc('\n', f); -#if !defined(WIN32) && !defined(_WIN32) - funlockfile(f); -#endif -} - -int getopt(int argc, char * const argv[], const char *optstring) -{ - int i, c, d; - int k, l; - char *optchar; - - if (!optind || optreset) { - optreset = 0; - __optpos = 0; - optind = 1; - } - - if (optind >= argc || !argv[optind]) - return -1; - - if (argv[optind][0] != '-') { - if (optstring[0] == '-') { - optarg = argv[optind++]; - return 1; - } - return -1; - } - - if (!argv[optind][1]) - return -1; - - if (argv[optind][1] == '-' && !argv[optind][2]) - return optind++, -1; - - if (!optpos) optpos++; - c = argv[optind][optpos], k = 1; - optchar = argv[optind]+optpos; - optopt = c; - optpos += k; - - if (!argv[optind][optpos]) { - optind++; - optpos = 0; - } - - if (optstring[0] == '-' || optstring[0] == '+') - optstring++; - - i = 0; - d = 0; - do { - d = optstring[i], l = 1; - if (l>0) i+=l; else i++; - } while (l && d != c); - - if (d != c) { - if (optstring[0] != ':' && opterr) - __getopt_msg(argv[0], ": unrecognized option: ", optchar, k); - return '?'; - } - if (optstring[i] == ':') { - if (optstring[i+1] == ':') optarg = 
0; - else if (optind >= argc) { - if (optstring[0] == ':') return ':'; - if (opterr) __getopt_msg(argv[0], - ": option requires an argument: ", - optchar, k); - return '?'; - } - if (optstring[i+1] != ':' || optpos) { - optarg = argv[optind++] + optpos; - optpos = 0; - } - } - return c; -} - -static void permute(char *const *argv, int dest, int src) -{ - char **av = (char **)argv; - char *tmp = av[src]; - int i; - for (i=src; i>dest; i--) - av[i] = av[i-1]; - av[dest] = tmp; -} - -static int __getopt_long_core(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly) -{ - optarg = 0; - if (longopts && argv[optind][0] == '-' && - ((longonly && argv[optind][1] && argv[optind][1] != '-') || - (argv[optind][1] == '-' && argv[optind][2]))) - { - int colon = optstring[optstring[0]=='+'||optstring[0]=='-']==':'; - int i, cnt, match = -1; - char *opt; - for (cnt=i=0; longopts[i].name; i++) { - const char *name = longopts[i].name; - opt = argv[optind]+1; - if (*opt == '-') opt++; - for (; *name && *name == *opt; name++, opt++); - if (*opt && *opt != '=') continue; - match = i; - if (!*name) { - cnt = 1; - break; - } - cnt++; - } - if (cnt==1) { - i = match; - optind++; - optopt = longopts[i].val; - if (*opt == '=') { - if (!longopts[i].has_arg) { - if (colon || !opterr) - return '?'; - __getopt_msg(argv[0], - ": option does not take an argument: ", - longopts[i].name, - strlen(longopts[i].name)); - return '?'; - } - optarg = opt+1; - } else if (longopts[i].has_arg == required_argument) { - if (!(optarg = argv[optind])) { - if (colon) return ':'; - if (!opterr) return '?'; - __getopt_msg(argv[0], - ": option requires an argument: ", - longopts[i].name, - strlen(longopts[i].name)); - return '?'; - } - optind++; - } - if (idx) *idx = i; - if (longopts[i].flag) { - *longopts[i].flag = longopts[i].val; - return 0; - } - return longopts[i].val; - } - if (argv[optind][1] == '-') { - if (!colon && opterr) - __getopt_msg(argv[0], cnt 
? - ": option is ambiguous: " : - ": unrecognized option: ", - argv[optind]+2, - strlen(argv[optind]+2)); - optind++; - return '?'; - } - } - return getopt(argc, argv, optstring); -} - -static int __getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly) -{ - int ret, skipped, resumed; - if (!optind || optreset) { - optreset = 0; - __optpos = 0; - optind = 1; - } - if (optind >= argc || !argv[optind]) return -1; - skipped = optind; - if (optstring[0] != '+' && optstring[0] != '-') { - int i; - for (i=optind; ; i++) { - if (i >= argc || !argv[i]) return -1; - if (argv[i][0] == '-' && argv[i][1]) break; - } - optind = i; - } - resumed = optind; - ret = __getopt_long_core(argc, argv, optstring, longopts, idx, longonly); - if (resumed > skipped) { - int i, cnt = optind-resumed; - for (i=0; i /* for strchr() and strncmp() */ + +#define ko_no_argument 0 +#define ko_required_argument 1 +#define ko_optional_argument 2 + +typedef struct { + int ind; /* equivalent to optind */ + int opt; /* equivalent to optopt */ + char *arg; /* equivalent to optarg */ + int longidx; /* index of a long option; or -1 if short */ + /* private variables not intended for external uses */ + int i, pos, n_args; +} ketopt_t; + +typedef struct { + char *name; + int has_arg; + int val; +} ko_longopt_t; + +static ketopt_t KETOPT_INIT = { 1, 0, 0, -1, 1, 0, 0 }; + +static void ketopt_permute(char *argv[], int j, int n) /* move argv[j] over n elements to the left */ +{ + int k; + char *p = argv[j]; + for (k = 0; k < n; ++k) + argv[j - k] = argv[j - k - 1]; + argv[j - k] = p; +} + +/** + * Parse command-line options and arguments + * + * This function has a similar interface to GNU's getopt_long(). Each call + * parses one option and returns the option name. s->arg points to the option + * argument if present. The function returns -1 when all command-line arguments + * are parsed.
In this case, s->ind is the index of the first non-option + * argument. + * + * @param s status; shall be initialized to KETOPT_INIT on the first call + * @param argc length of argv[] + * @param argv list of command-line arguments; argv[0] is ignored + * @param permute non-zero to move options ahead of non-option arguments + * @param ostr option string + * @param longopts long options + * + * @return ASCII for a short option; ko_longopt_t::val for a long option; -1 if + * argv[] is fully processed; '?' for an unknown option or an ambiguous + * long option; ':' if an option argument is missing + */ +static int ketopt(ketopt_t *s, int argc, char *argv[], int permute, const char *ostr, const ko_longopt_t *longopts) +{ + int opt = -1, i0, j; + if (permute) { + while (s->i < argc && (argv[s->i][0] != '-' || argv[s->i][1] == '\0')) + ++s->i, ++s->n_args; + } + s->arg = 0, s->longidx = -1, i0 = s->i; + if (s->i >= argc || argv[s->i][0] != '-' || argv[s->i][1] == '\0') { + s->ind = s->i - s->n_args; + return -1; + } + if (argv[s->i][0] == '-' && argv[s->i][1] == '-') { /* "--" or a long option */ + if (argv[s->i][2] == '\0') { /* a bare "--" */ + ketopt_permute(argv, s->i, s->n_args); + ++s->i, s->ind = s->i - s->n_args; + return -1; + } + s->opt = 0, opt = '?', s->pos = -1; + if (longopts) { /* parse long options */ + int k, n_matches = 0; + const ko_longopt_t *o = 0; + for (j = 2; argv[s->i][j] != '\0' && argv[s->i][j] != '='; ++j) {} /* find the end of the option name */ + for (k = 0; longopts[k].name != 0; ++k) + if (strncmp(&argv[s->i][2], longopts[k].name, j - 2) == 0) + ++n_matches, o = &longopts[k]; + if (n_matches == 1) { + s->opt = opt = o->val, s->longidx = o - longopts; + if (argv[s->i][j] == '=') s->arg = &argv[s->i][j + 1]; + if (o->has_arg == 1 && argv[s->i][j] == '\0') { + if (s->i < argc - 1) s->arg = argv[++s->i]; + else opt = ':'; /* missing option argument */ + } + } + } + } else { /* a short option */ + char *p; + if (s->pos == 0) s->pos = 1; + opt = 
s->opt = argv[s->i][s->pos++]; + p = strchr(ostr, opt); + if (p == 0) { + opt = '?'; /* unknown option */ + } else if (p[1] == ':') { + if (argv[s->i][s->pos] == 0) { + if (s->i < argc - 1) s->arg = argv[++s->i]; + else opt = ':'; /* missing option argument */ + } else s->arg = &argv[s->i][s->pos]; + s->pos = -1; + } + } + if (s->pos < 0 || argv[s->i][s->pos] == 0) { + ++s->i, s->pos = 0; + if (s->n_args > 0) /* permute */ + for (j = i0; j < s->i; ++j) + ketopt_permute(argv, j, s->n_args); + } + s->ind = s->i - s->n_args; + return opt; +} + +#endif diff --git a/main.c b/main.c index 91cc9cc..680ea13 100644 --- a/main.c +++ b/main.c @@ -4,13 +4,9 @@ #include "bseq.h" #include "minimap.h" #include "mmpriv.h" -#ifdef HAVE_GETOPT -#include -#else -#include "getopt.h" -#endif +#include "ketopt.h" -#define MM_VERSION "2.12-r828-dirty" +#define MM_VERSION "2.12-r829-dirty" #ifdef __linux__ #include @@ -26,52 +22,52 @@ void liftrlimit() void liftrlimit() {} #endif -static struct option long_options[] = { - { "bucket-bits", required_argument, 0, 0 }, - { "mb-size", required_argument, 0, 'K' }, - { "seed", required_argument, 0, 0 }, - { "no-kalloc", no_argument, 0, 0 }, - { "print-qname", no_argument, 0, 0 }, - { "no-self", no_argument, 0, 'D' }, - { "print-seeds", no_argument, 0, 0 }, - { "max-chain-skip", required_argument, 0, 0 }, - { "min-dp-len", required_argument, 0, 0 }, - { "print-aln-seq", no_argument, 0, 0 }, - { "splice", no_argument, 0, 0 }, - { "cost-non-gt-ag", required_argument, 0, 'C' }, - { "no-long-join", no_argument, 0, 0 }, - { "sr", no_argument, 0, 0 }, - { "frag", required_argument, 0, 0 }, - { "secondary", required_argument, 0, 0 }, - { "cs", optional_argument, 0, 0 }, - { "end-bonus", required_argument, 0, 0 }, - { "no-pairing", no_argument, 0, 0 }, - { "splice-flank", required_argument, 0, 0 }, - { "idx-no-seq", no_argument, 0, 0 }, - { "end-seed-pen", required_argument, 0, 0 }, // 21 - { "for-only", no_argument, 0, 0 }, // 22 - { "rev-only", 
no_argument, 0, 0 }, // 23 - { "heap-sort", required_argument, 0, 0 }, // 24 - { "all-chain", no_argument, 0, 'P' }, - { "dual", required_argument, 0, 0 }, // 26 - { "max-clip-ratio", required_argument, 0, 0 }, // 27 - { "min-occ-floor", required_argument, 0, 0 }, // 28 - { "MD", no_argument, 0, 0 }, // 29 - { "lj-min-ratio", required_argument, 0, 0 }, // 30 - { "score-N", required_argument, 0, 0 }, // 31 - { "eqx", no_argument, 0, 0 }, // 32 - { "paf-no-hit", no_argument, 0, 0 }, // 33 - { "split-prefix", required_argument, 0, 0 }, // 34 - { "no-end-flt", no_argument, 0, 0 }, // 35 - { "help", no_argument, 0, 'h' }, - { "max-intron-len", required_argument, 0, 'G' }, - { "version", no_argument, 0, 'V' }, - { "min-count", required_argument, 0, 'n' }, - { "min-chain-score",required_argument, 0, 'm' }, - { "mask-level", required_argument, 0, 'M' }, - { "min-dp-score", required_argument, 0, 's' }, - { "sam", no_argument, 0, 'a' }, - { 0, 0, 0, 0} +static ko_longopt_t long_options[] = { + { "bucket-bits", ko_required_argument, 300 }, + { "mb-size", ko_required_argument, 'K' }, + { "seed", ko_required_argument, 302 }, + { "no-kalloc", ko_no_argument, 303 }, + { "print-qname", ko_no_argument, 304 }, + { "no-self", ko_no_argument, 'D' }, + { "print-seeds", ko_no_argument, 306 }, + { "max-chain-skip", ko_required_argument, 307 }, + { "min-dp-len", ko_required_argument, 308 }, + { "print-aln-seq", ko_no_argument, 309 }, + { "splice", ko_no_argument, 310 }, + { "cost-non-gt-ag", ko_required_argument, 'C' }, + { "no-long-join", ko_no_argument, 312 }, + { "sr", ko_no_argument, 313 }, + { "frag", ko_required_argument, 314 }, + { "secondary", ko_required_argument, 315 }, + { "cs", ko_optional_argument, 316 }, + { "end-bonus", ko_required_argument, 317 }, + { "no-pairing", ko_no_argument, 318 }, + { "splice-flank", ko_required_argument, 319 }, + { "idx-no-seq", ko_no_argument, 320 }, + { "end-seed-pen", ko_required_argument, 321 }, + { "for-only", ko_no_argument, 322 }, + { 
"rev-only", ko_no_argument, 323 }, + { "heap-sort", ko_required_argument, 324 }, + { "all-chain", ko_no_argument, 'P' }, + { "dual", ko_required_argument, 326 }, + { "max-clip-ratio", ko_required_argument, 327 }, + { "min-occ-floor", ko_required_argument, 328 }, + { "MD", ko_no_argument, 329 }, + { "lj-min-ratio", ko_required_argument, 330 }, + { "score-N", ko_required_argument, 331 }, + { "eqx", ko_no_argument, 332 }, + { "paf-no-hit", ko_no_argument, 333 }, + { "split-prefix", ko_required_argument, 334 }, + { "no-end-flt", ko_no_argument, 335 }, + { "help", ko_no_argument, 'h' }, + { "max-intron-len", ko_required_argument, 'G' }, + { "version", ko_no_argument, 'V' }, + { "min-count", ko_required_argument, 'n' }, + { "min-chain-score",ko_required_argument, 'm' }, + { "mask-level", ko_required_argument, 'M' }, + { "min-dp-score", ko_required_argument, 's' }, + { "sam", ko_no_argument, 'a' }, + { 0, 0, 0 } }; static inline int64_t mm_parse_num(const char *str) @@ -101,9 +97,10 @@ static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const cha int main(int argc, char *argv[]) { const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yY"; + ketopt_t o = KETOPT_INIT; mm_mapopt_t opt; mm_idxopt_t ipt; - int i, c, n_threads = 3, n_parts, long_idx; + int i, c, n_threads = 3, n_parts; char *fnw = 0, *rg = 0, *s; FILE *fp_help = stderr; mm_idx_reader_t *idx_rdr; @@ -114,30 +111,36 @@ int main(int argc, char *argv[]) mm_realtime0 = realtime(); mm_set_opt(0, &ipt, &opt); - while ((c = getopt_long(argc, argv, opt_str, long_options, &long_idx)) >= 0) // apply option -x/preset first + while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) { // test command line options and apply option -x/preset first if (c == 'x') { - if (mm_set_opt(optarg, &ipt, &opt) < 0) { - fprintf(stderr, "[ERROR] unknown preset '%s'\n", optarg); + if (mm_set_opt(o.arg, &ipt, &opt) < 0) { + fprintf(stderr, "[ERROR] unknown preset '%s'\n", 
o.arg); return 1; } - break; + } else if (c == ':') { + fprintf(stderr, "[ERROR] missing option argument\n"); + return 1; + } else if (c == '?') { + fprintf(stderr, "[ERROR] unknown option in \"%s\"\n", argv[o.i]); + return 1; } - optind = 0; // for musl getopt, optind=0 has the same effect as optreset=1; older libc doesn't have optreset + } + o = KETOPT_INIT; - while ((c = getopt_long(argc, argv, opt_str, long_options, &long_idx)) >= 0) { - if (c == 'w') ipt.w = atoi(optarg); - else if (c == 'k') ipt.k = atoi(optarg); + while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) { + if (c == 'w') ipt.w = atoi(o.arg); + else if (c == 'k') ipt.k = atoi(o.arg); else if (c == 'H') ipt.flag |= MM_I_HPC; - else if (c == 'd') fnw = optarg; // the above are indexing related options, except -I - else if (c == 'r') opt.bw = (int)mm_parse_num(optarg); - else if (c == 't') n_threads = atoi(optarg); - else if (c == 'v') mm_verbose = atoi(optarg); - else if (c == 'g') opt.max_gap = (int)mm_parse_num(optarg); - else if (c == 'G') mm_mapopt_max_intron_len(&opt, (int)mm_parse_num(optarg)); - else if (c == 'F') opt.max_frag_len = (int)mm_parse_num(optarg); - else if (c == 'N') opt.best_n = atoi(optarg); - else if (c == 'p') opt.pri_ratio = atof(optarg); - else if (c == 'M') opt.mask_level = atof(optarg); + else if (c == 'd') fnw = o.arg; // the above are indexing related options, except -I + else if (c == 'r') opt.bw = (int)mm_parse_num(o.arg); + else if (c == 't') n_threads = atoi(o.arg); + else if (c == 'v') mm_verbose = atoi(o.arg); + else if (c == 'g') opt.max_gap = (int)mm_parse_num(o.arg); + else if (c == 'G') mm_mapopt_max_intron_len(&opt, (int)mm_parse_num(o.arg)); + else if (c == 'F') opt.max_frag_len = (int)mm_parse_num(o.arg); + else if (c == 'N') opt.best_n = atoi(o.arg); + else if (c == 'p') opt.pri_ratio = atof(o.arg); + else if (c == 'M') opt.mask_level = atof(o.arg); else if (c == 'c') opt.flag |= MM_F_OUT_CG | MM_F_CIGAR; else if (c == 'D') opt.flag |= 
MM_F_NO_DIAG; else if (c == 'P') opt.flag |= MM_F_ALL_CHAINS; @@ -147,65 +150,65 @@ int main(int argc, char *argv[]) else if (c == 'Y') opt.flag |= MM_F_SOFTCLIP; else if (c == 'L') opt.flag |= MM_F_LONG_CIGAR; else if (c == 'y') opt.flag |= MM_F_COPY_COMMENT; - else if (c == 'T') opt.sdust_thres = atoi(optarg); - else if (c == 'n') opt.min_cnt = atoi(optarg); - else if (c == 'm') opt.min_chain_score = atoi(optarg); - else if (c == 'A') opt.a = atoi(optarg); - else if (c == 'B') opt.b = atoi(optarg); - else if (c == 's') opt.min_dp_max = atoi(optarg); - else if (c == 'C') opt.noncan = atoi(optarg); - else if (c == 'I') ipt.batch_size = mm_parse_num(optarg); - else if (c == 'K') opt.mini_batch_size = (int)mm_parse_num(optarg); - else if (c == 'R') rg = optarg; + else if (c == 'T') opt.sdust_thres = atoi(o.arg); + else if (c == 'n') opt.min_cnt = atoi(o.arg); + else if (c == 'm') opt.min_chain_score = atoi(o.arg); + else if (c == 'A') opt.a = atoi(o.arg); + else if (c == 'B') opt.b = atoi(o.arg); + else if (c == 's') opt.min_dp_max = atoi(o.arg); + else if (c == 'C') opt.noncan = atoi(o.arg); + else if (c == 'I') ipt.batch_size = mm_parse_num(o.arg); + else if (c == 'K') opt.mini_batch_size = (int)mm_parse_num(o.arg); + else if (c == 'R') rg = o.arg; else if (c == 'h') fp_help = stdout; else if (c == '2') opt.flag |= MM_F_2_IO_THREADS; - else if (c == 0 && long_idx == 0) ipt.bucket_bits = atoi(optarg); // --bucket-bits - else if (c == 0 && long_idx == 2) opt.seed = atoi(optarg); // --seed - else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc - else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname - else if (c == 0 && long_idx == 6) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed - else if (c == 0 && long_idx == 7) opt.max_chain_skip = atoi(optarg); // --max-chain-skip - else if (c == 0 && long_idx == 8) opt.min_ksw_len = atoi(optarg); // --min-dp-len - else if (c == 0 
&& long_idx == 9) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq - else if (c == 0 && long_idx ==10) opt.flag |= MM_F_SPLICE; // --splice - else if (c == 0 && long_idx ==12) opt.flag |= MM_F_NO_LJOIN; // --no-long-join - else if (c == 0 && long_idx ==13) opt.flag |= MM_F_SR; // --sr - else if (c == 0 && long_idx ==17) opt.end_bonus = atoi(optarg); // --end-bonus - else if (c == 0 && long_idx ==18) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing - else if (c == 0 && long_idx ==20) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq - else if (c == 0 && long_idx ==21) opt.anchor_ext_shift = atoi(optarg); // --end-seed-pen - else if (c == 0 && long_idx ==22) opt.flag |= MM_F_FOR_ONLY; // --for-only - else if (c == 0 && long_idx ==23) opt.flag |= MM_F_REV_ONLY; // --rev-only - else if (c == 0 && long_idx ==27) opt.max_clip_ratio = atof(optarg); // --max-clip-ratio - else if (c == 0 && long_idx ==28) opt.min_mid_occ = atoi(optarg); // --min-occ-floor - else if (c == 0 && long_idx ==29) opt.flag |= MM_F_OUT_MD; // --MD - else if (c == 0 && long_idx ==30) opt.min_join_flank_ratio = atof(optarg); // --lj-min-ratio - else if (c == 0 && long_idx ==31) opt.sc_ambi = atoi(optarg); // --score-N - else if (c == 0 && long_idx ==32) opt.flag |= MM_F_EQX; // --eqx - else if (c == 0 && long_idx ==33) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit - else if (c == 0 && long_idx ==34) opt.split_prefix = optarg; // --split-prefix - else if (c == 0 && long_idx ==35) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt - else if (c == 0 && long_idx == 14) { // --frag - yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1); - } else if (c == 0 && long_idx == 15) { // --secondary - yes_or_no(&opt, MM_F_NO_PRINT_2ND, long_idx, optarg, 0); - } else if (c == 0 && long_idx == 16) { // --cs + else if (c == 300) ipt.bucket_bits = atoi(o.arg); // --bucket-bits + else if (c == 302) opt.seed = atoi(o.arg); // --seed + else if (c == 303) mm_dbg_flag |= MM_DBG_NO_KALLOC; // 
--no-kalloc + else if (c == 304) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname + else if (c == 306) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed + else if (c == 307) opt.max_chain_skip = atoi(o.arg); // --max-chain-skip + else if (c == 308) opt.min_ksw_len = atoi(o.arg); // --min-dp-len + else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq + else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice + else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join + else if (c == 313) opt.flag |= MM_F_SR; // --sr + else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus + else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing + else if (c == 320) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq + else if (c == 321) opt.anchor_ext_shift = atoi(o.arg); // --end-seed-pen + else if (c == 322) opt.flag |= MM_F_FOR_ONLY; // --for-only + else if (c == 323) opt.flag |= MM_F_REV_ONLY; // --rev-only + else if (c == 327) opt.max_clip_ratio = atof(o.arg); // --max-clip-ratio + else if (c == 328) opt.min_mid_occ = atoi(o.arg); // --min-occ-floor + else if (c == 329) opt.flag |= MM_F_OUT_MD; // --MD + else if (c == 330) opt.min_join_flank_ratio = atof(o.arg); // --lj-min-ratio + else if (c == 331) opt.sc_ambi = atoi(o.arg); // --score-N + else if (c == 332) opt.flag |= MM_F_EQX; // --eqx + else if (c == 333) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit + else if (c == 334) opt.split_prefix = o.arg; // --split-prefix + else if (c == 335) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt + else if (c == 314) { // --frag + yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1); + } else if (c == 315) { // --secondary + yes_or_no(&opt, MM_F_NO_PRINT_2ND, o.longidx, o.arg, 0); + } else if (c == 316) { // --cs opt.flag |= MM_F_OUT_CS | MM_F_CIGAR; - if (optarg == 0 || strcmp(optarg, "short") == 0) { + if (o.arg == 0 || strcmp(o.arg, "short") == 0) { opt.flag &= ~MM_F_OUT_CS_LONG; 
- } else if (strcmp(optarg, "long") == 0) { + } else if (strcmp(o.arg, "long") == 0) { opt.flag |= MM_F_OUT_CS_LONG; - } else if (strcmp(optarg, "none") == 0) { + } else if (strcmp(o.arg, "none") == 0) { opt.flag &= ~MM_F_OUT_CS; } else if (mm_verbose >= 2) { fprintf(stderr, "[WARNING]\033[1;31m --cs only takes 'short' or 'long'. Invalid values are assumed to be 'short'.\033[0m\n"); } - } else if (c == 0 && long_idx == 19) { // --splice-flank - yes_or_no(&opt, MM_F_SPLICE_FLANK, long_idx, optarg, 1); - } else if (c == 0 && long_idx == 24) { // --heap-sort - yes_or_no(&opt, MM_F_HEAP_SORT, long_idx, optarg, 1); - } else if (c == 0 && long_idx == 26) { // --dual - yes_or_no(&opt, MM_F_NO_DUAL, long_idx, optarg, 0); + } else if (c == 319) { // --splice-flank + yes_or_no(&opt, MM_F_SPLICE_FLANK, o.longidx, o.arg, 1); + } else if (c == 324) { // --heap-sort + yes_or_no(&opt, MM_F_HEAP_SORT, o.longidx, o.arg, 1); + } else if (c == 326) { // --dual + yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0); } else if (c == 'S') { opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG; if (mm_verbose >= 2) @@ -216,27 +219,27 @@ int main(int argc, char *argv[]) } else if (c == 'f') { double x; char *p; - x = strtod(optarg, &p); + x = strtod(o.arg, &p); if (x < 1.0) opt.mid_occ_frac = x, opt.mid_occ = 0; else opt.mid_occ = (int)(x + .499); if (*p == ',') opt.max_occ = (int)(strtod(p+1, &p) + .499); } else if (c == 'u') { - if (*optarg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV; // both strands - else if (*optarg == 'f') opt.flag |= MM_F_SPLICE_FOR, opt.flag &= ~MM_F_SPLICE_REV; // match GT-AG - else if (*optarg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR; // match CT-AC (reverse complement of GT-AG) - else if (*optarg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); // don't try to match the GT-AG signal + if (*o.arg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV; // both strands + else if (*o.arg == 'f') opt.flag |= MM_F_SPLICE_FOR, 
opt.flag &= ~MM_F_SPLICE_REV; // match GT-AG + else if (*o.arg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR; // match CT-AC (reverse complement of GT-AG) + else if (*o.arg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); // don't try to match the GT-AG signal else { fprintf(stderr, "[ERROR]\033[1;31m unrecognized cDNA direction\033[0m\n"); return 1; } } else if (c == 'z') { - opt.zdrop = opt.zdrop_inv = strtol(optarg, &s, 10); + opt.zdrop = opt.zdrop_inv = strtol(o.arg, &s, 10); if (*s == ',') opt.zdrop_inv = strtol(s + 1, &s, 10); } else if (c == 'O') { - opt.q = opt.q2 = strtol(optarg, &s, 10); + opt.q = opt.q2 = strtol(o.arg, &s, 10); if (*s == ',') opt.q2 = strtol(s + 1, &s, 10); } else if (c == 'E') { - opt.e = opt.e2 = strtol(optarg, &s, 10); + opt.e = opt.e2 = strtol(o.arg, &s, 10); if (*s == ',') opt.e2 = strtol(s + 1, &s, 10); } } @@ -249,7 +252,7 @@ int main(int argc, char *argv[]) if (mm_check_opt(&ipt, &opt) < 0) return 1; - if (argc == optind || fp_help == stdout) { + if (argc == o.ind || fp_help == stdout) { fprintf(fp_help, "Usage: minimap2 [options] | [query.fa] [...]\n"); fprintf(fp_help, "Options:\n"); fprintf(fp_help, " Indexing:\n"); @@ -303,16 +306,16 @@ int main(int argc, char *argv[]) return fp_help == stdout? 
0 : 1; } - if ((opt.flag & MM_F_SR) && argc - optind > 3) { + if ((opt.flag & MM_F_SR) && argc - o.ind > 3) { fprintf(stderr, "[ERROR] incorrect input: in the sr mode, please specify no more than two query files.\n"); return 1; } - idx_rdr = mm_idx_reader_open(argv[optind], &ipt, fnw); + idx_rdr = mm_idx_reader_open(argv[o.ind], &ipt, fnw); if (idx_rdr == 0) { - fprintf(stderr, "[ERROR] failed to open file '%s'\n", argv[optind]); + fprintf(stderr, "[ERROR] failed to open file '%s'\n", argv[o.ind]); return 1; } - if (!idx_rdr->is_idx && fnw == 0 && argc - optind < 2) { + if (!idx_rdr->is_idx && fnw == 0 && argc - o.ind < 2) { fprintf(stderr, "[ERROR] missing input: please specify a query file to map or option -d to keep the index\n"); mm_idx_reader_close(idx_rdr); return 1; @@ -338,13 +341,13 @@ int main(int argc, char *argv[]) if (mm_verbose >= 3) fprintf(stderr, "[M::%s::%.3f*%.2f] loaded/built the index for %d target sequence(s)\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n_seq); - if (argc != optind + 1) mm_mapopt_update(&opt, mi); + if (argc != o.ind + 1) mm_mapopt_update(&opt, mi); if (mm_verbose >= 3) mm_idx_stat(mi); if (!(opt.flag & MM_F_FRAG_MODE)) { - for (i = optind + 1; i < argc; ++i) + for (i = o.ind + 1; i < argc; ++i) mm_map_file(mi, argv[i], &opt, n_threads); } else { - mm_map_file_frag(mi, argc - (optind + 1), (const char**)&argv[optind + 1], &opt, n_threads); + mm_map_file_frag(mi, argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &opt, n_threads); } mm_idx_destroy(mi); } @@ -352,7 +355,7 @@ int main(int argc, char *argv[]) mm_idx_reader_close(idx_rdr); if (opt.split_prefix) - mm_split_merge(argc - (optind + 1), (const char**)&argv[optind + 1], &opt, n_parts); + mm_split_merge(argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &opt, n_parts); if (fflush(stdout) == EOF) { fprintf(stderr, "[ERROR] failed to write the results\n"); diff --git a/sdust.c b/sdust.c index 24b7cc3..176dcb0 100644 --- a/sdust.c 
+++ b/sdust.c @@ -177,7 +177,7 @@ uint64_t *sdust(void *km, const uint8_t *seq, int l_seq, int T, int W, int *n) #ifdef _SDUST_MAIN #include #include -#include "getopt.h" +#include "ketopt.h" #include "kseq.h" KSEQ_INIT(gzFile, gzread) @@ -186,16 +186,17 @@ int main(int argc, char *argv[]) gzFile fp; kseq_t *ks; int W = 64, T = 20, c; + ketopt_t o = KETOPT_INIT; - while ((c = getopt(argc, argv, "w:t:")) >= 0) { - if (c == 'w') W = atoi(optarg); - else if (c == 't') T = atoi(optarg); + while ((c = ketopt(&o, argc, argv, 1, "w:t:", 0)) >= 0) { + if (c == 'w') W = atoi(o.arg); + else if (c == 't') T = atoi(o.arg); } - if (optind == argc) { + if (o.ind == argc) { fprintf(stderr, "Usage: sdust [-w %d] [-t %d] \n", W, T); return 1; } - fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r"); + fp = strcmp(argv[o.ind], "-")? gzopen(argv[o.ind], "r") : gzdopen(fileno(stdin), "r"); ks = kseq_init(fp); while (kseq_read(ks) >= 0) { uint64_t *r;