r829: replaced musl's getopt with ketopt

This commit is contained in:
Heng Li 2018-09-01 21:15:43 -04:00
parent 3a119d606f
commit 7998fe9906
6 changed files with 272 additions and 420 deletions

View File

@ -32,8 +32,8 @@ all:$(PROG)
extra:all $(PROG_EXTRA)
minimap2:main.o getopt.o libminimap2.a
$(CC) $(CFLAGS) main.o getopt.o -o $@ -L. -lminimap2 $(LIBS)
minimap2:main.o libminimap2.a
$(CC) $(CFLAGS) main.o -o $@ -L. -lminimap2 $(LIBS)
minimap2-lite:example.o libminimap2.a
$(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS)
@ -41,8 +41,8 @@ minimap2-lite:example.o libminimap2.a
libminimap2.a:$(OBJS)
$(AR) -csru $@ $(OBJS)
sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h
$(CC) -D_SDUST_MAIN $(CFLAGS) $< getopt.o kalloc.o -o $@ -lz
sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
$(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz
# SSE-specific targets on x86/x86_64
@ -99,7 +99,6 @@ chain.o: minimap.h mmpriv.h bseq.h kalloc.h
esterr.o: mmpriv.h minimap.h bseq.h
example.o: minimap.h kseq.h
format.o: kalloc.h mmpriv.h minimap.h bseq.h
getopt.o: getopt.h
hit.o: mmpriv.h minimap.h bseq.h kalloc.h khash.h
index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h
kalloc.o: kalloc.h
@ -108,11 +107,13 @@ ksw2_exts2_sse.o: ksw2.h kalloc.h
ksw2_extz2_sse.o: ksw2.h kalloc.h
ksw2_ll_sse.o: ksw2.h kalloc.h
kthread.o: kthread.h
main.o: bseq.h minimap.h mmpriv.h getopt.h
main.o: bseq.h minimap.h mmpriv.h ketopt.h
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h khash.h
map.o: ksort.h
misc.o: mmpriv.h minimap.h bseq.h ksort.h
options.o: mmpriv.h minimap.h bseq.h
pe.o: mmpriv.h minimap.h bseq.h kvec.h kalloc.h ksort.h
sdust.o: kalloc.h kdq.h kvec.h sdust.h
rl.o: kseq.h
sdust.o: kalloc.h kdq.h kvec.h ketopt.h sdust.h
sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h
splitidx.o: mmpriv.h minimap.h bseq.h

216
getopt.c
View File

@ -1,216 +0,0 @@
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include "getopt.h"
/* Parser state shared by getopt() and the getopt_long() family in this file. */
/* optarg: set by the parsers to point at the argument of the current option. */
char *optarg;
/*
 * optind   - index of the next argv element to examine; a value of 0 makes the
 *            parsers reset their state on the next call.
 * opterr   - when nonzero, diagnostics are written to stderr.
 * optopt   - option character examined by getopt(), or the `val` of the long
 *            option matched by the getopt_long() family.
 * __optpos - offset of the next option character inside argv[optind] while a
 *            group of bundled short options is being consumed; 0 otherwise.
 * optreset - when nonzero, the parsers reset their state on the next call.
 */
int optind=1, opterr=1, optopt, __optpos, optreset=0;
/* File-local shorthand for __optpos. */
#define optpos __optpos
/*
 * Emit one diagnostic line on stderr: the string `a`, then the string `b`,
 * then exactly `l` bytes starting at `c`, then a newline. On non-Windows
 * builds the stream is locked for the duration so the pieces are not
 * interleaved with output from other threads.
 */
static void __getopt_msg(const char *a, const char *b, const char *c, size_t l)
{
	FILE *err = stderr;

#if !(defined(WIN32) || defined(_WIN32))
	flockfile(err);
#endif

	fputs(a, err);          /* program name */
	fputs(b, err);          /* fixed message text */
	fwrite(c, 1, l, err);   /* offending option; not necessarily NUL-terminated at l */
	fputc('\n', err);

#if !(defined(WIN32) || defined(_WIN32))
	funlockfile(err);
#endif
}
/*
 * Parse the next short option in argv.
 *
 * optstring lists the accepted option characters; a character followed by
 * ':' takes an argument and one followed by '::' takes an optional argument
 * that must be attached to the option (e.g. "-ovalue"). A leading '-' in
 * optstring makes non-option arguments be returned as code 1 with optarg
 * pointing at them; a leading '+' or '-' is skipped before the option
 * characters are looked up. A ':' as the first remaining character suppresses
 * diagnostics and makes a missing argument be reported as ':'.
 *
 * Returns the option character on success, '?' for an unrecognized option
 * (or a missing argument when optstring does not start with ':'), ':' for a
 * missing argument when it does, and -1 when there is nothing left to parse
 * (end of argv, a non-option argument, a lone "-", or "--", which is
 * consumed).
 */
int getopt(int argc, char * const argv[], const char *optstring)
{
	int i, c, d;
	int k;
	char *optchar;

	/* optind == 0 or optreset != 0 restarts the scan from argv[1] */
	if (!optind || optreset) {
		optreset = 0;
		__optpos = 0;
		optind = 1;
	}

	if (optind >= argc || !argv[optind])
		return -1;

	if (argv[optind][0] != '-') {
		if (optstring[0] == '-') {
			optarg = argv[optind++];
			return 1;
		}
		return -1;
	}

	/* a lone "-" is an ordinary argument */
	if (!argv[optind][1])
		return -1;

	/* "--" terminates option parsing and is skipped */
	if (argv[optind][1] == '-' && !argv[optind][2])
		return optind++, -1;

	if (!optpos) optpos++; /* skip the leading '-' of a fresh argument */
	c = argv[optind][optpos], k = 1;
	optchar = argv[optind]+optpos;
	optopt = c;
	optpos += k;

	/* finished this argv element: advance to the next one */
	if (!argv[optind][optpos]) {
		optind++;
		optpos = 0;
	}

	if (optstring[0] == '-' || optstring[0] == '+')
		optstring++;

	/*
	 * Look c up in optstring. The scan must stop at the terminating NUL:
	 * without that check an option character that is not listed makes the
	 * loop read past the end of optstring. After a successful match, i is
	 * the index of the character following the matched one.
	 */
	i = 0;
	do {
		d = optstring[i++];
	} while (d != '\0' && d != c);

	/* ':' is optstring syntax, never an option; '\0' guards the lookup above */
	if (d != c || c == ':' || c == '\0') {
		if (optstring[0] != ':' && opterr)
			__getopt_msg(argv[0], ": unrecognized option: ", optchar, k);
		return '?';
	}
	if (optstring[i] == ':') {
		if (optstring[i+1] == ':') optarg = 0; /* optional argument: default to none */
		else if (optind >= argc) {
			if (optstring[0] == ':') return ':';
			if (opterr) __getopt_msg(argv[0],
				": option requires an argument: ",
				optchar, k);
			return '?';
		}
		/* required argument, or optional argument attached to the option */
		if (optstring[i+1] != ':' || optpos) {
			optarg = argv[optind++] + optpos;
			optpos = 0;
		}
	}
	return c;
}
/*
 * Move argv[src] to position dest, shifting the elements in
 * argv[dest..src-1] one slot towards the end. The const qualifier on argv
 * is cast away because the getopt interface hands the vector over as
 * char *const *.
 */
static void permute(char *const *argv, int dest, int src)
{
	char **slots = (char **)argv;
	char *moved = slots[src];
	int pos = src;

	while (pos > dest) {
		slots[pos] = slots[pos - 1];
		--pos;
	}
	slots[dest] = moved;
}
/*
 * Try to parse argv[optind] as a long option; fall back to getopt() when it
 * is not one.
 *
 * An argument is treated as a long option when it starts with "--" followed
 * by at least one character, or, if longonly is nonzero, when it starts with
 * a single '-' followed by a character other than '-'. Names may be
 * abbreviated: a unique prefix is accepted, and an exact match always wins.
 * The argument may be attached as "--name=value" or, for required_argument
 * options, given as the next argv element.
 *
 * On a match, *idx (when idx is non-null) receives the index into longopts.
 * If the entry's flag is non-null, val is stored through it and 0 is
 * returned; otherwise val is returned. '?' is returned for unknown or
 * ambiguous "--" options and for an argument given to an option that takes
 * none; a missing required argument yields ':' when optstring (after an
 * optional leading '+'/'-') starts with ':' and '?' otherwise.
 *
 * The caller must guarantee optind < argc and argv[optind] != 0.
 */
static int __getopt_long_core(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly)
{
	optarg = 0;
	if (longopts && argv[optind][0] == '-' &&
		((longonly && argv[optind][1] && argv[optind][1] != '-') ||
		 (argv[optind][1] == '-' && argv[optind][2])))
	{
		/* nonzero when diagnostics are suppressed via a leading ':' */
		int colon = optstring[optstring[0]=='+'||optstring[0]=='-']==':';
		int i, cnt, match = -1;
		char *opt, *arg = 0;
		for (cnt=i=0; longopts[i].name; i++) {
			const char *name = longopts[i].name;
			opt = argv[optind]+1;
			if (*opt == '-') opt++;
			for (; *name && *name == *opt; name++, opt++);
			if (*opt && *opt != '=') continue; /* typed text is not a prefix of name */
			/*
			 * Remember where the typed name ended for THIS candidate.
			 * `opt` is overwritten by later iterations, so using it after
			 * the loop would miss an attached "=value" whenever the
			 * matched entry is an abbreviation and not the last entry
			 * examined.
			 */
			arg = opt;
			match = i;
			if (!*name) {
				/* exact match beats any number of abbreviations */
				cnt = 1;
				break;
			}
			cnt++;
		}
		if (cnt==1) {
			i = match;
			opt = arg;
			optind++;
			optopt = longopts[i].val;
			if (*opt == '=') {
				if (!longopts[i].has_arg) {
					if (colon || !opterr)
						return '?';
					__getopt_msg(argv[0],
						": option does not take an argument: ",
						longopts[i].name,
						strlen(longopts[i].name));
					return '?';
				}
				optarg = opt+1;
			} else if (longopts[i].has_arg == required_argument) {
				if (!(optarg = argv[optind])) {
					if (colon) return ':';
					if (!opterr) return '?';
					__getopt_msg(argv[0],
						": option requires an argument: ",
						longopts[i].name,
						strlen(longopts[i].name));
					return '?';
				}
				optind++;
			}
			if (idx) *idx = i;
			if (longopts[i].flag) {
				*longopts[i].flag = longopts[i].val;
				return 0;
			}
			return longopts[i].val;
		}
		/* "--something" that matched nothing (cnt == 0) or several names (cnt > 1) */
		if (argv[optind][1] == '-') {
			if (!colon && opterr)
				__getopt_msg(argv[0], cnt ?
					": option is ambiguous: " :
					": unrecognized option: ",
					argv[optind]+2,
					strlen(argv[optind]+2));
			optind++;
			return '?';
		}
	}
	return getopt(argc, argv, optstring);
}
/*
 * Shared implementation of getopt_long() and getopt_long_only().
 *
 * Unless optstring starts with '+' or '-', non-option arguments in front of
 * the next option are skipped, the option is parsed, and the argv elements
 * it consumed are then rotated in front of the skipped arguments so that
 * non-options accumulate at the end of argv. Returns whatever
 * __getopt_long_core() returns, or -1 when no further option exists.
 */
static int __getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly)
{
	int first_nonopt, first_opt, result;

	/* optind == 0 or optreset != 0 restarts the scan from argv[1] */
	if (optind == 0 || optreset != 0) {
		optreset = 0;
		__optpos = 0;
		optind = 1;
	}
	if (optind >= argc || !argv[optind])
		return -1;

	first_nonopt = optind;
	if (!(optstring[0] == '+' || optstring[0] == '-')) {
		/* permutation enabled: find the next argument that looks like an option */
		int scan = optind;
		while (1) {
			if (scan >= argc || !argv[scan])
				return -1;
			if (argv[scan][0] == '-' && argv[scan][1] != '\0')
				break;
			scan++;
		}
		optind = scan;
	}
	first_opt = optind;

	result = __getopt_long_core(argc, argv, optstring, longopts, idx, longonly);

	if (first_opt > first_nonopt) {
		/* rotate each consumed element in front of the skipped non-options */
		int consumed = optind - first_opt;
		int left;
		for (left = consumed; left > 0; left--)
			permute(argv, first_nonopt, optind - 1);
		optind = first_nonopt + consumed;
	}
	return result;
}
/*
 * Parse the next short or "--long" option. Thin wrapper that forwards all
 * arguments to __getopt_long() with longonly = 0 and returns its result.
 */
int getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx)
{
return __getopt_long(argc, argv, optstring, longopts, idx, 0);
}
/*
 * Same as getopt_long(), but forwards to __getopt_long() with longonly = 1,
 * which lets an argument starting with a single '-' be tried as a long
 * option as well.
 */
int getopt_long_only(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx)
{
return __getopt_long(argc, argv, optstring, longopts, idx, 1);
}

View File

@ -1,53 +0,0 @@
/*
Copyright 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* Declarations for the bundled getopt()/getopt_long() implementation. */
#ifndef _GETOPT_H
#define _GETOPT_H
#ifdef __cplusplus
extern "C" {
#endif
/* Short-option parser: (argc, argv, optstring). */
int getopt(int, char * const [], const char *);
/* Parser state; defined once in the implementation file. */
extern char *optarg;
extern int optind, opterr, optopt, optreset;
/* One entry of a long-option table; the table ends at an entry whose name is 0. */
struct option {
const char *name; /* option name without the leading dashes */
int has_arg; /* no_argument, required_argument or optional_argument */
int *flag; /* if non-null, val is stored here and the parser returns 0 */
int val; /* value returned by the parser (or stored through flag) */
};
/* Long-option parsers: (argc, argv, optstring, longopts, idx). */
int getopt_long(int, char *const *, const char *, const struct option *, int *);
int getopt_long_only(int, char *const *, const char *, const struct option *, int *);
/* Values for struct option::has_arg. */
#define no_argument 0
#define required_argument 1
#define optional_argument 2
#ifdef __cplusplus
}
#endif
#endif

116
ketopt.h 100644
View File

@ -0,0 +1,116 @@
#ifndef KETOPT_H
#define KETOPT_H

#include <string.h> /* for strchr() and strncmp() */

/* Values for ko_longopt_t::has_arg */
#define ko_no_argument       0
#define ko_required_argument 1
#define ko_optional_argument 2

/* Parser state; copy KETOPT_INIT into it before the first call to ketopt(). */
typedef struct {
	int ind;   /* equivalent to optind */
	int opt;   /* equivalent to optopt */
	char *arg; /* equivalent to optarg */
	int longidx; /* index of a long option; or -1 if short */
	/* private variables not intended for external uses */
	int i, pos, n_args;
} ketopt_t;

/* One long option; a table of these ends at an entry whose name is 0. */
typedef struct {
	char *name;  /* option name without the leading "--" */
	int has_arg; /* one of the ko_*_argument constants */
	int val;     /* value returned by ketopt() when this option is matched */
} ko_longopt_t;

static ketopt_t KETOPT_INIT = { 1, 0, 0, -1, 1, 0, 0 };

static void ketopt_permute(char *argv[], int j, int n) /* move argv[j] over n elements to the left */
{
	int k;
	char *p = argv[j];
	for (k = 0; k < n; ++k)
		argv[j - k] = argv[j - k - 1];
	argv[j - k] = p;
}

/**
 * Parse command-line options and arguments
 *
 * This function has a similar interface to GNU's getopt_long(). Each call
 * parses one option and returns the option name. s->arg points to the option
 * argument if present. The function returns -1 when all command-line arguments
 * are parsed. In this case, s->ind is the index of the first non-option
 * argument.
 *
 * Long option names may be abbreviated to any unique prefix. A name that is
 * typed in full always selects that option, even when it is also a prefix of
 * another option (e.g. "--splice" with both "splice" and "splice-flank" in the
 * table).
 *
 * @param s         status; shall be initialized to KETOPT_INIT on the first call
 * @param argc      length of argv[]
 * @param argv      list of command-line arguments; argv[0] is ignored
 * @param permute   non-zero to move options ahead of non-option arguments
 * @param ostr      option string
 * @param longopts  long options
 *
 * @return ASCII for a short option; ko_longopt_t::val for a long option; -1 if
 *         argv[] is fully processed; '?' for an unknown option or an ambiguous
 *         long option; ':' if an option argument is missing
 */
static int ketopt(ketopt_t *s, int argc, char *argv[], int permute, const char *ostr, const ko_longopt_t *longopts)
{
	int opt = -1, i0, j;
	if (permute) { /* skip non-option arguments; they are moved behind the option later */
		while (s->i < argc && (argv[s->i][0] != '-' || argv[s->i][1] == '\0'))
			++s->i, ++s->n_args;
	}
	s->arg = 0, s->longidx = -1, i0 = s->i;
	if (s->i >= argc || argv[s->i][0] != '-' || argv[s->i][1] == '\0') {
		s->ind = s->i - s->n_args;
		return -1;
	}
	if (argv[s->i][0] == '-' && argv[s->i][1] == '-') { /* "--" or a long option */
		if (argv[s->i][2] == '\0') { /* a bare "--" */
			ketopt_permute(argv, s->i, s->n_args);
			++s->i, s->ind = s->i - s->n_args;
			return -1;
		}
		s->opt = 0, opt = '?', s->pos = -1;
		if (longopts) { /* parse long options */
			int k, n_exact = 0, n_partial = 0;
			const ko_longopt_t *o = 0, *o_exact = 0, *o_partial = 0;
			for (j = 2; argv[s->i][j] != '\0' && argv[s->i][j] != '='; ++j) {} /* find the end of the option name */
			for (k = 0; longopts[k].name != 0; ++k) {
				if (strncmp(&argv[s->i][2], longopts[k].name, j - 2) != 0) continue;
				/* the first j-2 characters agree; the name is exact if it ends right there */
				if (longopts[k].name[j - 2] == '\0') ++n_exact, o_exact = &longopts[k];
				else ++n_partial, o_partial = &longopts[k];
			}
			/* an exact match wins over abbreviations; otherwise the abbreviation must be unique */
			if (n_exact == 1) o = o_exact;
			else if (n_exact == 0 && n_partial == 1) o = o_partial;
			if (o != 0) {
				s->opt = opt = o->val, s->longidx = (int)(o - longopts);
				if (argv[s->i][j] == '=') s->arg = &argv[s->i][j + 1];
				if (o->has_arg == 1 && argv[s->i][j] == '\0') {
					if (s->i < argc - 1) s->arg = argv[++s->i];
					else opt = ':'; /* missing option argument */
				}
			}
		}
	} else { /* a short option */
		const char *p;
		if (s->pos == 0) s->pos = 1;
		opt = s->opt = argv[s->i][s->pos++];
		p = strchr(ostr, opt);
		if (p == 0) {
			opt = '?'; /* unknown option */
		} else if (p[1] == ':') {
			if (argv[s->i][s->pos] == 0) {
				if (s->i < argc - 1) s->arg = argv[++s->i];
				else opt = ':'; /* missing option argument */
			} else s->arg = &argv[s->i][s->pos];
			s->pos = -1;
		}
	}
	if (s->pos < 0 || argv[s->i][s->pos] == 0) { /* done with this argv element */
		++s->i, s->pos = 0;
		if (s->n_args > 0) /* permute */
			for (j = i0; j < s->i; ++j)
				ketopt_permute(argv, j, s->n_args);
	}
	s->ind = s->i - s->n_args;
	return opt;
}

#endif

279
main.c
View File

@ -4,13 +4,9 @@
#include "bseq.h"
#include "minimap.h"
#include "mmpriv.h"
#ifdef HAVE_GETOPT
#include <getopt.h>
#else
#include "getopt.h"
#endif
#include "ketopt.h"
#define MM_VERSION "2.12-r828-dirty"
#define MM_VERSION "2.12-r829-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -26,52 +22,52 @@ void liftrlimit()
void liftrlimit() {}
#endif
static struct option long_options[] = {
{ "bucket-bits", required_argument, 0, 0 },
{ "mb-size", required_argument, 0, 'K' },
{ "seed", required_argument, 0, 0 },
{ "no-kalloc", no_argument, 0, 0 },
{ "print-qname", no_argument, 0, 0 },
{ "no-self", no_argument, 0, 'D' },
{ "print-seeds", no_argument, 0, 0 },
{ "max-chain-skip", required_argument, 0, 0 },
{ "min-dp-len", required_argument, 0, 0 },
{ "print-aln-seq", no_argument, 0, 0 },
{ "splice", no_argument, 0, 0 },
{ "cost-non-gt-ag", required_argument, 0, 'C' },
{ "no-long-join", no_argument, 0, 0 },
{ "sr", no_argument, 0, 0 },
{ "frag", required_argument, 0, 0 },
{ "secondary", required_argument, 0, 0 },
{ "cs", optional_argument, 0, 0 },
{ "end-bonus", required_argument, 0, 0 },
{ "no-pairing", no_argument, 0, 0 },
{ "splice-flank", required_argument, 0, 0 },
{ "idx-no-seq", no_argument, 0, 0 },
{ "end-seed-pen", required_argument, 0, 0 }, // 21
{ "for-only", no_argument, 0, 0 }, // 22
{ "rev-only", no_argument, 0, 0 }, // 23
{ "heap-sort", required_argument, 0, 0 }, // 24
{ "all-chain", no_argument, 0, 'P' },
{ "dual", required_argument, 0, 0 }, // 26
{ "max-clip-ratio", required_argument, 0, 0 }, // 27
{ "min-occ-floor", required_argument, 0, 0 }, // 28
{ "MD", no_argument, 0, 0 }, // 29
{ "lj-min-ratio", required_argument, 0, 0 }, // 30
{ "score-N", required_argument, 0, 0 }, // 31
{ "eqx", no_argument, 0, 0 }, // 32
{ "paf-no-hit", no_argument, 0, 0 }, // 33
{ "split-prefix", required_argument, 0, 0 }, // 34
{ "no-end-flt", no_argument, 0, 0 }, // 35
{ "help", no_argument, 0, 'h' },
{ "max-intron-len", required_argument, 0, 'G' },
{ "version", no_argument, 0, 'V' },
{ "min-count", required_argument, 0, 'n' },
{ "min-chain-score",required_argument, 0, 'm' },
{ "mask-level", required_argument, 0, 'M' },
{ "min-dp-score", required_argument, 0, 's' },
{ "sam", no_argument, 0, 'a' },
{ 0, 0, 0, 0}
/*
 * Long-option table handed to ketopt(). Each entry is { name, has_arg, val };
 * ketopt() returns val when the option is matched. Entries whose val is a
 * character constant share that code with a single-letter option and are
 * handled by the same `c == '<letter>'` branch in main(); entries with a
 * numeric val (300 and up) exist only in long form and are dispatched on
 * that number. The all-zero entry terminates the table.
 */
static ko_longopt_t long_options[] = {
{ "bucket-bits", ko_required_argument, 300 },
{ "mb-size", ko_required_argument, 'K' },
{ "seed", ko_required_argument, 302 },
{ "no-kalloc", ko_no_argument, 303 },
{ "print-qname", ko_no_argument, 304 },
{ "no-self", ko_no_argument, 'D' },
{ "print-seeds", ko_no_argument, 306 },
{ "max-chain-skip", ko_required_argument, 307 },
{ "min-dp-len", ko_required_argument, 308 },
{ "print-aln-seq", ko_no_argument, 309 },
{ "splice", ko_no_argument, 310 },
{ "cost-non-gt-ag", ko_required_argument, 'C' },
{ "no-long-join", ko_no_argument, 312 },
{ "sr", ko_no_argument, 313 },
{ "frag", ko_required_argument, 314 },
{ "secondary", ko_required_argument, 315 },
{ "cs", ko_optional_argument, 316 },
{ "end-bonus", ko_required_argument, 317 },
{ "no-pairing", ko_no_argument, 318 },
{ "splice-flank", ko_required_argument, 319 },
{ "idx-no-seq", ko_no_argument, 320 },
{ "end-seed-pen", ko_required_argument, 321 },
{ "for-only", ko_no_argument, 322 },
{ "rev-only", ko_no_argument, 323 },
{ "heap-sort", ko_required_argument, 324 },
{ "all-chain", ko_no_argument, 'P' },
{ "dual", ko_required_argument, 326 },
{ "max-clip-ratio", ko_required_argument, 327 },
{ "min-occ-floor", ko_required_argument, 328 },
{ "MD", ko_no_argument, 329 },
{ "lj-min-ratio", ko_required_argument, 330 },
{ "score-N", ko_required_argument, 331 },
{ "eqx", ko_no_argument, 332 },
{ "paf-no-hit", ko_no_argument, 333 },
{ "split-prefix", ko_required_argument, 334 },
{ "no-end-flt", ko_no_argument, 335 },
{ "help", ko_no_argument, 'h' },
{ "max-intron-len", ko_required_argument, 'G' },
{ "version", ko_no_argument, 'V' },
{ "min-count", ko_required_argument, 'n' },
{ "min-chain-score",ko_required_argument, 'm' },
{ "mask-level", ko_required_argument, 'M' },
{ "min-dp-score", ko_required_argument, 's' },
{ "sam", ko_no_argument, 'a' },
{ 0, 0, 0 }
};
static inline int64_t mm_parse_num(const char *str)
@ -101,9 +97,10 @@ static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const cha
int main(int argc, char *argv[])
{
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yY";
ketopt_t o = KETOPT_INIT;
mm_mapopt_t opt;
mm_idxopt_t ipt;
int i, c, n_threads = 3, n_parts, long_idx;
int i, c, n_threads = 3, n_parts;
char *fnw = 0, *rg = 0, *s;
FILE *fp_help = stderr;
mm_idx_reader_t *idx_rdr;
@ -114,30 +111,36 @@ int main(int argc, char *argv[])
mm_realtime0 = realtime();
mm_set_opt(0, &ipt, &opt);
while ((c = getopt_long(argc, argv, opt_str, long_options, &long_idx)) >= 0) // apply option -x/preset first
while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) { // test command line options and apply option -x/preset first
if (c == 'x') {
if (mm_set_opt(optarg, &ipt, &opt) < 0) {
fprintf(stderr, "[ERROR] unknown preset '%s'\n", optarg);
if (mm_set_opt(o.arg, &ipt, &opt) < 0) {
fprintf(stderr, "[ERROR] unknown preset '%s'\n", o.arg);
return 1;
}
break;
} else if (c == ':') {
fprintf(stderr, "[ERROR] missing option argument\n");
return 1;
} else if (c == '?') {
fprintf(stderr, "[ERROR] unknown option in \"%s\"\n", argv[o.i]);
return 1;
}
optind = 0; // for musl getopt, optind=0 has the same effect as optreset=1; older libc doesn't have optreset
}
o = KETOPT_INIT;
while ((c = getopt_long(argc, argv, opt_str, long_options, &long_idx)) >= 0) {
if (c == 'w') ipt.w = atoi(optarg);
else if (c == 'k') ipt.k = atoi(optarg);
while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) {
if (c == 'w') ipt.w = atoi(o.arg);
else if (c == 'k') ipt.k = atoi(o.arg);
else if (c == 'H') ipt.flag |= MM_I_HPC;
else if (c == 'd') fnw = optarg; // the above are indexing related options, except -I
else if (c == 'r') opt.bw = (int)mm_parse_num(optarg);
else if (c == 't') n_threads = atoi(optarg);
else if (c == 'v') mm_verbose = atoi(optarg);
else if (c == 'g') opt.max_gap = (int)mm_parse_num(optarg);
else if (c == 'G') mm_mapopt_max_intron_len(&opt, (int)mm_parse_num(optarg));
else if (c == 'F') opt.max_frag_len = (int)mm_parse_num(optarg);
else if (c == 'N') opt.best_n = atoi(optarg);
else if (c == 'p') opt.pri_ratio = atof(optarg);
else if (c == 'M') opt.mask_level = atof(optarg);
else if (c == 'd') fnw = o.arg; // the above are indexing related options, except -I
else if (c == 'r') opt.bw = (int)mm_parse_num(o.arg);
else if (c == 't') n_threads = atoi(o.arg);
else if (c == 'v') mm_verbose = atoi(o.arg);
else if (c == 'g') opt.max_gap = (int)mm_parse_num(o.arg);
else if (c == 'G') mm_mapopt_max_intron_len(&opt, (int)mm_parse_num(o.arg));
else if (c == 'F') opt.max_frag_len = (int)mm_parse_num(o.arg);
else if (c == 'N') opt.best_n = atoi(o.arg);
else if (c == 'p') opt.pri_ratio = atof(o.arg);
else if (c == 'M') opt.mask_level = atof(o.arg);
else if (c == 'c') opt.flag |= MM_F_OUT_CG | MM_F_CIGAR;
else if (c == 'D') opt.flag |= MM_F_NO_DIAG;
else if (c == 'P') opt.flag |= MM_F_ALL_CHAINS;
@ -147,65 +150,65 @@ int main(int argc, char *argv[])
else if (c == 'Y') opt.flag |= MM_F_SOFTCLIP;
else if (c == 'L') opt.flag |= MM_F_LONG_CIGAR;
else if (c == 'y') opt.flag |= MM_F_COPY_COMMENT;
else if (c == 'T') opt.sdust_thres = atoi(optarg);
else if (c == 'n') opt.min_cnt = atoi(optarg);
else if (c == 'm') opt.min_chain_score = atoi(optarg);
else if (c == 'A') opt.a = atoi(optarg);
else if (c == 'B') opt.b = atoi(optarg);
else if (c == 's') opt.min_dp_max = atoi(optarg);
else if (c == 'C') opt.noncan = atoi(optarg);
else if (c == 'I') ipt.batch_size = mm_parse_num(optarg);
else if (c == 'K') opt.mini_batch_size = (int)mm_parse_num(optarg);
else if (c == 'R') rg = optarg;
else if (c == 'T') opt.sdust_thres = atoi(o.arg);
else if (c == 'n') opt.min_cnt = atoi(o.arg);
else if (c == 'm') opt.min_chain_score = atoi(o.arg);
else if (c == 'A') opt.a = atoi(o.arg);
else if (c == 'B') opt.b = atoi(o.arg);
else if (c == 's') opt.min_dp_max = atoi(o.arg);
else if (c == 'C') opt.noncan = atoi(o.arg);
else if (c == 'I') ipt.batch_size = mm_parse_num(o.arg);
else if (c == 'K') opt.mini_batch_size = (int)mm_parse_num(o.arg);
else if (c == 'R') rg = o.arg;
else if (c == 'h') fp_help = stdout;
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
else if (c == 0 && long_idx == 0) ipt.bucket_bits = atoi(optarg); // --bucket-bits
else if (c == 0 && long_idx == 2) opt.seed = atoi(optarg); // --seed
else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
else if (c == 0 && long_idx == 6) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed
else if (c == 0 && long_idx == 7) opt.max_chain_skip = atoi(optarg); // --max-chain-skip
else if (c == 0 && long_idx == 8) opt.min_ksw_len = atoi(optarg); // --min-dp-len
else if (c == 0 && long_idx == 9) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
else if (c == 0 && long_idx ==10) opt.flag |= MM_F_SPLICE; // --splice
else if (c == 0 && long_idx ==12) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
else if (c == 0 && long_idx ==13) opt.flag |= MM_F_SR; // --sr
else if (c == 0 && long_idx ==17) opt.end_bonus = atoi(optarg); // --end-bonus
else if (c == 0 && long_idx ==18) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing
else if (c == 0 && long_idx ==20) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq
else if (c == 0 && long_idx ==21) opt.anchor_ext_shift = atoi(optarg); // --end-seed-pen
else if (c == 0 && long_idx ==22) opt.flag |= MM_F_FOR_ONLY; // --for-only
else if (c == 0 && long_idx ==23) opt.flag |= MM_F_REV_ONLY; // --rev-only
else if (c == 0 && long_idx ==27) opt.max_clip_ratio = atof(optarg); // --max-clip-ratio
else if (c == 0 && long_idx ==28) opt.min_mid_occ = atoi(optarg); // --min-occ-floor
else if (c == 0 && long_idx ==29) opt.flag |= MM_F_OUT_MD; // --MD
else if (c == 0 && long_idx ==30) opt.min_join_flank_ratio = atof(optarg); // --lj-min-ratio
else if (c == 0 && long_idx ==31) opt.sc_ambi = atoi(optarg); // --score-N
else if (c == 0 && long_idx ==32) opt.flag |= MM_F_EQX; // --eqx
else if (c == 0 && long_idx ==33) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit
else if (c == 0 && long_idx ==34) opt.split_prefix = optarg; // --split-prefix
else if (c == 0 && long_idx ==35) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt
else if (c == 0 && long_idx == 14) { // --frag
yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1);
} else if (c == 0 && long_idx == 15) { // --secondary
yes_or_no(&opt, MM_F_NO_PRINT_2ND, long_idx, optarg, 0);
} else if (c == 0 && long_idx == 16) { // --cs
else if (c == 300) ipt.bucket_bits = atoi(o.arg); // --bucket-bits
else if (c == 302) opt.seed = atoi(o.arg); // --seed
else if (c == 303) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
else if (c == 304) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
else if (c == 306) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed
else if (c == 307) opt.max_chain_skip = atoi(o.arg); // --max-chain-skip
else if (c == 308) opt.min_ksw_len = atoi(o.arg); // --min-dp-len
else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice
else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
else if (c == 313) opt.flag |= MM_F_SR; // --sr
else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus
else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing
else if (c == 320) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq
else if (c == 321) opt.anchor_ext_shift = atoi(o.arg); // --end-seed-pen
else if (c == 322) opt.flag |= MM_F_FOR_ONLY; // --for-only
else if (c == 323) opt.flag |= MM_F_REV_ONLY; // --rev-only
else if (c == 327) opt.max_clip_ratio = atof(o.arg); // --max-clip-ratio
else if (c == 328) opt.min_mid_occ = atoi(o.arg); // --min-occ-floor
else if (c == 329) opt.flag |= MM_F_OUT_MD; // --MD
else if (c == 330) opt.min_join_flank_ratio = atof(o.arg); // --lj-min-ratio
else if (c == 331) opt.sc_ambi = atoi(o.arg); // --score-N
else if (c == 332) opt.flag |= MM_F_EQX; // --eqx
else if (c == 333) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit
else if (c == 334) opt.split_prefix = o.arg; // --split-prefix
else if (c == 335) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt
else if (c == 314) { // --frag
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
} else if (c == 315) { // --secondary
yes_or_no(&opt, MM_F_NO_PRINT_2ND, o.longidx, o.arg, 0);
} else if (c == 316) { // --cs
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR;
if (optarg == 0 || strcmp(optarg, "short") == 0) {
if (o.arg == 0 || strcmp(o.arg, "short") == 0) {
opt.flag &= ~MM_F_OUT_CS_LONG;
} else if (strcmp(optarg, "long") == 0) {
} else if (strcmp(o.arg, "long") == 0) {
opt.flag |= MM_F_OUT_CS_LONG;
} else if (strcmp(optarg, "none") == 0) {
} else if (strcmp(o.arg, "none") == 0) {
opt.flag &= ~MM_F_OUT_CS;
} else if (mm_verbose >= 2) {
fprintf(stderr, "[WARNING]\033[1;31m --cs only takes 'short' or 'long'. Invalid values are assumed to be 'short'.\033[0m\n");
}
} else if (c == 0 && long_idx == 19) { // --splice-flank
yes_or_no(&opt, MM_F_SPLICE_FLANK, long_idx, optarg, 1);
} else if (c == 0 && long_idx == 24) { // --heap-sort
yes_or_no(&opt, MM_F_HEAP_SORT, long_idx, optarg, 1);
} else if (c == 0 && long_idx == 26) { // --dual
yes_or_no(&opt, MM_F_NO_DUAL, long_idx, optarg, 0);
} else if (c == 319) { // --splice-flank
yes_or_no(&opt, MM_F_SPLICE_FLANK, o.longidx, o.arg, 1);
} else if (c == 324) { // --heap-sort
yes_or_no(&opt, MM_F_HEAP_SORT, o.longidx, o.arg, 1);
} else if (c == 326) { // --dual
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
} else if (c == 'S') {
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
if (mm_verbose >= 2)
@ -216,27 +219,27 @@ int main(int argc, char *argv[])
} else if (c == 'f') {
double x;
char *p;
x = strtod(optarg, &p);
x = strtod(o.arg, &p);
if (x < 1.0) opt.mid_occ_frac = x, opt.mid_occ = 0;
else opt.mid_occ = (int)(x + .499);
if (*p == ',') opt.max_occ = (int)(strtod(p+1, &p) + .499);
} else if (c == 'u') {
if (*optarg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV; // both strands
else if (*optarg == 'f') opt.flag |= MM_F_SPLICE_FOR, opt.flag &= ~MM_F_SPLICE_REV; // match GT-AG
else if (*optarg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR; // match CT-AC (reverse complement of GT-AG)
else if (*optarg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); // don't try to match the GT-AG signal
if (*o.arg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV; // both strands
else if (*o.arg == 'f') opt.flag |= MM_F_SPLICE_FOR, opt.flag &= ~MM_F_SPLICE_REV; // match GT-AG
else if (*o.arg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR; // match CT-AC (reverse complement of GT-AG)
else if (*o.arg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); // don't try to match the GT-AG signal
else {
fprintf(stderr, "[ERROR]\033[1;31m unrecognized cDNA direction\033[0m\n");
return 1;
}
} else if (c == 'z') {
opt.zdrop = opt.zdrop_inv = strtol(optarg, &s, 10);
opt.zdrop = opt.zdrop_inv = strtol(o.arg, &s, 10);
if (*s == ',') opt.zdrop_inv = strtol(s + 1, &s, 10);
} else if (c == 'O') {
opt.q = opt.q2 = strtol(optarg, &s, 10);
opt.q = opt.q2 = strtol(o.arg, &s, 10);
if (*s == ',') opt.q2 = strtol(s + 1, &s, 10);
} else if (c == 'E') {
opt.e = opt.e2 = strtol(optarg, &s, 10);
opt.e = opt.e2 = strtol(o.arg, &s, 10);
if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
}
}
@ -249,7 +252,7 @@ int main(int argc, char *argv[])
if (mm_check_opt(&ipt, &opt) < 0)
return 1;
if (argc == optind || fp_help == stdout) {
if (argc == o.ind || fp_help == stdout) {
fprintf(fp_help, "Usage: minimap2 [options] <target.fa>|<target.idx> [query.fa] [...]\n");
fprintf(fp_help, "Options:\n");
fprintf(fp_help, " Indexing:\n");
@ -303,16 +306,16 @@ int main(int argc, char *argv[])
return fp_help == stdout? 0 : 1;
}
if ((opt.flag & MM_F_SR) && argc - optind > 3) {
if ((opt.flag & MM_F_SR) && argc - o.ind > 3) {
fprintf(stderr, "[ERROR] incorrect input: in the sr mode, please specify no more than two query files.\n");
return 1;
}
idx_rdr = mm_idx_reader_open(argv[optind], &ipt, fnw);
idx_rdr = mm_idx_reader_open(argv[o.ind], &ipt, fnw);
if (idx_rdr == 0) {
fprintf(stderr, "[ERROR] failed to open file '%s'\n", argv[optind]);
fprintf(stderr, "[ERROR] failed to open file '%s'\n", argv[o.ind]);
return 1;
}
if (!idx_rdr->is_idx && fnw == 0 && argc - optind < 2) {
if (!idx_rdr->is_idx && fnw == 0 && argc - o.ind < 2) {
fprintf(stderr, "[ERROR] missing input: please specify a query file to map or option -d to keep the index\n");
mm_idx_reader_close(idx_rdr);
return 1;
@ -338,13 +341,13 @@ int main(int argc, char *argv[])
if (mm_verbose >= 3)
fprintf(stderr, "[M::%s::%.3f*%.2f] loaded/built the index for %d target sequence(s)\n",
__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n_seq);
if (argc != optind + 1) mm_mapopt_update(&opt, mi);
if (argc != o.ind + 1) mm_mapopt_update(&opt, mi);
if (mm_verbose >= 3) mm_idx_stat(mi);
if (!(opt.flag & MM_F_FRAG_MODE)) {
for (i = optind + 1; i < argc; ++i)
for (i = o.ind + 1; i < argc; ++i)
mm_map_file(mi, argv[i], &opt, n_threads);
} else {
mm_map_file_frag(mi, argc - (optind + 1), (const char**)&argv[optind + 1], &opt, n_threads);
mm_map_file_frag(mi, argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &opt, n_threads);
}
mm_idx_destroy(mi);
}
@ -352,7 +355,7 @@ int main(int argc, char *argv[])
mm_idx_reader_close(idx_rdr);
if (opt.split_prefix)
mm_split_merge(argc - (optind + 1), (const char**)&argv[optind + 1], &opt, n_parts);
mm_split_merge(argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &opt, n_parts);
if (fflush(stdout) == EOF) {
fprintf(stderr, "[ERROR] failed to write the results\n");

13
sdust.c
View File

@ -177,7 +177,7 @@ uint64_t *sdust(void *km, const uint8_t *seq, int l_seq, int T, int W, int *n)
#ifdef _SDUST_MAIN
#include <zlib.h>
#include <stdio.h>
#include "getopt.h"
#include "ketopt.h"
#include "kseq.h"
KSEQ_INIT(gzFile, gzread)
@ -186,16 +186,17 @@ int main(int argc, char *argv[])
gzFile fp;
kseq_t *ks;
int W = 64, T = 20, c;
ketopt_t o = KETOPT_INIT;
while ((c = getopt(argc, argv, "w:t:")) >= 0) {
if (c == 'w') W = atoi(optarg);
else if (c == 't') T = atoi(optarg);
while ((c = ketopt(&o, argc, argv, 1, "w:t:", 0)) >= 0) {
if (c == 'w') W = atoi(o.arg);
else if (c == 't') T = atoi(o.arg);
}
if (optind == argc) {
if (o.ind == argc) {
fprintf(stderr, "Usage: sdust [-w %d] [-t %d] <in.fa>\n", W, T);
return 1;
}
fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
fp = strcmp(argv[o.ind], "-")? gzopen(argv[o.ind], "r") : gzdopen(fileno(stdin), "r");
ks = kseq_init(fp);
while (kseq_read(ks) >= 0) {
uint64_t *r;