2011-01-14 09:52:12 +08:00
|
|
|
#include <unistd.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <math.h>
|
|
|
|
|
#include "bwt.h"
|
|
|
|
|
#include "bwtsw2.h"
|
2011-01-14 09:54:10 +08:00
|
|
|
#include "utils.h"
|
2011-01-14 09:52:12 +08:00
|
|
|
|
|
|
|
|
int bwa_bwtsw2(int argc, char *argv[])
|
|
|
|
|
{
|
2012-03-30 00:22:51 +08:00
|
|
|
extern char *bwa_infer_prefix(const char *hint);
|
2011-01-14 09:52:12 +08:00
|
|
|
bsw2opt_t *opt;
|
2011-10-24 21:36:52 +08:00
|
|
|
bwt_t *target;
|
2012-03-30 00:22:51 +08:00
|
|
|
char buf[1024], *prefix;
|
2011-01-14 09:52:12 +08:00
|
|
|
bntseq_t *bns;
|
|
|
|
|
int c;
|
|
|
|
|
|
|
|
|
|
opt = bsw2_init_opt();
|
|
|
|
|
srand48(11);
|
2012-06-29 02:51:02 +08:00
|
|
|
while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:")) >= 0) {
|
2011-01-14 09:52:12 +08:00
|
|
|
switch (c) {
|
|
|
|
|
case 'q': opt->q = atoi(optarg); break;
|
|
|
|
|
case 'r': opt->r = atoi(optarg); break;
|
|
|
|
|
case 'a': opt->a = atoi(optarg); break;
|
|
|
|
|
case 'b': opt->b = atoi(optarg); break;
|
|
|
|
|
case 'w': opt->bw = atoi(optarg); break;
|
|
|
|
|
case 'T': opt->t = atoi(optarg); break;
|
|
|
|
|
case 't': opt->n_threads = atoi(optarg); break;
|
|
|
|
|
case 'z': opt->z = atoi(optarg); break;
|
|
|
|
|
case 's': opt->is = atoi(optarg); break;
|
|
|
|
|
case 'm': opt->mask_level = atof(optarg); break;
|
|
|
|
|
case 'c': opt->coef = atof(optarg); break;
|
|
|
|
|
case 'N': opt->t_seeds = atoi(optarg); break;
|
2011-11-24 12:36:08 +08:00
|
|
|
case 'M': opt->multi_2nd = 1; break;
|
2011-01-14 09:52:12 +08:00
|
|
|
case 'H': opt->hard_clip = 1; break;
|
2011-01-14 09:54:10 +08:00
|
|
|
case 'f': xreopen(optarg, "w", stdout); break;
|
2012-04-18 08:43:43 +08:00
|
|
|
case 'I': opt->max_ins = atoi(optarg); break;
|
|
|
|
|
case 'S': opt->skip_sw = 1; break;
|
2012-06-29 02:51:02 +08:00
|
|
|
case 'G': opt->max_chain_gap = atoi(optarg); break;
|
2011-01-14 09:52:12 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
opt->qr = opt->q + opt->r;
|
|
|
|
|
|
|
|
|
|
if (optind + 2 > argc) {
|
|
|
|
|
fprintf(stderr, "\n");
|
2011-11-06 02:00:01 +08:00
|
|
|
fprintf(stderr, "Usage: bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]\n\n");
|
2011-01-14 09:52:12 +08:00
|
|
|
fprintf(stderr, "Options: -a INT score for a match [%d]\n", opt->a);
|
|
|
|
|
fprintf(stderr, " -b INT mismatch penalty [%d]\n", opt->b);
|
|
|
|
|
fprintf(stderr, " -q INT gap open penalty [%d]\n", opt->q);
|
|
|
|
|
fprintf(stderr, " -r INT gap extension penalty [%d]\n", opt->r);
|
|
|
|
|
fprintf(stderr, " -w INT band width [%d]\n", opt->bw);
|
|
|
|
|
fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level);
|
|
|
|
|
fprintf(stderr, "\n");
|
2011-11-24 12:36:08 +08:00
|
|
|
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
|
2012-04-18 08:43:43 +08:00
|
|
|
fprintf(stderr, " -f FILE file to output results to instead of stdout\n");
|
2011-11-24 12:36:08 +08:00
|
|
|
fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n");
|
|
|
|
|
fprintf(stderr, " -M mark multi-part alignments as secondary\n");
|
2012-04-18 08:43:43 +08:00
|
|
|
fprintf(stderr, " -S skip Smith-Waterman read pairing\n");
|
|
|
|
|
fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins);
|
2011-11-24 12:36:08 +08:00
|
|
|
fprintf(stderr, "\n");
|
2011-01-14 09:52:12 +08:00
|
|
|
fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t);
|
2011-11-24 12:36:08 +08:00
|
|
|
fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
|
2011-01-14 09:52:12 +08:00
|
|
|
fprintf(stderr, " -z INT Z-best [%d]\n", opt->z);
|
2011-11-24 12:36:08 +08:00
|
|
|
fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is);
|
2012-06-29 02:51:02 +08:00
|
|
|
fprintf(stderr, " -N INT # seeds to trigger rev aln; 2*INT is also the chaining threshold [%d]\n", opt->t_seeds);
|
|
|
|
|
fprintf(stderr, " -G INT maximum gap size during chaining [%d]\n", opt->max_chain_gap);
|
2011-11-24 12:36:08 +08:00
|
|
|
fprintf(stderr, "\n");
|
2011-01-23 02:20:11 +08:00
|
|
|
fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n");
|
|
|
|
|
fprintf(stderr, " BACs, the default setting usually works well. For the current PacBio\n");
|
|
|
|
|
fprintf(stderr, " reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n");
|
|
|
|
|
fprintf(stderr, " increase '-z' for better sensitivity.\n");
|
2011-01-14 09:52:12 +08:00
|
|
|
fprintf(stderr, "\n");
|
|
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// adjust opt for opt->a
|
|
|
|
|
opt->t *= opt->a;
|
|
|
|
|
opt->coef *= opt->a;
|
|
|
|
|
|
2012-03-30 00:22:51 +08:00
|
|
|
if ((prefix = bwa_infer_prefix(argv[optind])) == 0) {
|
|
|
|
|
fprintf(stderr, "[%s] fail to locate the index\n", __func__);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
strcpy(buf, prefix); target = bwt_restore_bwt(strcat(buf, ".bwt"));
|
|
|
|
|
strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target);
|
|
|
|
|
bns = bns_restore(prefix);
|
2011-01-14 09:52:12 +08:00
|
|
|
|
2011-11-06 02:00:01 +08:00
|
|
|
bsw2_aln(opt, bns, target, argv[optind+1], optind+2 < argc? argv[optind+2] : 0);
|
2011-01-14 09:52:12 +08:00
|
|
|
|
|
|
|
|
bns_destroy(bns);
|
2011-10-24 21:36:52 +08:00
|
|
|
bwt_destroy(target);
|
2012-04-02 21:37:02 +08:00
|
|
|
free(opt); free(prefix);
|
2011-01-14 09:52:12 +08:00
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|