2011-10-25 12:22:28 +08:00
|
|
|
#include <zlib.h>
|
|
|
|
|
#include <stdio.h>
|
2011-10-26 03:06:13 +08:00
|
|
|
#include <unistd.h>
|
|
|
|
|
#include <stdlib.h>
|
2013-02-24 04:55:55 +08:00
|
|
|
#include "bwa.h"
|
2013-02-01 02:59:48 +08:00
|
|
|
#include "bwamem.h"
|
2011-10-25 12:22:28 +08:00
|
|
|
#include "kvec.h"
|
2013-02-12 23:36:15 +08:00
|
|
|
#include "utils.h"
|
2011-10-25 12:22:28 +08:00
|
|
|
#include "kseq.h"
|
2013-02-07 03:38:40 +08:00
|
|
|
KSEQ_DECLARE(gzFile)
|
2011-10-25 12:22:28 +08:00
|
|
|
|
|
|
|
|
extern unsigned char nst_nt4_table[256];
|
|
|
|
|
|
2013-02-01 02:59:48 +08:00
|
|
|
int main_mem(int argc, char *argv[])
|
|
|
|
|
{
|
2013-02-02 05:39:50 +08:00
|
|
|
mem_opt_t *opt;
|
2013-02-24 05:41:44 +08:00
|
|
|
int c, n;
|
2013-02-08 02:13:43 +08:00
|
|
|
gzFile fp, fp2 = 0;
|
|
|
|
|
kseq_t *ks, *ks2 = 0;
|
|
|
|
|
bseq1_t *seqs;
|
2013-02-24 04:55:55 +08:00
|
|
|
bwaidx_t *idx;
|
2013-02-24 05:41:44 +08:00
|
|
|
char *rg_line = 0;
|
2013-02-01 02:59:48 +08:00
|
|
|
|
|
|
|
|
opt = mem_opt_init();
|
2013-02-24 05:41:44 +08:00
|
|
|
while ((c = getopt(argc, argv, "PHk:c:v:s:r:t:R:")) >= 0) {
|
2013-02-08 05:27:11 +08:00
|
|
|
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
2013-02-24 03:48:54 +08:00
|
|
|
else if (c == 't') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? opt->n_threads : 1;
|
2013-02-19 05:33:06 +08:00
|
|
|
else if (c == 'P') opt->flag |= MEM_F_NOPAIRING;
|
|
|
|
|
else if (c == 'H') opt->flag |= MEM_F_HARDCLIP;
|
2013-02-08 08:50:37 +08:00
|
|
|
else if (c == 'c') opt->max_occ = atoi(optarg);
|
2013-02-11 23:59:38 +08:00
|
|
|
else if (c == 'v') mem_verbose = atoi(optarg);
|
2013-02-22 01:52:00 +08:00
|
|
|
else if (c == 'r') opt->split_factor = atof(optarg);
|
2013-02-24 05:41:44 +08:00
|
|
|
else if (c == 'R') {
|
|
|
|
|
if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak
|
|
|
|
|
} else if (c == 's') opt->split_width = atoi(optarg);
|
2013-02-01 02:59:48 +08:00
|
|
|
}
|
|
|
|
|
if (optind + 1 >= argc) {
|
|
|
|
|
fprintf(stderr, "\n");
|
2013-02-09 11:12:18 +08:00
|
|
|
fprintf(stderr, "Usage: bwa mem [options] <idxbase> <in.fq>\n\n");
|
2013-02-09 11:11:44 +08:00
|
|
|
fprintf(stderr, "Options: -k INT minimum seed length [%d]\n", opt->min_seed_len);
|
2013-02-24 03:48:54 +08:00
|
|
|
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
|
2013-02-09 11:11:44 +08:00
|
|
|
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
|
|
|
|
|
fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
|
2013-02-22 01:52:00 +08:00
|
|
|
fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor);
|
2013-02-24 05:41:44 +08:00
|
|
|
fprintf(stderr, " -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null]\n");
|
2013-02-11 23:59:38 +08:00
|
|
|
fprintf(stderr, " -v INT verbose level [%d]\n", mem_verbose);
|
2013-02-01 02:59:48 +08:00
|
|
|
fprintf(stderr, "\n");
|
|
|
|
|
free(opt);
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2013-02-05 01:37:38 +08:00
|
|
|
mem_fill_scmat(opt->a, opt->b, opt->mat);
|
2013-02-24 05:41:44 +08:00
|
|
|
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
|
|
|
|
|
bwa_print_sam_hdr(idx->bns, rg_line);
|
2013-02-08 02:13:43 +08:00
|
|
|
|
|
|
|
|
fp = strcmp(argv[optind + 1], "-")? gzopen(argv[optind + 1], "r") : gzdopen(fileno(stdin), "r");
|
|
|
|
|
ks = kseq_init(fp);
|
|
|
|
|
if (optind + 2 < argc) {
|
|
|
|
|
fp2 = gzopen(argv[optind + 2], "r");
|
2013-02-12 00:27:35 +08:00
|
|
|
ks2 = kseq_init(fp2);
|
2013-02-19 05:33:06 +08:00
|
|
|
opt->flag |= MEM_F_PE;
|
2013-02-08 02:13:43 +08:00
|
|
|
}
|
2013-02-24 04:12:26 +08:00
|
|
|
while ((seqs = bseq_read(opt->chunk_size, &n, ks, ks2)) != 0) {
|
2013-02-24 04:55:55 +08:00
|
|
|
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs);
|
2013-02-08 02:13:43 +08:00
|
|
|
free(seqs);
|
|
|
|
|
}
|
2013-02-01 02:59:48 +08:00
|
|
|
|
2013-02-24 04:55:55 +08:00
|
|
|
free(opt);
|
|
|
|
|
bwa_idx_destroy(idx);
|
2013-02-08 02:13:43 +08:00
|
|
|
kseq_destroy(ks);
|
2013-02-05 04:40:26 +08:00
|
|
|
gzclose(fp);
|
2013-02-08 02:13:43 +08:00
|
|
|
if (ks2) {
|
|
|
|
|
kseq_destroy(ks2);
|
|
|
|
|
gzclose(fp2);
|
|
|
|
|
}
|
2013-02-01 02:59:48 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2011-10-25 12:22:28 +08:00
|
|
|
int main_fastmap(int argc, char *argv[])
|
|
|
|
|
{
|
2013-02-09 05:56:28 +08:00
|
|
|
int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, split_width = 0;
|
2011-10-25 12:22:28 +08:00
|
|
|
kseq_t *seq;
|
2011-10-26 03:06:13 +08:00
|
|
|
bwtint_t k;
|
2011-10-25 12:22:28 +08:00
|
|
|
gzFile fp;
|
2013-02-01 01:34:05 +08:00
|
|
|
smem_i *itr;
|
2013-02-02 03:38:44 +08:00
|
|
|
const bwtintv_v *a;
|
2013-02-24 04:55:55 +08:00
|
|
|
bwaidx_t *idx;
|
2011-10-26 03:06:13 +08:00
|
|
|
|
2013-02-09 05:56:28 +08:00
|
|
|
while ((c = getopt(argc, argv, "w:l:ps:")) >= 0) {
|
2011-10-25 12:22:28 +08:00
|
|
|
switch (c) {
|
2013-02-09 05:56:28 +08:00
|
|
|
case 's': split_width = atoi(optarg); break;
|
2013-02-02 03:20:38 +08:00
|
|
|
case 'p': print_seq = 1; break;
|
2011-10-26 03:06:13 +08:00
|
|
|
case 'w': min_iwidth = atoi(optarg); break;
|
|
|
|
|
case 'l': min_len = atoi(optarg); break;
|
2011-10-25 12:22:28 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (optind + 1 >= argc) {
|
2013-02-09 05:56:28 +08:00
|
|
|
fprintf(stderr, "Usage: bwa fastmap [-p] [-s splitWidth=%d] [-l minLen=%d] [-w maxSaSize=%d] <idxbase> <in.fq>\n", split_width, min_len, min_iwidth);
|
2011-10-25 12:22:28 +08:00
|
|
|
return 1;
|
|
|
|
|
}
|
2011-10-26 03:06:13 +08:00
|
|
|
|
2011-10-25 12:22:28 +08:00
|
|
|
fp = gzopen(argv[optind + 1], "r");
|
|
|
|
|
seq = kseq_init(fp);
|
2013-02-24 04:55:55 +08:00
|
|
|
idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS);
|
|
|
|
|
itr = smem_itr_init(idx->bwt);
|
2011-10-25 12:22:28 +08:00
|
|
|
while (kseq_read(seq) >= 0) {
|
2011-11-25 08:44:21 +08:00
|
|
|
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
|
|
|
|
if (print_seq) {
|
|
|
|
|
putchar('\t');
|
|
|
|
|
puts(seq->seq.s);
|
|
|
|
|
} else putchar('\n');
|
2011-10-25 12:22:28 +08:00
|
|
|
for (i = 0; i < seq->seq.l; ++i)
|
|
|
|
|
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
2013-02-02 03:20:38 +08:00
|
|
|
smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s);
|
2013-02-09 05:56:28 +08:00
|
|
|
while ((a = smem_next(itr, min_len<<1, split_width)) != 0) {
|
2013-02-02 03:38:44 +08:00
|
|
|
for (i = 0; i < a->n; ++i) {
|
|
|
|
|
bwtintv_t *p = &a->a[i];
|
2011-11-25 08:15:14 +08:00
|
|
|
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
|
|
|
|
|
printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
|
|
|
|
|
if (p->x[2] <= min_iwidth) {
|
|
|
|
|
for (k = 0; k < p->x[2]; ++k) {
|
|
|
|
|
bwtint_t pos;
|
|
|
|
|
int len, is_rev, ref_id;
|
|
|
|
|
len = (uint32_t)p->info - (p->info>>32);
|
2013-02-24 04:55:55 +08:00
|
|
|
pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev);
|
2011-11-25 08:15:14 +08:00
|
|
|
if (is_rev) pos -= len - 1;
|
2013-02-24 04:55:55 +08:00
|
|
|
bns_cnt_ambi(idx->bns, pos, len, &ref_id);
|
|
|
|
|
printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1);
|
2011-11-25 08:15:14 +08:00
|
|
|
}
|
|
|
|
|
} else fputs("\t*", stdout);
|
|
|
|
|
putchar('\n');
|
2011-10-26 03:06:13 +08:00
|
|
|
}
|
2011-10-25 12:22:28 +08:00
|
|
|
}
|
2011-10-27 22:56:09 +08:00
|
|
|
puts("//");
|
2011-10-25 12:22:28 +08:00
|
|
|
}
|
2011-10-26 03:06:13 +08:00
|
|
|
|
2013-02-01 01:34:05 +08:00
|
|
|
smem_itr_destroy(itr);
|
2013-02-24 04:55:55 +08:00
|
|
|
bwa_idx_destroy(idx);
|
2011-10-25 12:22:28 +08:00
|
|
|
kseq_destroy(seq);
|
|
|
|
|
gzclose(fp);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|