2011-10-25 12:22:28 +08:00
|
|
|
#include <zlib.h>
|
|
|
|
|
#include <stdio.h>
|
2011-10-26 03:06:13 +08:00
|
|
|
#include <unistd.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include "bntseq.h"
|
2011-10-25 12:22:28 +08:00
|
|
|
#include "bwt.h"
|
|
|
|
|
#include "kvec.h"
|
|
|
|
|
#include "kseq.h"
|
|
|
|
|
KSEQ_INIT(gzFile, gzread)
|
|
|
|
|
|
|
|
|
|
extern unsigned char nst_nt4_table[256];
|
|
|
|
|
|
|
|
|
|
int main_fastmap(int argc, char *argv[])
|
|
|
|
|
{
|
2013-02-01 00:42:31 +08:00
|
|
|
int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, min_intv = 1;
|
2011-10-25 12:22:28 +08:00
|
|
|
kseq_t *seq;
|
2011-10-26 03:06:13 +08:00
|
|
|
bwtint_t k;
|
2011-10-25 12:22:28 +08:00
|
|
|
gzFile fp;
|
|
|
|
|
bwt_t *bwt;
|
2011-10-26 03:06:13 +08:00
|
|
|
bntseq_t *bns;
|
2013-02-01 01:34:05 +08:00
|
|
|
smem_i *itr;
|
2011-10-26 03:06:13 +08:00
|
|
|
|
2013-02-01 00:42:31 +08:00
|
|
|
while ((c = getopt(argc, argv, "w:l:sm:")) >= 0) {
|
2011-10-25 12:22:28 +08:00
|
|
|
switch (c) {
|
2011-11-25 08:44:21 +08:00
|
|
|
case 's': print_seq = 1; break;
|
2011-10-26 03:06:13 +08:00
|
|
|
case 'w': min_iwidth = atoi(optarg); break;
|
|
|
|
|
case 'l': min_len = atoi(optarg); break;
|
2013-02-01 00:42:31 +08:00
|
|
|
case 'm': min_intv = atoi(optarg); break;
|
2011-10-25 12:22:28 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (optind + 1 >= argc) {
|
2013-02-01 00:42:31 +08:00
|
|
|
fprintf(stderr, "Usage: bwa fastmap [-s] [-l minLen=%d] [-w maxSaSize=%d] [-m minIntv=%d] <idxbase> <in.fq>\n", min_len, min_iwidth, min_intv);
|
2011-10-25 12:22:28 +08:00
|
|
|
return 1;
|
|
|
|
|
}
|
2011-10-26 03:06:13 +08:00
|
|
|
|
2011-10-25 12:22:28 +08:00
|
|
|
fp = gzopen(argv[optind + 1], "r");
|
|
|
|
|
seq = kseq_init(fp);
|
2011-10-26 03:06:13 +08:00
|
|
|
{ // load the packed sequences, BWT and SA
|
2011-10-25 12:22:28 +08:00
|
|
|
char *tmp = calloc(strlen(argv[optind]) + 5, 1);
|
|
|
|
|
strcat(strcpy(tmp, argv[optind]), ".bwt");
|
|
|
|
|
bwt = bwt_restore_bwt(tmp);
|
2011-10-26 03:06:13 +08:00
|
|
|
strcat(strcpy(tmp, argv[optind]), ".sa");
|
|
|
|
|
bwt_restore_sa(tmp, bwt);
|
2011-10-25 12:22:28 +08:00
|
|
|
free(tmp);
|
2011-10-26 03:06:13 +08:00
|
|
|
bns = bns_restore(argv[optind]);
|
2011-10-25 12:22:28 +08:00
|
|
|
}
|
2013-02-01 01:34:05 +08:00
|
|
|
itr = smem_itr_init(bwt);
|
2011-10-25 12:22:28 +08:00
|
|
|
while (kseq_read(seq) >= 0) {
|
2011-11-25 08:44:21 +08:00
|
|
|
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
|
|
|
|
if (print_seq) {
|
|
|
|
|
putchar('\t');
|
|
|
|
|
puts(seq->seq.s);
|
|
|
|
|
} else putchar('\n');
|
2011-10-25 12:22:28 +08:00
|
|
|
for (i = 0; i < seq->seq.l; ++i)
|
|
|
|
|
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
2013-02-01 01:34:05 +08:00
|
|
|
smem_set_query(itr, min_intv, seq->seq.l, (uint8_t*)seq->seq.s);
|
|
|
|
|
while (smem_next(itr) > 0) {
|
|
|
|
|
for (i = 0; i < itr->matches->n; ++i) {
|
|
|
|
|
bwtintv_t *p = &itr->matches->a[i];
|
2011-11-25 08:15:14 +08:00
|
|
|
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
|
|
|
|
|
printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
|
|
|
|
|
if (p->x[2] <= min_iwidth) {
|
|
|
|
|
for (k = 0; k < p->x[2]; ++k) {
|
|
|
|
|
bwtint_t pos;
|
|
|
|
|
int len, is_rev, ref_id;
|
|
|
|
|
len = (uint32_t)p->info - (p->info>>32);
|
|
|
|
|
pos = bns_depos(bns, bwt_sa(bwt, p->x[0] + k), &is_rev);
|
|
|
|
|
if (is_rev) pos -= len - 1;
|
|
|
|
|
bns_cnt_ambi(bns, pos, len, &ref_id);
|
|
|
|
|
printf("\t%s:%c%ld", bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
|
|
|
|
}
|
|
|
|
|
} else fputs("\t*", stdout);
|
|
|
|
|
putchar('\n');
|
2011-10-26 03:06:13 +08:00
|
|
|
}
|
2011-10-25 12:22:28 +08:00
|
|
|
}
|
2011-10-27 22:56:09 +08:00
|
|
|
puts("//");
|
2011-10-25 12:22:28 +08:00
|
|
|
}
|
2011-10-26 03:06:13 +08:00
|
|
|
|
2013-02-01 01:34:05 +08:00
|
|
|
smem_itr_destroy(itr);
|
2011-10-26 03:06:13 +08:00
|
|
|
bns_destroy(bns);
|
2011-10-25 12:22:28 +08:00
|
|
|
bwt_destroy(bwt);
|
|
|
|
|
kseq_destroy(seq);
|
|
|
|
|
gzclose(fp);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|