2011-10-25 12:22:28 +08:00
|
|
|
#include <zlib.h>
|
|
|
|
|
#include <unistd.h>
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include "bwt.h"
|
|
|
|
|
#include "kvec.h"
|
|
|
|
|
#include "kseq.h"
|
|
|
|
|
KSEQ_INIT(gzFile, gzread)
|
|
|
|
|
|
|
|
|
|
extern unsigned char nst_nt4_table[256];
|
|
|
|
|
|
|
|
|
|
int main_fastmap(int argc, char *argv[])
|
|
|
|
|
{
|
|
|
|
|
int c, i;
|
|
|
|
|
kseq_t *seq;
|
|
|
|
|
gzFile fp;
|
|
|
|
|
bwt_t *bwt;
|
|
|
|
|
bwtintv_v a[3], mem, *tvec[3];
|
|
|
|
|
while ((c = getopt(argc, argv, "")) >= 0) {
|
|
|
|
|
switch (c) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (optind + 1 >= argc) {
|
|
|
|
|
fprintf(stderr, "bwa fastmap <idxbase> <in.fq>\n");
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
fp = gzopen(argv[optind + 1], "r");
|
|
|
|
|
seq = kseq_init(fp);
|
|
|
|
|
{ // load the BWT
|
|
|
|
|
char *tmp = calloc(strlen(argv[optind]) + 5, 1);
|
|
|
|
|
strcat(strcpy(tmp, argv[optind]), ".bwt");
|
|
|
|
|
bwt = bwt_restore_bwt(tmp);
|
|
|
|
|
free(tmp);
|
|
|
|
|
}
|
|
|
|
|
for (i = 0; i < 3; ++i) {
|
|
|
|
|
kv_init(a[i]);
|
|
|
|
|
tvec[i] = &a[i];
|
|
|
|
|
}
|
|
|
|
|
kv_init(mem);
|
|
|
|
|
while (kseq_read(seq) >= 0) {
|
|
|
|
|
for (i = 0; i < seq->seq.l; ++i)
|
|
|
|
|
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
2011-10-25 22:03:57 +08:00
|
|
|
{
|
2011-10-26 00:31:36 +08:00
|
|
|
int beg = 97;
|
|
|
|
|
uint8_t str[3];
|
|
|
|
|
bwtint_t b, e;
|
2011-10-25 22:03:57 +08:00
|
|
|
bwtintv_t ik, ok[4];
|
|
|
|
|
bwt_set_intv(bwt, seq->seq.s[seq->seq.l - 1], ik);
|
|
|
|
|
for (i = seq->seq.l - 2; i >= beg; --i) {
|
|
|
|
|
//printf("[%lld,%lld,%lld] @ %d\n", ik.x[0], ik.x[1], ik.x[2], i+1);
|
|
|
|
|
bwt_extend(bwt, &ik, ok, 1);
|
|
|
|
|
ik = ok[seq->seq.s[i]];
|
|
|
|
|
if (ik.x[2] == 0) break;
|
|
|
|
|
}
|
|
|
|
|
printf("[%lld,%lld,%lld] @ %d\n", ik.x[0], ik.x[1], ik.x[2], i+1);
|
2011-10-26 00:31:36 +08:00
|
|
|
//str[0] = '\1'; str[1] = '\3'; bwt_match_exact(bwt, 2, str, &b, &e); printf("%lld,%lld,%lld\n", b, e, e-b+1);
|
2011-10-25 22:03:57 +08:00
|
|
|
printf("======================== %lld, [%lld,%lld,%lld,%lld]\n", bwt->primary, bwt->L2[1], bwt->L2[2]-bwt->L2[1], bwt->L2[3]-bwt->L2[2], bwt->L2[4]-bwt->L2[3]);
|
|
|
|
|
bwt_set_intv(bwt, seq->seq.s[beg], ik);
|
|
|
|
|
for (i = beg + 1; i < seq->seq.l; ++i) {
|
|
|
|
|
//printf("[%lld,%lld,%lld] @ %d\n", ik.x[0], ik.x[1], ik.x[2], i-1);
|
|
|
|
|
bwt_extend(bwt, &ik, ok, 0);
|
|
|
|
|
ik = ok[3-seq->seq.s[i]];
|
|
|
|
|
if (ik.x[2] == 0) break;
|
|
|
|
|
}
|
|
|
|
|
printf("[%lld,%lld,%lld] @ %d\n", ik.x[0], ik.x[1], ik.x[2], i-1);
|
2011-10-26 00:31:36 +08:00
|
|
|
//str[0] = '\0'; str[1] = '\2'; bwt_match_exact(bwt, 2, str, &b, &e); printf("%lld,%lld,%lld\n", b, e, e-b+1);
|
2011-10-25 22:03:57 +08:00
|
|
|
}
|
2011-10-25 12:22:28 +08:00
|
|
|
bwt_smem(bwt, seq->seq.l, (uint8_t*)seq->seq.s, &mem, tvec);
|
|
|
|
|
printf(">%s\t%ld\n", seq->name.s, mem.n);
|
|
|
|
|
for (i = 0; i < mem.n; ++i) {
|
|
|
|
|
bwtintv_t *p = &mem.a[i];
|
|
|
|
|
printf("%d\t%d\t%ld\n", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
|
|
|
|
|
}
|
|
|
|
|
puts("//");
|
|
|
|
|
}
|
|
|
|
|
free(mem.a);
|
|
|
|
|
for (i = 0; i < 3; ++i) free(a[i].a);
|
|
|
|
|
bwt_destroy(bwt);
|
|
|
|
|
kseq_destroy(seq);
|
|
|
|
|
gzclose(fp);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|