diff --git a/bwt_gen.c b/bwt_gen.c index b9568e9..cac6a5f 100644 --- a/bwt_gen.c +++ b/bwt_gen.c @@ -1486,8 +1486,8 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB BWTIncConstruct(bwtInc, textToLoad); processedTextLength += textToLoad; if (bwtInc->numberOfIterationDone % 10 == 0) { - printf("[BWTIncConstructFromPacked] %lu iterations done. %lu characters processed.\n", - (long)bwtInc->numberOfIterationDone, (long)processedTextLength); + fprintf(stderr, "[BWTIncConstructFromPacked] %lu iterations done. %lu characters processed.\n", + (long)bwtInc->numberOfIterationDone, (long)processedTextLength); } } return bwtInc; diff --git a/fastmap.c b/fastmap.c index ee8e9b4..e3485fc 100644 --- a/fastmap.c +++ b/fastmap.c @@ -1,6 +1,8 @@ #include -#include #include +#include +#include +#include "bntseq.h" #include "bwt.h" #include "kvec.h" #include "kseq.h" @@ -10,28 +12,37 @@ extern unsigned char nst_nt4_table[256]; int main_fastmap(int argc, char *argv[]) { - int c, i; + int c, i, min_iwidth = 3, min_len = 17; kseq_t *seq; + bwtint_t k; gzFile fp; bwt_t *bwt; + bntseq_t *bns; bwtintv_v a[3], mem, *tvec[3]; - while ((c = getopt(argc, argv, "")) >= 0) { + + while ((c = getopt(argc, argv, "w:l:")) >= 0) { switch (c) { + case 'w': min_iwidth = atoi(optarg); break; + case 'l': min_len = atoi(optarg); break; } } if (optind + 1 >= argc) { fprintf(stderr, "bwa fastmap \n"); return 1; } + fp = gzopen(argv[optind + 1], "r"); seq = kseq_init(fp); - { // load the BWT + { // load the packed sequences, BWT and SA char *tmp = calloc(strlen(argv[optind]) + 5, 1); strcat(strcpy(tmp, argv[optind]), ".bwt"); bwt = bwt_restore_bwt(tmp); + strcat(strcpy(tmp, argv[optind]), ".sa"); + bwt_restore_sa(tmp, bwt); free(tmp); + bns = bns_restore(argv[optind]); } - for (i = 0; i < 3; ++i) { + for (i = 0; i < 3; ++i) { // initiate the temporary array kv_init(a[i]); tvec[i] = &a[i]; } @@ -40,15 +51,27 @@ int main_fastmap(int argc, char *argv[]) for (i = 0; i < seq->seq.l; ++i) seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]]; bwt_smem(bwt, seq->seq.l, (uint8_t*)seq->seq.s, &mem, tvec); - printf(">%s\t%ld\n", seq->name.s, mem.n); + printf(">%s\t%ld\n", seq->name.s, seq->seq.l); for (i = 0; i < mem.n; ++i) { bwtintv_t *p = &mem.a[i]; - printf("%d\t%d\t%ld\n", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]); + if ((uint32_t)p->info - (p->info>>32) < min_len) continue; + printf("%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]); + if (p->x[2] <= min_iwidth) { + for (k = 0; k < p->x[2]; ++k) { + int is_rev; + int64_t pos; + pos = bns_depos(bns, bwt_sa(bwt, p->x[0] + k), &is_rev); + printf("\t%c%ld", is_rev?'-':'+', (long)pos); + } + } + putchar('\n'); } puts("//"); } + free(mem.a); for (i = 0; i < 3; ++i) free(a[i].a); + bns_destroy(bns); bwt_destroy(bwt); kseq_destroy(seq); gzclose(fp);