From 74ebd77ce228c3df6d12cbaf7d481471fab2ccd6 Mon Sep 17 00:00:00 2001 From: zzh Date: Thu, 24 Aug 2023 14:25:32 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=97=B6=E9=97=B4=E7=BB=9F?= =?UTF-8?q?=E8=AE=A1=EF=BC=8Cavx2=E5=AE=9E=E7=8E=B0=E7=9A=84bsw?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 + .vscode/launch.json | 44 ++ .vscode/settings.json | 5 + .vscode/tasks.json | 17 + Makefile | 20 +- bwa.c | 467 ++++++++++----- bwamem.c | 1327 +++++++++++++++++++++++++++-------------- bwamem_pair.c | 437 +++++++++----- bwt.c | 357 +++++++---- bwtindex.c | 218 ++++--- fastmap.c | 623 +++++++++++++------ ksw.c | 719 ++++++++++++++-------- ksw_avx2.c | 450 ++++++++++++++ run.sh | 23 + 14 files changed, 3322 insertions(+), 1388 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 .vscode/tasks.json create mode 100644 ksw_avx2.c create mode 100755 run.sh diff --git a/.gitignore b/.gitignore index 0dc1011..9b453de 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ +test_out/ *.[oa] +*.fa +*.txt bwa test test64 diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..b661b25 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,44 @@ +{ + // 使用 IntelliSense 了解相关属性。 + // 悬停以查看现有属性的描述。 + // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "bwa-mem", + "preLaunchTask": "Build", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceRoot}/bwa", + "args": [ + "mem", + "-t", + "64", + "-M", + "-R", + "'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'", + "/share_nas3/zyseq-release-v1.1.3/zyseq/wes/resource/reference/human_g1k_v37_decoy.fasta", + "/share_nas3/zyseq-release-v1.1.3/zyseq/data/n1.fq", + "/share_nas3/zyseq-release-v1.1.3/zyseq/data/n2.fq", + //"/public/home/zzh/data/reference/human_g1k_v37_decoy.fasta", + 
//"/public/home/zzh/data/fastq/n1.fq", + //"/public/home/zzh/data/fastq/n2.fq", + "-o", + "/dev/null" + ], + "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 + }, + { + "name": "index", + "preLaunchTask": "Build", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceRoot}/bwa", + "args": [ + "index", + "reference/human_g1k_v37_decoy.fasta" + ], + "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1728226 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "random": "c" + } +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..f76ae19 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,17 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "label": "Build", + "type": "shell", + "command": "make clean; make -j 16", + "problemMatcher": [], + "group": { + "kind": "build", + "isDefault": true + } + } + ] +} \ No newline at end of file diff --git a/Makefile b/Makefile index 5480536..d447bee 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,22 @@ CC= gcc #CC= clang --analyze -CFLAGS= -g -Wall -Wno-unused-function -O2 +#CFLAGS= -g -Wall -Wno-unused-function -mavx2 -I../jemalloc-5.3.0/include +CFLAGS= -g -Wall -Wno-unused-function -O2 -mavx2 -I../jemalloc-5.3.0/include +#CFLAGS= -g -Wall -Wno-unused-function -O2 -mavx2 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS +SHOW_PERF=-DSHOW_PERF AR= ar -DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) +DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \ - QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o + QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o ksw_avx2.o AOBJS= bwashm.o bwase.o 
bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ bwape.o kopen.o pemerge.o maxk.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ bwtsw2_chain.o fastmap.o bwtsw2_pair.o PROG= bwa INCLUDES= -LIBS= -lm -lz -lpthread +LIBS= -lm -lz -lpthread -ldl +#LIBS= -lm -lz -lpthread SUBDIRS= . ifeq ($(shell uname -s),Linux) @@ -26,8 +30,12 @@ endif all:$(PROG) -bwa:libbwa.a $(AOBJS) main.o - $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS) +# 用jemalloc代替malloc +bwa:libbwa.a ../jemalloc-5.3.0/lib/libjemalloc.a $(AOBJS) main.o + $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) ../jemalloc-5.3.0/lib/libjemalloc.a main.o -o $@ -L. -lbwa $(LIBS) +# 原始malloc +#bwa:libbwa.a $(AOBJS) main.o +# $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS) bwamem-lite:libbwa.a example.o $(CC) $(CFLAGS) $(LDFLAGS) example.o -o $@ -L. -lbwa $(LIBS) diff --git a/bwa.c b/bwa.c index 104c95c..da372e1 100644 --- a/bwa.c +++ b/bwa.c @@ -1,8 +1,8 @@ /* The MIT License Copyright (c) 2018- Dana-Farber Cancer Institute - 2009-2018 Broad Institute, Inc. - 2008-2009 Genome Research Ltd. (GRL) + 2009-2018 Broad Institute, Inc. + 2008-2009 Genome Research Ltd. (GRL) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -36,7 +36,12 @@ #include "kvec.h" #ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" +#include "malloc_wrap.h" +#endif + +#ifdef SHOW_PERF +extern int64_t get_mseconds(); +extern int64_t time_ksw_global2; #endif int bwa_verbose = 3; @@ -53,14 +58,15 @@ KSEQ_DECLARE(gzFile) static inline void trim_readno(kstring_t *s) { - if (s->l > 2 && s->s[s->l-2] == '/' && isdigit(s->s[s->l-1])) + if (s->l > 2 && s->s[s->l - 2] == '/' && isdigit(s->s[s->l - 1])) s->l -= 2, s->s[s->l] = 0; } static inline char *dupkstring(const kstring_t *str, int dupempty) { - char *s = (str->l > 0 || dupempty)? malloc(str->l + 1) : NULL; - if (!s) return NULL; + char *s = (str->l > 0 || dupempty) ? 
malloc(str->l + 1) : NULL; + if (!s) + return NULL; memcpy(s, str->s, str->l); s[str->l] = '\0'; @@ -78,32 +84,39 @@ static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s) bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_) { - kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_; + kseq_t *ks = (kseq_t *)ks1_, *ks2 = (kseq_t *)ks2_; int size = 0, m, n; bseq1_t *seqs; - m = n = 0; seqs = 0; - while (kseq_read(ks) >= 0) { - if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads + m = n = 0; + seqs = 0; + while (kseq_read(ks) >= 0) + { + if (ks2 && kseq_read(ks2) < 0) + { // the 2nd file has fewer reads fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__); break; } - if (n >= m) { - m = m? m<<1 : 256; + if (n >= m) + { + m = m ? m << 1 : 256; seqs = realloc(seqs, m * sizeof(bseq1_t)); } trim_readno(&ks->name); kseq2bseq1(ks, &seqs[n]); seqs[n].id = n; size += seqs[n++].l_seq; - if (ks2) { + if (ks2) + { trim_readno(&ks2->name); kseq2bseq1(ks2, &seqs[n]); seqs[n].id = n; size += seqs[n++].l_seq; } - if (size >= chunk_size && (n&1) == 0) break; + if (size >= chunk_size && (n & 1) == 0) + break; } - if (size == 0) { // test if the 2nd file is finished + if (size == 0) + { // test if the 2nd file is finished if (ks2 && kseq_read(ks2) >= 0) fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__); } @@ -114,17 +127,25 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_) void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2]) { int i, has_last; - kvec_t(bseq1_t) a[2] = {{0,0,0}, {0,0,0}}; - for (i = 1, has_last = 1; i < n; ++i) { - if (has_last) { - if (strcmp(seqs[i].name, seqs[i-1].name) == 0) { - kv_push(bseq1_t, a[1], seqs[i-1]); + kvec_t(bseq1_t) a[2] = {{0, 0, 0}, {0, 0, 0}}; + for (i = 1, has_last = 1; i < n; ++i) + { + if (has_last) + { + if (strcmp(seqs[i].name, seqs[i - 1].name) == 0) + { + kv_push(bseq1_t, a[1], seqs[i - 1]); kv_push(bseq1_t, a[1], seqs[i]); has_last = 
0; - } else kv_push(bseq1_t, a[0], seqs[i-1]); - } else has_last = 1; + } + else + kv_push(bseq1_t, a[0], seqs[i - 1]); + } + else + has_last = 1; } - if (has_last) kv_push(bseq1_t, a[0], seqs[i-1]); + if (has_last) + kv_push(bseq1_t, a[0], seqs[i - 1]); sep[0] = a[0].a, m[0] = a[0].n; sep[1] = a[1].a, m[1] = a[1].n; } @@ -136,12 +157,14 @@ void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2]) void bwa_fill_scmat(int a, int b, int8_t mat[25]) { int i, j, k; - for (i = k = 0; i < 4; ++i) { + for (i = k = 0; i < 4; ++i) + { for (j = 0; j < 4; ++j) - mat[k++] = i == j? a : -b; + mat[k++] = i == j ? a : -b; mat[k++] = -1; // ambiguous base } - for (j = 0; j < 5; ++j) mat[k++] = -1; + for (j = 0; j < 5; ++j) + mat[k++] = -1; } // Generate CIGAR when the alignment end points are known @@ -154,78 +177,119 @@ uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, kstring_t str; const char *int2base; - if (n_cigar) *n_cigar = 0; - if (NM) *NM = -1; - if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) return 0; // reject if negative length or bridging the forward and reverse strand + if (n_cigar) + *n_cigar = 0; + if (NM) + *NM = -1; + if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) + return 0; // reject if negative length or bridging the forward and reverse strand rseq = bns_get_seq(l_pac, pac, rb, re, &rlen); - if (re - rb != rlen) goto ret_gen_cigar; // possible if out of range - if (rb >= l_pac) { // then reverse both query and rseq; this is to ensure indels to be placed at the leftmost position - for (i = 0; i < l_query>>1; ++i) + if (re - rb != rlen) + goto ret_gen_cigar; // possible if out of range + if (rb >= l_pac) + { // then reverse both query and rseq; this is to ensure indels to be placed at the leftmost position + for (i = 0; i < l_query >> 1; ++i) tmp = query[i], query[i] = query[l_query - 1 - i], query[l_query - 1 - i] = tmp; - for (i = 0; i < rlen>>1; ++i) + for (i = 0; i < rlen >> 1; ++i) tmp = 
rseq[i], rseq[i] = rseq[rlen - 1 - i], rseq[rlen - 1 - i] = tmp; } - if (l_query == re - rb && w_ == 0) { // no gap; no need to do DP + if (l_query == re - rb && w_ == 0) + { // no gap; no need to do DP // UPDATE: we come to this block now... FIXME: due to an issue in mem_reg2aln(), we never come to this block. This does not affect accuracy, but it hurts performance. - if (n_cigar) { + if (n_cigar) + { cigar = malloc(4); - cigar[0] = l_query<<4 | 0; + cigar[0] = l_query << 4 | 0; *n_cigar = 1; } for (i = 0, *score = 0; i < l_query; ++i) - *score += mat[rseq[i]*5 + query[i]]; - } else { + *score += mat[rseq[i] * 5 + query[i]]; + } + else + { int w, max_gap, max_ins, max_del, min_w; // set the band-width - max_ins = (int)((double)(((l_query+1)>>1) * mat[0] - o_ins) / e_ins + 1.); - max_del = (int)((double)(((l_query+1)>>1) * mat[0] - o_del) / e_del + 1.); - max_gap = max_ins > max_del? max_ins : max_del; - max_gap = max_gap > 1? max_gap : 1; + max_ins = (int)((double)(((l_query + 1) >> 1) * mat[0] - o_ins) / e_ins + 1.); + max_del = (int)((double)(((l_query + 1) >> 1) * mat[0] - o_del) / e_del + 1.); + max_gap = max_ins > max_del ? max_ins : max_del; + max_gap = max_gap > 1 ? max_gap : 1; w = (max_gap + abs((int)rlen - l_query) + 1) >> 1; - w = w < w_? w : w_; + w = w < w_ ? w : w_; min_w = abs((int)rlen - l_query) + 3; - w = w > min_w? w : min_w; + w = w > min_w ? 
w : min_w; // NW alignment - if (bwa_verbose >= 4) { + if (bwa_verbose >= 4) + { printf("* Global bandwidth: %d\n", w); - printf("* Global ref: "); for (i = 0; i < rlen; ++i) putchar("ACGTN"[(int)rseq[i]]); putchar('\n'); - printf("* Global query: "); for (i = 0; i < l_query; ++i) putchar("ACGTN"[(int)query[i]]); putchar('\n'); + printf("* Global ref: "); + for (i = 0; i < rlen; ++i) + putchar("ACGTN"[(int)rseq[i]]); + putchar('\n'); + printf("* Global query: "); + for (i = 0; i < l_query; ++i) + putchar("ACGTN"[(int)query[i]]); + putchar('\n'); } +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif *score = ksw_global2(l_query, query, rlen, rseq, 5, mat, o_del, e_del, o_ins, e_ins, w, n_cigar, &cigar); +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_ksw_global2, tmp_diff); +#endif } - if (NM && n_cigar) {// compute NM and MD + if (NM && n_cigar) + { // compute NM and MD int k, x, y, u, n_mm = 0, n_gap = 0; - str.l = str.m = *n_cigar * 4; str.s = (char*)cigar; // append MD to CIGAR - int2base = rb < l_pac? "ACGTN" : "TGCAN"; - for (k = 0, x = y = u = 0; k < *n_cigar; ++k) { + str.l = str.m = *n_cigar * 4; + str.s = (char *)cigar; // append MD to CIGAR + int2base = rb < l_pac ? 
"ACGTN" : "TGCAN"; + for (k = 0, x = y = u = 0; k < *n_cigar; ++k) + { int op, len; - cigar = (uint32_t*)str.s; - op = cigar[k]&0xf, len = cigar[k]>>4; - if (op == 0) { // match - for (i = 0; i < len; ++i) { - if (query[x + i] != rseq[y + i]) { + cigar = (uint32_t *)str.s; + op = cigar[k] & 0xf, len = cigar[k] >> 4; + if (op == 0) + { // match + for (i = 0; i < len; ++i) + { + if (query[x + i] != rseq[y + i]) + { kputw(u, &str); - kputc(int2base[rseq[y+i]], &str); - ++n_mm; u = 0; - } else ++u; + kputc(int2base[rseq[y + i]], &str); + ++n_mm; + u = 0; + } + else + ++u; } - x += len; y += len; - } else if (op == 2) { // deletion - if (k > 0 && k < *n_cigar - 1) { // don't do the following if D is the first or the last CIGAR - kputw(u, &str); kputc('^', &str); + x += len; + y += len; + } + else if (op == 2) + { // deletion + if (k > 0 && k < *n_cigar - 1) + { // don't do the following if D is the first or the last CIGAR + kputw(u, &str); + kputc('^', &str); for (i = 0; i < len; ++i) - kputc(int2base[rseq[y+i]], &str); - u = 0; n_gap += len; + kputc(int2base[rseq[y + i]], &str); + u = 0; + n_gap += len; } y += len; - } else if (op == 1) x += len, n_gap += len; // insertion + } + else if (op == 1) + x += len, n_gap += len; // insertion } - kputw(u, &str); kputc(0, &str); + kputw(u, &str); + kputc(0, &str); *NM = n_mm + n_gap; - cigar = (uint32_t*)str.s; + cigar = (uint32_t *)str.s; } if (rb >= l_pac) // reverse back query - for (i = 0; i < l_query>>1; ++i) + for (i = 0; i < l_query >> 1; ++i) tmp = query[i], query[i] = query[l_query - 1 - i], query[l_query - 1 - i] = tmp; ret_gen_cigar: @@ -251,16 +315,22 @@ char *bwa_idx_infer_prefix(const char *hint) prefix = malloc(l_hint + 3 + 4 + 1); strcpy(prefix, hint); strcpy(prefix + l_hint, ".64.bwt"); - if ((fp = fopen(prefix, "rb")) != 0) { + if ((fp = fopen(prefix, "rb")) != 0) + { fclose(fp); prefix[l_hint + 3] = 0; return prefix; - } else { + } + else + { strcpy(prefix + l_hint, ".bwt"); - if ((fp = fopen(prefix, "rb")) 
== 0) { + if ((fp = fopen(prefix, "rb")) == 0) + { free(prefix); return 0; - } else { + } + else + { fclose(fp); prefix[l_hint] = 0; return prefix; @@ -273,16 +343,19 @@ bwt_t *bwa_idx_load_bwt(const char *hint) char *tmp, *prefix; bwt_t *bwt; prefix = bwa_idx_infer_prefix(hint); - if (prefix == 0) { - if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__); + if (prefix == 0) + { + if (bwa_verbose >= 1) + fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__); return 0; } tmp = calloc(strlen(prefix) + 5, 1); strcat(strcpy(tmp, prefix), ".bwt"); // FM-index bwt = bwt_restore_bwt(tmp); - strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA) + strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA) bwt_restore_sa(tmp, bwt); - free(tmp); free(prefix); + free(tmp); + free(prefix); return bwt; } @@ -291,22 +364,28 @@ bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which) bwaidx_t *idx; char *prefix; prefix = bwa_idx_infer_prefix(hint); - if (prefix == 0) { - if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__); + if (prefix == 0) + { + if (bwa_verbose >= 1) + fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__); return 0; } idx = calloc(1, sizeof(bwaidx_t)); - if (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint); - if (which & BWA_IDX_BNS) { + if (which & BWA_IDX_BWT) + idx->bwt = bwa_idx_load_bwt(hint); + if (which & BWA_IDX_BNS) + { int i, c; idx->bns = bns_restore(prefix); for (i = c = 0; i < idx->bns->n_seqs; ++i) - if (idx->bns->anns[i].is_alt) ++c; + if (idx->bns->anns[i].is_alt) + ++c; if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] read %d ALT contigs\n", __func__, c); - if (which & BWA_IDX_PAC) { - idx->pac = calloc(idx->bns->l_pac/4+1, 1); - err_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence + if (which & BWA_IDX_PAC) + { + idx->pac = calloc(idx->bns->l_pac / 4 + 1, 
1); + err_fread_noeof(idx->pac, 1, idx->bns->l_pac / 4 + 1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence err_fclose(idx->bns->fp_pac); idx->bns->fp_pac = 0; } @@ -322,14 +401,24 @@ bwaidx_t *bwa_idx_load(const char *hint, int which) void bwa_idx_destroy(bwaidx_t *idx) { - if (idx == 0) return; - if (idx->mem == 0) { - if (idx->bwt) bwt_destroy(idx->bwt); - if (idx->bns) bns_destroy(idx->bns); - if (idx->pac) free(idx->pac); - } else { - free(idx->bwt); free(idx->bns->anns); free(idx->bns); - if (!idx->is_shm) free(idx->mem); + if (idx == 0) + return; + if (idx->mem == 0) + { + if (idx->bwt) + bwt_destroy(idx->bwt); + if (idx->bns) + bns_destroy(idx->bns); + if (idx->pac) + free(idx->pac); + } + else + { + free(idx->bwt); + free(idx->bns->anns); + free(idx->bns); + if (!idx->is_shm) + free(idx->mem); } free(idx); } @@ -340,22 +429,42 @@ int bwa_mem2idx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx) int i; // generate idx->bwt - x = sizeof(bwt_t); idx->bwt = malloc(x); memcpy(idx->bwt, mem + k, x); k += x; - x = idx->bwt->bwt_size * 4; idx->bwt->bwt = (uint32_t*)(mem + k); k += x; - x = idx->bwt->n_sa * sizeof(bwtint_t); idx->bwt->sa = (bwtint_t*)(mem + k); k += x; + x = sizeof(bwt_t); + idx->bwt = malloc(x); + memcpy(idx->bwt, mem + k, x); + k += x; + x = idx->bwt->bwt_size * 4; + idx->bwt->bwt = (uint32_t *)(mem + k); + k += x; + x = idx->bwt->n_sa * sizeof(bwtint_t); + idx->bwt->sa = (bwtint_t *)(mem + k); + k += x; // generate idx->bns and idx->pac - x = sizeof(bntseq_t); idx->bns = malloc(x); memcpy(idx->bns, mem + k, x); k += x; - x = idx->bns->n_holes * sizeof(bntamb1_t); idx->bns->ambs = (bntamb1_t*)(mem + k); k += x; - x = idx->bns->n_seqs * sizeof(bntann1_t); idx->bns->anns = malloc(x); memcpy(idx->bns->anns, mem + k, x); k += x; - for (i = 0; i < idx->bns->n_seqs; ++i) { - idx->bns->anns[i].name = (char*)(mem + k); k += strlen(idx->bns->anns[i].name) + 1; - idx->bns->anns[i].anno = (char*)(mem + k); k += strlen(idx->bns->anns[i].anno) + 1; + x = 
sizeof(bntseq_t); + idx->bns = malloc(x); + memcpy(idx->bns, mem + k, x); + k += x; + x = idx->bns->n_holes * sizeof(bntamb1_t); + idx->bns->ambs = (bntamb1_t *)(mem + k); + k += x; + x = idx->bns->n_seqs * sizeof(bntann1_t); + idx->bns->anns = malloc(x); + memcpy(idx->bns->anns, mem + k, x); + k += x; + for (i = 0; i < idx->bns->n_seqs; ++i) + { + idx->bns->anns[i].name = (char *)(mem + k); + k += strlen(idx->bns->anns[i].name) + 1; + idx->bns->anns[i].anno = (char *)(mem + k); + k += strlen(idx->bns->anns[i].anno) + 1; } - idx->pac = (uint8_t*)(mem + k); k += idx->bns->l_pac/4+1; + idx->pac = (uint8_t *)(mem + k); + k += idx->bns->l_pac / 4 + 1; assert(k == l_mem); - idx->l_mem = k; idx->mem = mem; + idx->l_mem = k; + idx->mem = mem; return 0; } @@ -367,35 +476,56 @@ int bwa_idx2mem(bwaidx_t *idx) // copy idx->bwt x = idx->bwt->bwt_size * 4; - mem = realloc(idx->bwt->bwt, sizeof(bwt_t) + x); idx->bwt->bwt = 0; + mem = realloc(idx->bwt->bwt, sizeof(bwt_t) + x); + idx->bwt->bwt = 0; memmove(mem + sizeof(bwt_t), mem, x); - memcpy(mem, idx->bwt, sizeof(bwt_t)); k = sizeof(bwt_t) + x; - x = idx->bwt->n_sa * sizeof(bwtint_t); mem = realloc(mem, k + x); memcpy(mem + k, idx->bwt->sa, x); k += x; + memcpy(mem, idx->bwt, sizeof(bwt_t)); + k = sizeof(bwt_t) + x; + x = idx->bwt->n_sa * sizeof(bwtint_t); + mem = realloc(mem, k + x); + memcpy(mem + k, idx->bwt->sa, x); + k += x; free(idx->bwt->sa); - free(idx->bwt); idx->bwt = 0; + free(idx->bwt); + idx->bwt = 0; // copy idx->bns tmp = idx->bns->n_seqs * sizeof(bntann1_t) + idx->bns->n_holes * sizeof(bntamb1_t); for (i = 0; i < idx->bns->n_seqs; ++i) // compute the size of heap-allocated memory tmp += strlen(idx->bns->anns[i].name) + strlen(idx->bns->anns[i].anno) + 2; mem = realloc(mem, k + sizeof(bntseq_t) + tmp); - x = sizeof(bntseq_t); memcpy(mem + k, idx->bns, x); k += x; - x = idx->bns->n_holes * sizeof(bntamb1_t); memcpy(mem + k, idx->bns->ambs, x); k += x; + x = sizeof(bntseq_t); + memcpy(mem + k, idx->bns, x); + k += 
x; + x = idx->bns->n_holes * sizeof(bntamb1_t); + memcpy(mem + k, idx->bns->ambs, x); + k += x; free(idx->bns->ambs); - x = idx->bns->n_seqs * sizeof(bntann1_t); memcpy(mem + k, idx->bns->anns, x); k += x; - for (i = 0; i < idx->bns->n_seqs; ++i) { - x = strlen(idx->bns->anns[i].name) + 1; memcpy(mem + k, idx->bns->anns[i].name, x); k += x; - x = strlen(idx->bns->anns[i].anno) + 1; memcpy(mem + k, idx->bns->anns[i].anno, x); k += x; - free(idx->bns->anns[i].name); free(idx->bns->anns[i].anno); + x = idx->bns->n_seqs * sizeof(bntann1_t); + memcpy(mem + k, idx->bns->anns, x); + k += x; + for (i = 0; i < idx->bns->n_seqs; ++i) + { + x = strlen(idx->bns->anns[i].name) + 1; + memcpy(mem + k, idx->bns->anns[i].name, x); + k += x; + x = strlen(idx->bns->anns[i].anno) + 1; + memcpy(mem + k, idx->bns->anns[i].anno, x); + k += x; + free(idx->bns->anns[i].name); + free(idx->bns->anns[i].anno); } free(idx->bns->anns); // copy idx->pac - x = idx->bns->l_pac/4+1; + x = idx->bns->l_pac / 4 + 1; mem = realloc(mem, k + x); - memcpy(mem + k, idx->pac, x); k += x; - free(idx->bns); idx->bns = 0; - free(idx->pac); idx->pac = 0; + memcpy(mem + k, idx->pac, x); + k += x; + free(idx->bns); + idx->bns = 0; + free(idx->pac); + idx->pac = 0; return bwa_mem2idx(k, mem, idx); } @@ -408,46 +538,66 @@ void bwa_print_sam_hdr(const bntseq_t *bns, const char *hdr_line) { int i, n_HD = 0, n_SQ = 0; extern char *bwa_pg; - - if (hdr_line) { + + if (hdr_line) + { // check for HD line const char *p = hdr_line; - if ((p = strstr(p, "@HD")) != 0) { + if ((p = strstr(p, "@HD")) != 0) + { ++n_HD; - } + } // check for SQ lines p = hdr_line; - while ((p = strstr(p, "@SQ\t")) != 0) { - if (p == hdr_line || *(p-1) == '\n') ++n_SQ; + while ((p = strstr(p, "@SQ\t")) != 0) + { + if (p == hdr_line || *(p - 1) == '\n') + ++n_SQ; p += 4; } } - if (n_SQ == 0) { - for (i = 0; i < bns->n_seqs; ++i) { + if (n_SQ == 0) + { + for (i = 0; i < bns->n_seqs; ++i) + { err_printf("@SQ\tSN:%s\tLN:%d", bns->anns[i].name, 
bns->anns[i].len); - if (bns->anns[i].is_alt) err_printf("\tAH:*\n"); - else err_fputc('\n', stdout); + if (bns->anns[i].is_alt) + err_printf("\tAH:*\n"); + else + err_fputc('\n', stdout); } - } else if (n_SQ != bns->n_seqs && bwa_verbose >= 2) + } + else if (n_SQ != bns->n_seqs && bwa_verbose >= 2) fprintf(stderr, "[W::%s] %d @SQ lines provided with -H; %d sequences in the index. Continue anyway.\n", __func__, n_SQ, bns->n_seqs); - if (n_HD == 0) { + if (n_HD == 0) + { err_printf("@HD\tVN:1.5\tSO:unsorted\tGO:query\n"); } - if (hdr_line) err_printf("%s\n", hdr_line); - if (bwa_pg) err_printf("%s\n", bwa_pg); + if (hdr_line) + err_printf("%s\n", hdr_line); + if (bwa_pg) + err_printf("%s\n", bwa_pg); } static char *bwa_escape(char *s) { char *p, *q; - for (p = q = s; *p; ++p) { - if (*p == '\\') { + for (p = q = s; *p; ++p) + { + if (*p == '\\') + { ++p; - if (*p == 't') *q++ = '\t'; - else if (*p == 'n') *q++ = '\n'; - else if (*p == 'r') *q++ = '\r'; - else if (*p == '\\') *q++ = '\\'; - } else *q++ = *p; + if (*p == 't') + *q++ = '\t'; + else if (*p == 'n') + *q++ = '\n'; + else if (*p == 'r') + *q++ = '\r'; + else if (*p == '\\') + *q++ = '\\'; + } + else + *q++ = *p; } *q = '\0'; return s; @@ -457,24 +607,33 @@ char *bwa_set_rg(const char *s) { char *p, *q, *r, *rg_line = 0; memset(bwa_rg_id, 0, 256); - if (strstr(s, "@RG") != s) { - if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] the read group line is not started with @RG\n", __func__); + if (strstr(s, "@RG") != s) + { + if (bwa_verbose >= 1) + fprintf(stderr, "[E::%s] the read group line is not started with @RG\n", __func__); goto err_set_rg; } - if (strstr(s, "\t") != NULL) { - if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] the read group line contained literal characters -- replace with escaped tabs: \\t\n", __func__); + if (strstr(s, "\t") != NULL) + { + if (bwa_verbose >= 1) + fprintf(stderr, "[E::%s] the read group line contained literal characters -- replace with escaped tabs: \\t\n", __func__); goto 
err_set_rg; } rg_line = strdup(s); bwa_escape(rg_line); - if ((p = strstr(rg_line, "\tID:")) == 0) { - if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] no ID within the read group line\n", __func__); + if ((p = strstr(rg_line, "\tID:")) == 0) + { + if (bwa_verbose >= 1) + fprintf(stderr, "[E::%s] no ID within the read group line\n", __func__); goto err_set_rg; } p += 4; - for (q = p; *q && *q != '\t' && *q != '\n'; ++q); - if (q - p + 1 > 256) { - if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] @RG:ID is longer than 255 characters\n", __func__); + for (q = p; *q && *q != '\t' && *q != '\n'; ++q) + ; + if (q - p + 1 > 256) + { + if (bwa_verbose >= 1) + fprintf(stderr, "[E::%s] @RG:ID is longer than 255 characters\n", __func__); goto err_set_rg; } for (q = p, r = bwa_rg_id; *q && *q != '\t' && *q != '\n'; ++q) @@ -489,13 +648,17 @@ err_set_rg: char *bwa_insert_header(const char *s, char *hdr) { int len = 0; - if (s == 0 || s[0] != '@') return hdr; - if (hdr) { + if (s == 0 || s[0] != '@') + return hdr; + if (hdr) + { len = strlen(hdr); hdr = realloc(hdr, len + strlen(s) + 2); hdr[len++] = '\n'; strcpy(hdr + len, s); - } else hdr = strdup(s); + } + else + hdr = strdup(s); bwa_escape(hdr + len); return hdr; } diff --git a/bwamem.c b/bwamem.c index 03e2a05..68d176f 100644 --- a/bwamem.c +++ b/bwamem.c @@ -1,8 +1,8 @@ /* The MIT License Copyright (c) 2018- Dana-Farber Cancer Institute - 2009-2018 Broad Institute, Inc. - 2008-2009 Genome Research Ltd. (GRL) + 2009-2018 Broad Institute, Inc. + 2008-2009 Genome Research Ltd. 
(GRL) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -43,7 +43,18 @@ #include "utils.h" #ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" +#include "malloc_wrap.h" +#endif + +#ifdef SHOW_PERF +extern int64_t get_mseconds(); +extern int64_t time_ksw_extend2, + time_ksw_align2, + time_mem_chain, + time_worker1, + time_worker2, + count_ksw_extend2, + time_bwt_smem1a; #endif /* Theory on probability and scoring *ungapped* alignment @@ -76,7 +87,8 @@ mem_opt_t *mem_opt_init() mem_opt_t *o; o = calloc(1, sizeof(mem_opt_t)); o->flag = 0; - o->a = 1; o->b = 4; + o->a = 1; + o->b = 4; o->o_del = o->o_ins = 6; o->e_del = o->e_ins = 1; o->w = 100; @@ -103,8 +115,9 @@ mem_opt_t *mem_opt_init() o->max_matesw = 50; o->mask_level_redun = 0.95; o->min_chain_weight = 0; - o->max_chain_extend = 1<<30; - o->mapQ_coef_len = 50; o->mapQ_coef_fac = log(o->mapQ_coef_len); + o->max_chain_extend = 1 << 30; + o->mapQ_coef_len = 50; + o->mapQ_coef_fac = log(o->mapQ_coef_len); bwa_fill_scmat(o->a, o->b, o->mat); return o; } @@ -116,7 +129,8 @@ mem_opt_t *mem_opt_init() #define intv_lt(a, b) ((a).info < (b).info) KSORT_INIT(mem_intv, bwtintv_t, intv_lt) -typedef struct { +typedef struct +{ bwtintv_v mem, mem1, *tmpv[2]; } smem_aux_t; @@ -130,10 +144,13 @@ static smem_aux_t *smem_aux_init() } static void smem_aux_destroy(smem_aux_t *a) -{ - free(a->tmpv[0]->a); free(a->tmpv[0]); - free(a->tmpv[1]->a); free(a->tmpv[1]); - free(a->mem.a); free(a->mem1.a); +{ + free(a->tmpv[0]->a); + free(a->tmpv[0]); + free(a->tmpv[1]->a); + free(a->tmpv[1]); + free(a->mem.a); + free(a->mem1.a); free(a); } @@ -144,43 +161,73 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co int split_len = (int)(opt->min_seed_len * opt->split_factor + .499); a->mem.n = 0; // first pass: find all SMEMs - while (x < len) { - if (seq[x] < 4) { + while (x < len) + { + if (seq[x] < 4) + { x = bwt_smem1(bwt, 
len, seq, x, start_width, &a->mem1, a->tmpv); - for (i = 0; i < a->mem1.n; ++i) { + for (i = 0; i < a->mem1.n; ++i) + { bwtintv_t *p = &a->mem1.a[i]; - int slen = (uint32_t)p->info - (p->info>>32); // seed length + int slen = (uint32_t)p->info - (p->info >> 32); // seed length + // extern FILE *info_f; + // fprintf(info_f, "%d\n", slen); if (slen >= opt->min_seed_len) kv_push(bwtintv_t, a->mem, *p); } - } else ++x; + } + else + ++x; } // second pass: find MEMs inside a long SMEM old_n = a->mem.n; - for (k = 0; k < old_n; ++k) { + for (k = 0; k < old_n; ++k) + { bwtintv_t *p = &a->mem.a[k]; - int start = p->info>>32, end = (int32_t)p->info; - if (end - start < split_len || p->x[2] > opt->split_width) continue; - bwt_smem1(bwt, len, seq, (start + end)>>1, p->x[2]+1, &a->mem1, a->tmpv); + int start = p->info >> 32, end = (int32_t)p->info; + if (end - start < split_len || p->x[2] > opt->split_width) + continue; + bwt_smem1(bwt, len, seq, (start + end) >> 1, p->x[2] + 1, &a->mem1, a->tmpv); for (i = 0; i < a->mem1.n; ++i) - if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len) + { + int slen = (uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info >> 32); // seed length + // extern FILE *query_f; + // fprintf(query_f, "%d\n", slen); + if (slen >= opt->min_seed_len) kv_push(bwtintv_t, a->mem, a->mem1.a[i]); + } } // third pass: LAST-like - if (opt->max_mem_intv > 0) { + if (opt->max_mem_intv > 0) + { x = 0; - while (x < len) { - if (seq[x] < 4) { - if (1) { + while (x < len) + { + if (seq[x] < 4) + { + if (1) + { bwtintv_t m; +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif x = bwt_seed_strategy1(bwt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m); - if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m); - } else { // for now, we never come to this block which is slower +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_bwt_smem1a, tmp_diff); +#endif + if (m.x[2] > 0) + kv_push(bwtintv_t, 
a->mem, m); + } + else + { // for now, we never come to this block which is slower x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv); for (i = 0; i < a->mem1.n; ++i) kv_push(bwtintv_t, a->mem, a->mem1.a[i]); } - } else ++x; + } + else + ++x; } } // sort @@ -191,21 +238,27 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co * Chaining * ************/ -typedef struct { +typedef struct +{ int64_t rbeg; int32_t qbeg, len; int score; } mem_seed_t; // unaligned memory -typedef struct { +typedef struct +{ int n, m, first, rid; - uint32_t w:29, kept:2, is_alt:1; + uint32_t w : 29, kept : 2, is_alt : 1; float frac_rep; int64_t pos; mem_seed_t *seeds; } mem_chain_t; -typedef struct { size_t n, m; mem_chain_t *a; } mem_chain_v; +typedef struct +{ + size_t n, m; + mem_chain_t *a; +} mem_chain_v; #include "kbtree.h" @@ -216,17 +269,21 @@ KBTREE_INIT(chn, mem_chain_t, chain_cmp) static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, const mem_seed_t *p, int seed_rid) { int64_t qend, rend, x, y; - const mem_seed_t *last = &c->seeds[c->n-1]; + const mem_seed_t *last = &c->seeds[c->n - 1]; qend = last->qbeg + last->len; rend = last->rbeg + last->len; - if (seed_rid != c->rid) return 0; // different chr; request a new chain + if (seed_rid != c->rid) + return 0; // different chr; request a new chain if (p->qbeg >= c->seeds[0].qbeg && p->qbeg + p->len <= qend && p->rbeg >= c->seeds[0].rbeg && p->rbeg + p->len <= rend) return 1; // contained seed; do nothing - if ((last->rbeg < l_pac || c->seeds[0].rbeg < l_pac) && p->rbeg >= l_pac) return 0; // don't chain if on different strand + if ((last->rbeg < l_pac || c->seeds[0].rbeg < l_pac) && p->rbeg >= l_pac) + return 0; // don't chain if on different strand x = p->qbeg - last->qbeg; // always non-negtive y = p->rbeg - last->rbeg; - if (y >= 0 && x - y <= opt->w && y - x <= opt->w && x - last->len < opt->max_chain_gap && y - last->len < 
opt->max_chain_gap) { // grow the chain - if (c->n == c->m) { + if (y >= 0 && x - y <= opt->w && y - x <= opt->w && x - last->len < opt->max_chain_gap && y - last->len < opt->max_chain_gap) + { // grow the chain + if (c->n == c->m) + { c->m <<= 1; c->seeds = realloc(c->seeds, c->m * sizeof(mem_seed_t)); } @@ -240,34 +297,44 @@ int mem_chain_weight(const mem_chain_t *c) { int64_t end; int j, w = 0, tmp; - for (j = 0, end = 0; j < c->n; ++j) { + for (j = 0, end = 0; j < c->n; ++j) + { const mem_seed_t *s = &c->seeds[j]; - if (s->qbeg >= end) w += s->len; - else if (s->qbeg + s->len > end) w += s->qbeg + s->len - end; - end = end > s->qbeg + s->len? end : s->qbeg + s->len; + if (s->qbeg >= end) + w += s->len; + else if (s->qbeg + s->len > end) + w += s->qbeg + s->len - end; + end = end > s->qbeg + s->len ? end : s->qbeg + s->len; } - tmp = w; w = 0; - for (j = 0, end = 0; j < c->n; ++j) { + tmp = w; + w = 0; + for (j = 0, end = 0; j < c->n; ++j) + { const mem_seed_t *s = &c->seeds[j]; - if (s->rbeg >= end) w += s->len; - else if (s->rbeg + s->len > end) w += s->rbeg + s->len - end; - end = end > s->rbeg + s->len? end : s->rbeg + s->len; + if (s->rbeg >= end) + w += s->len; + else if (s->rbeg + s->len > end) + w += s->rbeg + s->len - end; + end = end > s->rbeg + s->len ? end : s->rbeg + s->len; } - w = w < tmp? w : tmp; - return w < 1<<30? w : (1<<30)-1; + w = w < tmp ? w : tmp; + return w < 1 << 30 ? 
w : (1 << 30) - 1; } void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn) { int i, j; - for (i = 0; i < chn->n; ++i) { + for (i = 0; i < chn->n; ++i) + { mem_chain_t *p = &chn->a[i]; err_printf("* Found CHAIN(%d): n=%d; weight=%d", i, p->n, mem_chain_weight(p)); - for (j = 0; j < p->n; ++j) { + for (j = 0; j < p->n; ++j) + { bwtint_t pos; int is_rev; pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev); - if (is_rev) pos -= p->seeds[j].len - 1; + if (is_rev) + pos -= p->seeds[j].len - 1; err_printf("\t%d;%d;%d,%ld(%s:%c%ld)", p->seeds[j].score, p->seeds[j].len, p->seeds[j].qbeg, (long)p->seeds[j].rbeg, bns->anns[p->rid].name, "+-"[is_rev], (long)(pos - bns->anns[p->rid].offset) + 1); } err_putchar('\n'); @@ -279,44 +346,58 @@ mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn int i, b, e, l_rep; int64_t l_pac = bns->l_pac; mem_chain_v chain; - kbtree_t(chn) *tree; + kbtree_t(chn) * tree; smem_aux_t *aux; kv_init(chain); - if (len < opt->min_seed_len) return chain; // if the query is shorter than the seed length, no match + if (len < opt->min_seed_len) + return chain; // if the query is shorter than the seed length, no match tree = kb_init(chn, KB_DEFAULT_SIZE); - aux = buf? (smem_aux_t*)buf : smem_aux_init(); + aux = buf ? (smem_aux_t *)buf : smem_aux_init(); mem_collect_intv(opt, bwt, len, seq, aux); - for (i = 0, b = e = l_rep = 0; i < aux->mem.n; ++i) { // compute frac_rep + for (i = 0, b = e = l_rep = 0; i < aux->mem.n; ++i) + { // compute frac_rep bwtintv_t *p = &aux->mem.a[i]; - int sb = (p->info>>32), se = (uint32_t)p->info; - if (p->x[2] <= opt->max_occ) continue; - if (sb > e) l_rep += e - b, b = sb, e = se; - else e = e > se? e : se; + int sb = (p->info >> 32), se = (uint32_t)p->info; + if (p->x[2] <= opt->max_occ) + continue; + if (sb > e) + l_rep += e - b, b = sb, e = se; + else + e = e > se ? 
e : se; } l_rep += e - b; - for (i = 0; i < aux->mem.n; ++i) { + for (i = 0; i < aux->mem.n; ++i) + { bwtintv_t *p = &aux->mem.a[i]; - int step, count, slen = (uint32_t)p->info - (p->info>>32); // seed length + int step, count, slen = (uint32_t)p->info - (p->info >> 32); // seed length int64_t k; // if (slen < opt->min_seed_len) continue; // ignore if too short or too repetitive - step = p->x[2] > opt->max_occ? p->x[2] / opt->max_occ : 1; - for (k = count = 0; k < p->x[2] && count < opt->max_occ; k += step, ++count) { + step = p->x[2] > opt->max_occ ? p->x[2] / opt->max_occ : 1; + for (k = count = 0; k < p->x[2] && count < opt->max_occ; k += step, ++count) + { mem_chain_t tmp, *lower, *upper; mem_seed_t s; int rid, to_add = 0; s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference - s.qbeg = p->info>>32; - s.score= s.len = slen; + s.qbeg = p->info >> 32; + s.score = s.len = slen; rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len); - if (rid < 0) continue; // bridging multiple reference sequences or the forward-reverse boundary; TODO: split the seed; don't discard it!!! - if (kb_size(tree)) { + if (rid < 0) + continue; // bridging multiple reference sequences or the forward-reverse boundary; TODO: split the seed; don't discard it!!! 
+ if (kb_size(tree)) + { kb_intervalp(chn, tree, &tmp, &lower, &upper); // find the closest chain - if (!lower || !test_and_merge(opt, l_pac, lower, &s, rid)) to_add = 1; - } else to_add = 1; - if (to_add) { // add the seed as a new chain - tmp.n = 1; tmp.m = 4; + if (!lower || !test_and_merge(opt, l_pac, lower, &s, rid)) + to_add = 1; + } + else + to_add = 1; + if (to_add) + { // add the seed as a new chain + tmp.n = 1; + tmp.m = 4; tmp.seeds = calloc(tmp.m, sizeof(mem_seed_t)); tmp.seeds[0] = s; tmp.rid = rid; @@ -325,16 +406,19 @@ mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn } } } - if (buf == 0) smem_aux_destroy(aux); + if (buf == 0) + smem_aux_destroy(aux); kv_resize(mem_chain_t, chain, kb_size(tree)); - #define traverse_func(p_) (chain.a[chain.n++] = *(p_)) +#define traverse_func(p_) (chain.a[chain.n++] = *(p_)) __kb_traverse(mem_chain_t, tree, traverse_func); - #undef traverse_func +#undef traverse_func - for (i = 0; i < chain.n; ++i) chain.a[i].frac_rep = (float)l_rep / len; - if (bwa_verbose >= 4) printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len); + for (i = 0; i < chain.n; ++i) + chain.a[i].frac_rep = (float)l_rep / len; + if (bwa_verbose >= 4) + printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len); kb_destroy(chn, tree); return chain; @@ -345,7 +429,7 @@ mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn ********************/ #define chn_beg(ch) ((ch).seeds->qbeg) -#define chn_end(ch) ((ch).seeds[(ch).n-1].qbeg + (ch).seeds[(ch).n-1].len) +#define chn_end(ch) ((ch).seeds[(ch).n - 1].qbeg + (ch).seeds[(ch).n - 1].len) #define flt_lt(a, b) ((a).w > (b).w) KSORT_INIT(mem_flt, mem_chain_t, flt_lt) @@ -353,59 +437,79 @@ KSORT_INIT(mem_flt, mem_chain_t, flt_lt) int mem_chain_flt(const mem_opt_t *opt, int n_chn, mem_chain_t *a) { int i, k; - kvec_t(int) chains = {0,0,0}; // this keeps int indices of the non-overlapping chains - if (n_chn == 0) return 0; // no 
need to filter + kvec_t(int) chains = {0, 0, 0}; // this keeps int indices of the non-overlapping chains + if (n_chn == 0) + return 0; // no need to filter // compute the weight of each chain and drop chains with small weight - for (i = k = 0; i < n_chn; ++i) { + for (i = k = 0; i < n_chn; ++i) + { mem_chain_t *c = &a[i]; - c->first = -1; c->kept = 0; + c->first = -1; + c->kept = 0; c->w = mem_chain_weight(c); - if (c->w < opt->min_chain_weight) free(c->seeds); - else a[k++] = *c; + if (c->w < opt->min_chain_weight) + free(c->seeds); + else + a[k++] = *c; } n_chn = k; ks_introsort(mem_flt, n_chn, a); // pairwise chain comparisons a[0].kept = 3; kv_push(int, chains, 0); - for (i = 1; i < n_chn; ++i) { + for (i = 1; i < n_chn; ++i) + { int large_ovlp = 0; - for (k = 0; k < chains.n; ++k) { + for (k = 0; k < chains.n; ++k) + { int j = chains.a[k]; - int b_max = chn_beg(a[j]) > chn_beg(a[i])? chn_beg(a[j]) : chn_beg(a[i]); - int e_min = chn_end(a[j]) < chn_end(a[i])? chn_end(a[j]) : chn_end(a[i]); - if (e_min > b_max && (!a[j].is_alt || a[i].is_alt)) { // have overlap; don't consider ovlp where the kept chain is ALT while the current chain is primary + int b_max = chn_beg(a[j]) > chn_beg(a[i]) ? chn_beg(a[j]) : chn_beg(a[i]); + int e_min = chn_end(a[j]) < chn_end(a[i]) ? chn_end(a[j]) : chn_end(a[i]); + if (e_min > b_max && (!a[j].is_alt || a[i].is_alt)) + { // have overlap; don't consider ovlp where the kept chain is ALT while the current chain is primary int li = chn_end(a[i]) - chn_beg(a[i]); int lj = chn_end(a[j]) - chn_beg(a[j]); - int min_l = li < lj? li : lj; - if (e_min - b_max >= min_l * opt->mask_level && min_l < opt->max_chain_gap) { // significant overlap + int min_l = li < lj ? li : lj; + if (e_min - b_max >= min_l * opt->mask_level && min_l < opt->max_chain_gap) + { // significant overlap large_ovlp = 1; - if (a[j].first < 0) a[j].first = i; // keep the first shadowed hit s.t. 
mapq can be more accurate - if (a[i].w < a[j].w * opt->drop_ratio && a[j].w - a[i].w >= opt->min_seed_len<<1) + if (a[j].first < 0) + a[j].first = i; // keep the first shadowed hit s.t. mapq can be more accurate + if (a[i].w < a[j].w * opt->drop_ratio && a[j].w - a[i].w >= opt->min_seed_len << 1) break; } } } - if (k == chains.n) { + if (k == chains.n) + { kv_push(int, chains, i); - a[i].kept = large_ovlp? 2 : 3; + a[i].kept = large_ovlp ? 2 : 3; } } - for (i = 0; i < chains.n; ++i) { + for (i = 0; i < chains.n; ++i) + { mem_chain_t *c = &a[chains.a[i]]; - if (c->first >= 0) a[c->first].kept = 1; + if (c->first >= 0) + a[c->first].kept = 1; } free(chains.a); - for (i = k = 0; i < n_chn; ++i) { // don't extend more than opt->max_chain_extend .kept=1/2 chains - if (a[i].kept == 0 || a[i].kept == 3) continue; - if (++k >= opt->max_chain_extend) break; + for (i = k = 0; i < n_chn; ++i) + { // don't extend more than opt->max_chain_extend .kept=1/2 chains + if (a[i].kept == 0 || a[i].kept == 3) + continue; + if (++k >= opt->max_chain_extend) + break; } for (; i < n_chn; ++i) - if (a[i].kept < 3) a[i].kept = 0; - for (i = k = 0; i < n_chn; ++i) { // free discarded chains + if (a[i].kept < 3) + a[i].kept = 0; + for (i = k = 0; i < n_chn; ++i) + { // free discarded chains mem_chain_t *c = &a[i]; - if (c->kept == 0) free(c->seeds); - else a[k++] = a[i]; + if (c->kept == 0) + free(c->seeds); + else + a[k++] = a[i]; } return k; } @@ -420,7 +524,7 @@ KSORT_INIT(mem_ars2, mem_alnreg_t, alnreg_slt2) #define alnreg_slt(a, b) ((a).score > (b).score || ((a).score == (b).score && ((a).rb < (b).rb || ((a).rb == (b).rb && (a).qb < (b).qb)))) KSORT_INIT(mem_ars, mem_alnreg_t, alnreg_slt) -#define alnreg_hlt(a, b) ((a).score > (b).score || ((a).score == (b).score && ((a).is_alt < (b).is_alt || ((a).is_alt == (b).is_alt && (a).hash < (b).hash)))) +#define alnreg_hlt(a, b) ((a).score > (b).score || ((a).score == (b).score && ((a).is_alt < (b).is_alt || ((a).is_alt == (b).is_alt && (a).hash 
< (b).hash)))) KSORT_INIT(mem_ars_hash, mem_alnreg_t, alnreg_hlt) #define alnreg_hlt2(a, b) ((a).is_alt < (b).is_alt || ((a).is_alt == (b).is_alt && ((a).score > (b).score || ((a).score == (b).score && (a).hash < (b).hash)))) @@ -433,29 +537,39 @@ int mem_patch_reg(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, { int w, score, q_s, r_s; double r; - if (bns == 0 || pac == 0 || query == 0) return 0; + if (bns == 0 || pac == 0 || query == 0) + return 0; assert(a->rid == b->rid && a->rb <= b->rb); - if (a->rb < bns->l_pac && b->rb >= bns->l_pac) return 0; // on different strands - if (a->qb >= b->qb || a->qe >= b->qe || a->re >= b->re) return 0; // not colinear - w = (a->re - b->rb) - (a->qe - b->qb); // required bandwidth - w = w > 0? w : -w; // l = abs(l) + if (a->rb < bns->l_pac && b->rb >= bns->l_pac) + return 0; // on different strands + if (a->qb >= b->qb || a->qe >= b->qe || a->re >= b->re) + return 0; // not colinear + w = (a->re - b->rb) - (a->qe - b->qb); // required bandwidth + w = w > 0 ? w : -w; // l = abs(l) r = (double)(a->re - b->rb) / (b->re - a->rb) - (double)(a->qe - b->qb) / (b->qe - a->qb); // relative bandwidth - r = r > 0.? r : -r; // r = fabs(r) + r = r > 0. ? 
r : -r; // r = fabs(r) if (bwa_verbose >= 4) printf("* potential hit merge between [%d,%d)<=>[%ld,%ld) and [%d,%d)<=>[%ld,%ld), @ %s; w=%d, r=%.4g\n", a->qb, a->qe, (long)a->rb, (long)a->re, b->qb, b->qe, (long)b->rb, (long)b->re, bns->anns[a->rid].name, w, r); - if (a->re < b->rb || a->qe < b->qb) { // no overlap on query or on ref - if (w > opt->w<<1 || r >= PATCH_MAX_R_BW) return 0; // the bandwidth or the relative bandwidth is too large - } else if (w > opt->w<<2 || r >= PATCH_MAX_R_BW*2) return 0; // more permissive if overlapping on both ref and query + if (a->re < b->rb || a->qe < b->qb) + { // no overlap on query or on ref + if (w > opt->w << 1 || r >= PATCH_MAX_R_BW) + return 0; // the bandwidth or the relative bandwidth is too large + } + else if (w > opt->w << 2 || r >= PATCH_MAX_R_BW * 2) + return 0; // more permissive if overlapping on both ref and query // global alignment w += a->w + b->w; - w = w < opt->w<<2? w : opt->w<<2; - if (bwa_verbose >= 4) printf("* test potential hit merge with global alignment; w=%d\n", w); + w = w < opt->w << 2 ? w : opt->w << 2; + if (bwa_verbose >= 4) + printf("* test potential hit merge with global alignment; w=%d\n", w); bwa_gen_cigar2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, w, bns->l_pac, pac, b->qe - a->qb, query + a->qb, a->rb, b->re, &score, 0, 0); q_s = (int)((double)(b->qe - a->qb) / ((b->qe - b->qb) + (a->qe - a->qb)) * (b->score + a->score) + .499); // predicted score from query r_s = (int)((double)(b->re - a->rb) / ((b->re - b->rb) + (a->re - a->rb)) * (b->score + a->score) + .499); // predicted score from ref - if (bwa_verbose >= 4) printf("* score=%d;(%d,%d)\n", score, q_s, r_s); - if ((double)score / (q_s > r_s? q_s : r_s) < PATCH_MIN_SC_RATIO) return 0; + if (bwa_verbose >= 4) + printf("* score=%d;(%d,%d)\n", score, q_s, r_s); + if ((double)score / (q_s > r_s ? 
q_s : r_s) < PATCH_MIN_SC_RATIO) + return 0; *_w = w; return score; } @@ -463,31 +577,43 @@ int mem_patch_reg(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int n, mem_alnreg_t *a) { int m, i, j; - if (n <= 1) return n; + if (n <= 1) + return n; ks_introsort(mem_ars2, n, a); // sort by the END position, not START! - for (i = 0; i < n; ++i) a[i].n_comp = 1; - for (i = 1; i < n; ++i) { + for (i = 0; i < n; ++i) + a[i].n_comp = 1; + for (i = 1; i < n; ++i) + { mem_alnreg_t *p = &a[i]; - if (p->rid != a[i-1].rid || p->rb >= a[i-1].re + opt->max_chain_gap) continue; // then no need to go into the loop below - for (j = i - 1; j >= 0 && p->rid == a[j].rid && p->rb < a[j].re + opt->max_chain_gap; --j) { + if (p->rid != a[i - 1].rid || p->rb >= a[i - 1].re + opt->max_chain_gap) + continue; // then no need to go into the loop below + for (j = i - 1; j >= 0 && p->rid == a[j].rid && p->rb < a[j].re + opt->max_chain_gap; --j) + { mem_alnreg_t *q = &a[j]; int64_t or, oq, mr, mq; int score, w; - if (q->qe == q->qb) continue; // a[j] has been excluded - or = q->re - p->rb; // overlap length on the reference - oq = q->qb < p->qb? q->qe - p->qb : p->qe - q->qb; // overlap length on the query - mr = q->re - q->rb < p->re - p->rb? q->re - q->rb : p->re - p->rb; // min ref len in alignment - mq = q->qe - q->qb < p->qe - p->qb? q->qe - q->qb : p->qe - p->qb; // min qry len in alignment - if (or > opt->mask_level_redun * mr && oq > opt->mask_level_redun * mq) { // one of the hits is redundant - if (p->score < q->score) { + if (q->qe == q->qb) + continue; // a[j] has been excluded + or = q->re - p->rb; // overlap length on the reference + oq = q->qb < p->qb ? q->qe - p->qb : p->qe - q->qb; // overlap length on the query + mr = q->re - q->rb < p->re - p->rb ? q->re - q->rb : p->re - p->rb; // min ref len in alignment + mq = q->qe - q->qb < p->qe - p->qb ? 
q->qe - q->qb : p->qe - p->qb; // min qry len in alignment + if (or > opt->mask_level_redun * mr && oq > opt->mask_level_redun * mq) + { // one of the hits is redundant + if (p->score < q->score) + { p->qe = p->qb; break; - } else q->qe = q->qb; - } else if (q->rb < p->rb && (score = mem_patch_reg(opt, bns, pac, query, q, p, &w)) > 0) { // then merge q into p + } + else + q->qe = q->qb; + } + else if (q->rb < p->rb && (score = mem_patch_reg(opt, bns, pac, query, q, p, &w)) > 0) + { // then merge q into p p->n_comp += q->n_comp + 1; - p->seedcov = p->seedcov > q->seedcov? p->seedcov : q->seedcov; - p->sub = p->sub > q->sub? p->sub : q->sub; - p->csub = p->csub > q->csub? p->csub : q->csub; + p->seedcov = p->seedcov > q->seedcov ? p->seedcov : q->seedcov; + p->sub = p->sub > q->sub ? p->sub : q->sub; + p->csub = p->csub > q->csub ? p->csub : q->csub; p->qb = q->qb, p->rb = q->rb; p->truesc = p->score = score; p->w = w; @@ -496,20 +622,27 @@ int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_ } } for (i = 0, m = 0; i < n; ++i) // exclude identical hits - if (a[i].qe > a[i].qb) { - if (m != i) a[m++] = a[i]; - else ++m; + if (a[i].qe > a[i].qb) + { + if (m != i) + a[m++] = a[i]; + else + ++m; } n = m; ks_introsort(mem_ars, n, a); - for (i = 1; i < n; ++i) { // mark identical hits - if (a[i].score == a[i-1].score && a[i].rb == a[i-1].rb && a[i].qb == a[i-1].qb) + for (i = 1; i < n; ++i) + { // mark identical hits + if (a[i].score == a[i - 1].score && a[i].rb == a[i - 1].rb && a[i].qb == a[i - 1].qb) a[i].qe = a[i].qb; } for (i = 1, m = 1; i < n; ++i) // exclude identical hits - if (a[i].qe > a[i].qb) { - if (m != i) a[m++] = a[i]; - else ++m; + if (a[i].qe > a[i].qb) + { + if (m != i) + a[m++] = a[i]; + else + ++m; } return m; } @@ -520,62 +653,85 @@ static void mem_mark_primary_se_core(const mem_opt_t *opt, int n, mem_alnreg_t * { // similar to the loop in mem_chain_flt() int i, k, tmp; tmp = opt->a + opt->b; - tmp = opt->o_del + opt->e_del 
> tmp? opt->o_del + opt->e_del : tmp; - tmp = opt->o_ins + opt->e_ins > tmp? opt->o_ins + opt->e_ins : tmp; + tmp = opt->o_del + opt->e_del > tmp ? opt->o_del + opt->e_del : tmp; + tmp = opt->o_ins + opt->e_ins > tmp ? opt->o_ins + opt->e_ins : tmp; z->n = 0; kv_push(int, *z, 0); - for (i = 1; i < n; ++i) { - for (k = 0; k < z->n; ++k) { + for (i = 1; i < n; ++i) + { + for (k = 0; k < z->n; ++k) + { int j = z->a[k]; - int b_max = a[j].qb > a[i].qb? a[j].qb : a[i].qb; - int e_min = a[j].qe < a[i].qe? a[j].qe : a[i].qe; - if (e_min > b_max) { // have overlap - int min_l = a[i].qe - a[i].qb < a[j].qe - a[j].qb? a[i].qe - a[i].qb : a[j].qe - a[j].qb; - if (e_min - b_max >= min_l * opt->mask_level) { // significant overlap - if (a[j].sub == 0) a[j].sub = a[i].score; + int b_max = a[j].qb > a[i].qb ? a[j].qb : a[i].qb; + int e_min = a[j].qe < a[i].qe ? a[j].qe : a[i].qe; + if (e_min > b_max) + { // have overlap + int min_l = a[i].qe - a[i].qb < a[j].qe - a[j].qb ? a[i].qe - a[i].qb : a[j].qe - a[j].qb; + if (e_min - b_max >= min_l * opt->mask_level) + { // significant overlap + if (a[j].sub == 0) + a[j].sub = a[i].score; if (a[j].score - a[i].score <= tmp && (a[j].is_alt || !a[i].is_alt)) ++a[j].sub_n; break; } } } - if (k == z->n) kv_push(int, *z, i); - else a[i].secondary = z->a[k]; + if (k == z->n) + kv_push(int, *z, i); + else + a[i].secondary = z->a[k]; } } int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id) { int i, n_pri; - int_v z = {0,0,0}; - if (n == 0) return 0; - for (i = n_pri = 0; i < n; ++i) { - a[i].sub = a[i].alt_sc = 0, a[i].secondary = a[i].secondary_all = -1, a[i].hash = hash_64(id+i); - if (!a[i].is_alt) ++n_pri; + int_v z = {0, 0, 0}; + if (n == 0) + return 0; + for (i = n_pri = 0; i < n; ++i) + { + a[i].sub = a[i].alt_sc = 0, a[i].secondary = a[i].secondary_all = -1, a[i].hash = hash_64(id + i); + if (!a[i].is_alt) + ++n_pri; } ks_introsort(mem_ars_hash, n, a); mem_mark_primary_se_core(opt, n, a, &z); - for (i = 0; i < 
n; ++i) { + for (i = 0; i < n; ++i) + { mem_alnreg_t *p = &a[i]; p->secondary_all = i; // keep the rank in the first round if (!p->is_alt && p->secondary >= 0 && a[p->secondary].is_alt) p->alt_sc = a[p->secondary].score; } - if (n_pri >= 0 && n_pri < n) { + if (n_pri >= 0 && n_pri < n) + { kv_resize(int, z, n); - if (n_pri > 0) ks_introsort(mem_ars_hash2, n, a); - for (i = 0; i < n; ++i) z.a[a[i].secondary_all] = i; - for (i = 0; i < n; ++i) { - if (a[i].secondary >= 0) { + if (n_pri > 0) + ks_introsort(mem_ars_hash2, n, a); + for (i = 0; i < n; ++i) + z.a[a[i].secondary_all] = i; + for (i = 0; i < n; ++i) + { + if (a[i].secondary >= 0) + { a[i].secondary_all = z.a[a[i].secondary]; - if (a[i].is_alt) a[i].secondary = INT_MAX; - } else a[i].secondary_all = -1; + if (a[i].is_alt) + a[i].secondary = INT_MAX; + } + else + a[i].secondary_all = -1; } - if (n_pri > 0) { // mark primary for hits to the primary assembly only - for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1; + if (n_pri > 0) + { // mark primary for hits to the primary assembly only + for (i = 0; i < n_pri; ++i) + a[i].sub = 0, a[i].secondary = -1; mem_mark_primary_se_core(opt, n_pri, a, &z); } - } else { + } + else + { for (i = 0; i < n; ++i) a[i].secondary_all = a[i].secondary; } @@ -601,38 +757,58 @@ int mem_seed_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, i uint8_t *rseq = 0; kswr_t x; - if (s->len >= MEM_SHORT_LEN) return -1; // the seed is longer than the max-extend; no need to do SW + if (s->len >= MEM_SHORT_LEN) + return -1; // the seed is longer than the max-extend; no need to do SW qb = s->qbeg, qe = s->qbeg + s->len; rb = s->rbeg, re = s->rbeg + s->len; mid = (rb + re) >> 1; - qb -= MEM_SHORT_EXT; qb = qb > 0? qb : 0; - qe += MEM_SHORT_EXT; qe = qe < l_query? qe : l_query; - rb -= MEM_SHORT_EXT; rb = rb > 0? rb : 0; - re += MEM_SHORT_EXT; re = re < l_pac<<1? 
re : l_pac<<1; - if (rb < l_pac && l_pac < re) { - if (mid < l_pac) re = l_pac; - else rb = l_pac; + qb -= MEM_SHORT_EXT; + qb = qb > 0 ? qb : 0; + qe += MEM_SHORT_EXT; + qe = qe < l_query ? qe : l_query; + rb -= MEM_SHORT_EXT; + rb = rb > 0 ? rb : 0; + re += MEM_SHORT_EXT; + re = re < l_pac << 1 ? re : l_pac << 1; + if (rb < l_pac && l_pac < re) + { + if (mid < l_pac) + re = l_pac; + else + rb = l_pac; } - if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return -1; // the seed seems good enough; no need to do SW + if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) + return -1; // the seed seems good enough; no need to do SW rseq = bns_fetch_seq(bns, pac, &rb, mid, &re, &rid); - x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, KSW_XSTART, 0); +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif + x = ksw_align2(qe - qb, (uint8_t *)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, KSW_XSTART, 0); +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_ksw_align2, tmp_diff); +#endif free(rseq); return x.score; } void mem_flt_chained_seeds(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, int n_chn, mem_chain_t *a) { - double min_l = opt->min_chain_weight? MEM_HSP_COEF * opt->min_chain_weight : MEM_MINSC_COEF * log(l_query); + double min_l = opt->min_chain_weight ? 
MEM_HSP_COEF * opt->min_chain_weight : MEM_MINSC_COEF * log(l_query); int i, j, k, min_HSP_score = (int)(opt->a * min_l + .499); - if (min_l > MEM_SEEDSW_COEF * l_query) return; // don't run the following for short reads - for (i = 0; i < n_chn; ++i) { + if (min_l > MEM_SEEDSW_COEF * l_query) + return; // don't run the following for short reads + for (i = 0; i < n_chn; ++i) + { mem_chain_t *c = &a[i]; - for (j = k = 0; j < c->n; ++j) { + for (j = k = 0; j < c->n; ++j) + { mem_seed_t *s = &c->seeds[j]; s->score = mem_seed_sw(opt, bns, pac, l_query, query, s); - if (s->score < 0 || s->score >= min_HSP_score) { - s->score = s->score < 0? s->len * opt->a : s->score; + if (s->score < 0 || s->score >= min_HSP_score) + { + s->score = s->score < 0 ? s->len * opt->a : s->score; c->seeds[k++] = *s; } } @@ -648,12 +824,12 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen) { int l_del = (int)((double)(qlen * opt->a - opt->o_del) / opt->e_del + 1.); int l_ins = (int)((double)(qlen * opt->a - opt->o_ins) / opt->e_ins + 1.); - int l = l_del > l_ins? l_del : l_ins; - l = l > 1? l : 1; - return l < opt->w<<1? l : opt->w<<1; + int l = l_del > l_ins ? l_del : l_ins; + l = l > 1 ? l : 1; + return l < opt->w << 1 ? 
l : opt->w << 1; } -#define MAX_BAND_TRY 2 +#define MAX_BAND_TRY 2 void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av) { @@ -663,23 +839,30 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac uint8_t *rseq = 0; uint64_t *srt; - if (c->n == 0) return; + if (c->n == 0) + return; // get the max possible span - rmax[0] = l_pac<<1; rmax[1] = 0; - for (i = 0; i < c->n; ++i) { + rmax[0] = l_pac << 1; + rmax[1] = 0; + for (i = 0; i < c->n; ++i) + { int64_t b, e; const mem_seed_t *t = &c->seeds[i]; b = t->rbeg - (t->qbeg + cal_max_gap(opt, t->qbeg)); e = t->rbeg + t->len + ((l_query - t->qbeg - t->len) + cal_max_gap(opt, l_query - t->qbeg - t->len)); - rmax[0] = rmax[0] < b? rmax[0] : b; - rmax[1] = rmax[1] > e? rmax[1] : e; - if (t->len > max) max = t->len; + rmax[0] = rmax[0] < b ? rmax[0] : b; + rmax[1] = rmax[1] > e ? rmax[1] : e; + if (t->len > max) + max = t->len; } - rmax[0] = rmax[0] > 0? rmax[0] : 0; - rmax[1] = rmax[1] < l_pac<<1? rmax[1] : l_pac<<1; - if (rmax[0] < l_pac && l_pac < rmax[1]) { // crossing the forward-reverse boundary; then choose one side - if (c->seeds[0].rbeg < l_pac) rmax[1] = l_pac; // this works because all seeds are guaranteed to be on the same strand - else rmax[0] = l_pac; + rmax[0] = rmax[0] > 0 ? rmax[0] : 0; + rmax[1] = rmax[1] < l_pac << 1 ? 
rmax[1] : l_pac << 1; + if (rmax[0] < l_pac && l_pac < rmax[1]) + { // crossing the forward-reverse boundary; then choose one side + if (c->seeds[0].rbeg < l_pac) + rmax[1] = l_pac; // this works because all seeds are guaranteed to be on the same strand + else + rmax[0] = l_pac; } // retrieve the reference sequence rseq = bns_fetch_seq(bns, pac, &rmax[0], c->seeds[0].rbeg, &rmax[1], &rid); @@ -687,43 +870,58 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac srt = malloc(c->n * 8); for (i = 0; i < c->n; ++i) - srt[i] = (uint64_t)c->seeds[i].score<<32 | i; + srt[i] = (uint64_t)c->seeds[i].score << 32 | i; ks_introsort_64(c->n, srt); - for (k = c->n - 1; k >= 0; --k) { + for (k = c->n - 1; k >= 0; --k) + { mem_alnreg_t *a; s = &c->seeds[(uint32_t)srt[k]]; - for (i = 0; i < av->n; ++i) { // test whether extension has been made before + for (i = 0; i < av->n; ++i) + { // test whether extension has been made before mem_alnreg_t *p = &av->a[i]; int64_t rd; int qd, w, max_gap; - if (s->rbeg < p->rb || s->rbeg + s->len > p->re || s->qbeg < p->qb || s->qbeg + s->len > p->qe) continue; // not fully contained - if (s->len - p->seedlen0 > .1 * l_query) continue; // this seed may give a better alignment + if (s->rbeg < p->rb || s->rbeg + s->len > p->re || s->qbeg < p->qb || s->qbeg + s->len > p->qe) + continue; // not fully contained + if (s->len - p->seedlen0 > .1 * l_query) + continue; // this seed may give a better alignment // qd: distance ahead of the seed on query; rd: on reference - qd = s->qbeg - p->qb; rd = s->rbeg - p->rb; - max_gap = cal_max_gap(opt, qd < rd? qd : rd); // the maximal gap allowed in regions ahead of the seed - w = max_gap < p->w? max_gap : p->w; // bounded by the band width - if (qd - rd < w && rd - qd < w) break; // the seed is "around" a previous hit + qd = s->qbeg - p->qb; + rd = s->rbeg - p->rb; + max_gap = cal_max_gap(opt, qd < rd ? 
qd : rd); // the maximal gap allowed in regions ahead of the seed + w = max_gap < p->w ? max_gap : p->w; // bounded by the band width + if (qd - rd < w && rd - qd < w) + break; // the seed is "around" a previous hit // similar to the previous four lines, but this time we look at the region behind - qd = p->qe - (s->qbeg + s->len); rd = p->re - (s->rbeg + s->len); - max_gap = cal_max_gap(opt, qd < rd? qd : rd); - w = max_gap < p->w? max_gap : p->w; - if (qd - rd < w && rd - qd < w) break; + qd = p->qe - (s->qbeg + s->len); + rd = p->re - (s->rbeg + s->len); + max_gap = cal_max_gap(opt, qd < rd ? qd : rd); + w = max_gap < p->w ? max_gap : p->w; + if (qd - rd < w && rd - qd < w) + break; } - if (i < av->n) { // the seed is (almost) contained in an existing alignment; further testing is needed to confirm it is not leading to a different aln + if (i < av->n) + { // the seed is (almost) contained in an existing alignment; further testing is needed to confirm it is not leading to a different aln if (bwa_verbose >= 4) printf("** Seed(%d) [%ld;%ld,%ld] is almost contained in an existing alignment [%d,%d) <=> [%ld,%ld)\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, av->a[i].qb, av->a[i].qe, (long)av->a[i].rb, (long)av->a[i].re); - for (i = k + 1; i < c->n; ++i) { // check overlapping seeds in the same chain + for (i = k + 1; i < c->n; ++i) + { // check overlapping seeds in the same chain const mem_seed_t *t; - if (srt[i] == 0) continue; + if (srt[i] == 0) + continue; t = &c->seeds[(uint32_t)srt[i]]; - if (t->len < s->len * .95) continue; // only check overlapping if t is long enough; TODO: more efficient by early stopping - if (s->qbeg <= t->qbeg && s->qbeg + s->len - t->qbeg >= s->len>>2 && t->qbeg - s->qbeg != t->rbeg - s->rbeg) break; - if (t->qbeg <= s->qbeg && t->qbeg + t->len - s->qbeg >= s->len>>2 && s->qbeg - t->qbeg != s->rbeg - t->rbeg) break; + if (t->len < s->len * .95) + continue; // only check overlapping if t is long enough; TODO: more efficient by early 
stopping + if (s->qbeg <= t->qbeg && s->qbeg + s->len - t->qbeg >= s->len >> 2 && t->qbeg - s->qbeg != t->rbeg - s->rbeg) + break; + if (t->qbeg <= s->qbeg && t->qbeg + t->len - s->qbeg >= s->len >> 2 && s->qbeg - t->qbeg != s->rbeg - t->rbeg) + break; } - if (i == c->n) { // no overlapping seeds; then skip extension + if (i == c->n) + { // no overlapping seeds; then skip extension srt[k] = 0; // mark that seed extension has not been performed continue; } @@ -737,78 +935,139 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac a->score = a->truesc = -1; a->rid = c->rid; - if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] @ %s <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, bns->anns[c->rid].name); - if (s->qbeg) { // left extension + if (bwa_verbose >= 4) + err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] @ %s <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, bns->anns[c->rid].name); + if (s->qbeg) + { // left extension uint8_t *rs, *qs; int qle, tle, gtle, gscore; qs = malloc(s->qbeg); - for (i = 0; i < s->qbeg; ++i) qs[i] = query[s->qbeg - 1 - i]; + for (i = 0; i < s->qbeg; ++i) + qs[i] = query[s->qbeg - 1 - i]; tmp = s->rbeg - rmax[0]; rs = malloc(tmp); - for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i]; - for (i = 0; i < MAX_BAND_TRY; ++i) { + for (i = 0; i < tmp; ++i) + rs[i] = rseq[tmp - 1 - i]; + for (i = 0; i < MAX_BAND_TRY; ++i) + { int prev = a->score; aw[0] = opt->w << i; - if (bwa_verbose >= 4) { + if (bwa_verbose >= 4) + { int j; - printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rs[j]]); putchar('\n'); - printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)qs[j]]); putchar('\n'); + printf("*** Left ref: "); + for (j = 0; j < tmp; ++j) + putchar("ACGTN"[(int)rs[j]]); + putchar('\n'); + printf("*** Left query: "); + for (j = 0; j < s->qbeg; ++j) + putchar("ACGTN"[(int)qs[j]]); + putchar('\n'); } +#ifdef SHOW_PERF + 
int64_t start_time = get_mseconds(); +#endif a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]); - if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); } - if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break; +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_ksw_extend2, tmp_diff); + __sync_fetch_and_add(&count_ksw_extend2, 1); +#endif + if (bwa_verbose >= 4) + { + printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); + fflush(stdout); + } + if (a->score == prev || max_off[0] < (aw[0] >> 1) + (aw[0] >> 2)) + break; } // check whether we prefer to reach the end of the query - if (gscore <= 0 || gscore <= a->score - opt->pen_clip5) { // local extension + if (gscore <= 0 || gscore <= a->score - opt->pen_clip5) + { // local extension a->qb = s->qbeg - qle, a->rb = s->rbeg - tle; a->truesc = a->score; - } else { // to-end extension + } + else + { // to-end extension a->qb = 0, a->rb = s->rbeg - gtle; a->truesc = gscore; } - free(qs); free(rs); - } else a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg; + free(qs); + free(rs); + } + else + a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg; - if (s->qbeg + s->len != l_query) { // right extension + if (s->qbeg + s->len != l_query) + { // right extension int qle, tle, qe, re, gtle, gscore, sc0 = a->score; qe = s->qbeg + s->len; re = s->rbeg + s->len - rmax[0]; assert(re >= 0); - for (i = 0; i < MAX_BAND_TRY; ++i) { + for (i = 0; i < MAX_BAND_TRY; ++i) + { int prev = a->score; aw[1] = opt->w << i; - if (bwa_verbose >= 4) { + if (bwa_verbose >= 4) + { int j; - printf("*** Right ref: "); for (j
= 0; j < rmax[1] - rmax[0] - re; ++j) putchar("ACGTN"[(int)rseq[re+j]]); putchar('\n'); - printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n'); + printf("*** Right ref: "); + for (j = 0; j < rmax[1] - rmax[0] - re; ++j) + putchar("ACGTN"[(int)rseq[re + j]]); + putchar('\n'); + printf("*** Right query: "); + for (j = 0; j < l_query - qe; ++j) + putchar("ACGTN"[(int)query[qe + j]]); + putchar('\n'); } +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]); - if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); } - if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break; +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_ksw_extend2, tmp_diff); + __sync_fetch_and_add(&count_ksw_extend2, 1); +#endif + if (bwa_verbose >= 4) + { + printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); + fflush(stdout); + } + if (a->score == prev || max_off[1] < (aw[1] >> 1) + (aw[1] >> 2)) + break; } // similar to the above - if (gscore <= 0 || gscore <= a->score - opt->pen_clip3) { // local extension + if (gscore <= 0 || gscore <= a->score - opt->pen_clip3) + { // local extension a->qe = qe + qle, a->re = rmax[0] + re + tle; a->truesc += a->score - sc0; - } else { // to-end extension + } + else + { // to-end extension a->qe = l_query, a->re = rmax[0] + re + gtle; a->truesc += gscore - sc0; } - } else a->qe = l_query, a->re = s->rbeg + s->len; - if (bwa_verbose >= 4) printf("*** Added alignment region: [%d,%d) <=> [%ld,%ld); score=%d; 
{left,right}_bandwidth={%d,%d}\n", a->qb, a->qe, (long)a->rb, (long)a->re, a->score, aw[0], aw[1]); + } + else + a->qe = l_query, a->re = s->rbeg + s->len; + if (bwa_verbose >= 4) + printf("*** Added alignment region: [%d,%d) <=> [%ld,%ld); score=%d; {left,right}_bandwidth={%d,%d}\n", a->qb, a->qe, (long)a->rb, (long)a->re, a->score, aw[0], aw[1]); // compute seedcov - for (i = 0, a->seedcov = 0; i < c->n; ++i) { + for (i = 0, a->seedcov = 0; i < c->n; ++i) + { const mem_seed_t *t = &c->seeds[i]; if (t->qbeg >= a->qb && t->qbeg + t->len <= a->qe && t->rbeg >= a->rb && t->rbeg + t->len <= a->re) // seed fully contained - a->seedcov += t->len; // this is not very accurate, but for approx. mapQ, this is good enough + a->seedcov += t->len; // this is not very accurate, but for approx. mapQ, this is good enough } - a->w = aw[0] > aw[1]? aw[0] : aw[1]; + a->w = aw[0] > aw[1] ? aw[0] : aw[1]; a->seedlen0 = s->len; a->frac_rep = c->frac_rep; } - free(srt); free(rseq); + free(srt); + free(rseq); } /***************************** @@ -818,19 +1077,22 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac static inline int infer_bw(int l1, int l2, int score, int a, int q, int r) { int w; - if (l1 == l2 && l1 * a - score < (q + r - a)<<1) return 0; // to get equal alignment length, we need at least two gaps - w = ((double)((l1 < l2? l1 : l2) * a - score - q) / r + 2.); - if (w < abs(l1 - l2)) w = abs(l1 - l2); + if (l1 == l2 && l1 * a - score < (q + r - a) << 1) + return 0; // to get equal alignment length, we need at least two gaps + w = ((double)((l1 < l2 ? 
l1 : l2) * a - score - q) / r + 2.); + if (w < abs(l1 - l2)) + w = abs(l1 - l2); return w; } static inline int get_rlen(int n_cigar, const uint32_t *cigar) { int k, l; - for (k = l = 0; k < n_cigar; ++k) { - int op = cigar[k]&0xf; + for (k = l = 0; k < n_cigar; ++k) + { + int op = cigar[k] & 0xf; if (op == 0 || op == 2) - l += cigar[k]>>4; + l += cigar[k] >> 4; } return l; } @@ -838,14 +1100,19 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar) static inline void add_cigar(const mem_opt_t *opt, mem_aln_t *p, kstring_t *str, int which) { int i; - if (p->n_cigar) { // aligned - for (i = 0; i < p->n_cigar; ++i) { - int c = p->cigar[i]&0xf; - if (!(opt->flag&MEM_F_SOFTCLIP) && !p->is_alt && (c == 3 || c == 4)) - c = which? 4 : 3; // use hard clipping for supplementary alignments - kputw(p->cigar[i]>>4, str); kputc("MIDSH"[c], str); + if (p->n_cigar) + { // aligned + for (i = 0; i < p->n_cigar; ++i) + { + int c = p->cigar[i] & 0xf; + if (!(opt->flag & MEM_F_SOFTCLIP) && !p->is_alt && (c == 3 || c == 4)) + c = which ? 4 : 3; // use hard clipping for supplementary alignments + kputw(p->cigar[i] >> 4, str); + kputc("MIDSH"[c], str); } - } else kputc('*', str); // having a coordinate but unaligned (e.g. when copy_mate is true) + } + else + kputc('*', str); // having a coordinate but unaligned (e.g. when copy_mate is true) } void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_) @@ -853,124 +1120,209 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq int i, l_name; mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert - if (m_) mtmp = *m_, m = &mtmp; + if (m_) + mtmp = *m_, m = &mtmp; // set flag - p->flag |= m? 0x1 : 0; // is paired in sequencing - p->flag |= p->rid < 0? 0x4 : 0; // is mapped - p->flag |= m && m->rid < 0? 
0x8 : 0; // is mate mapped - if (p->rid < 0 && m && m->rid >= 0) // copy mate to alignment + p->flag |= m ? 0x1 : 0; // is paired in sequencing + p->flag |= p->rid < 0 ? 0x4 : 0; // is mapped + p->flag |= m && m->rid < 0 ? 0x8 : 0; // is mate mapped + if (p->rid < 0 && m && m->rid >= 0) // copy mate to alignment p->rid = m->rid, p->pos = m->pos, p->is_rev = m->is_rev, p->n_cigar = 0; if (m && m->rid < 0 && p->rid >= 0) // copy alignment to mate m->rid = p->rid, m->pos = p->pos, m->is_rev = p->is_rev, m->n_cigar = 0; - p->flag |= p->is_rev? 0x10 : 0; // is on the reverse strand - p->flag |= m && m->is_rev? 0x20 : 0; // is mate on the reverse strand + p->flag |= p->is_rev ? 0x10 : 0; // is on the reverse strand + p->flag |= m && m->is_rev ? 0x20 : 0; // is mate on the reverse strand // print up to CIGAR l_name = strlen(s->name); - ks_resize(str, str->l + s->l_seq + l_name + (s->qual? s->l_seq : 0) + 20); - kputsn(s->name, l_name, str); kputc('\t', str); // QNAME - kputw((p->flag&0xffff) | (p->flag&0x10000? 0x100 : 0), str); kputc('\t', str); // FLAG - if (p->rid >= 0) { // with coordinate - kputs(bns->anns[p->rid].name, str); kputc('\t', str); // RNAME - kputl(p->pos + 1, str); kputc('\t', str); // POS - kputw(p->mapq, str); kputc('\t', str); // MAPQ + ks_resize(str, str->l + s->l_seq + l_name + (s->qual ? s->l_seq : 0) + 20); + kputsn(s->name, l_name, str); + kputc('\t', str); // QNAME + kputw((p->flag & 0xffff) | (p->flag & 0x10000 ? 
0x100 : 0), str); + kputc('\t', str); // FLAG + if (p->rid >= 0) + { // with coordinate + kputs(bns->anns[p->rid].name, str); + kputc('\t', str); // RNAME + kputl(p->pos + 1, str); + kputc('\t', str); // POS + kputw(p->mapq, str); + kputc('\t', str); // MAPQ add_cigar(opt, p, str, which); - } else kputsn("*\t0\t0\t*", 7, str); // without coordinte + } + else + kputsn("*\t0\t0\t*", 7, str); // without coordinte kputc('\t', str); // print the mate position if applicable - if (m && m->rid >= 0) { - if (p->rid == m->rid) kputc('=', str); - else kputs(bns->anns[m->rid].name, str); + if (m && m->rid >= 0) + { + if (p->rid == m->rid) + kputc('=', str); + else + kputs(bns->anns[m->rid].name, str); kputc('\t', str); - kputl(m->pos + 1, str); kputc('\t', str); - if (p->rid == m->rid) { - int64_t p0 = p->pos + (p->is_rev? get_rlen(p->n_cigar, p->cigar) - 1 : 0); - int64_t p1 = m->pos + (m->is_rev? get_rlen(m->n_cigar, m->cigar) - 1 : 0); - if (m->n_cigar == 0 || p->n_cigar == 0) kputc('0', str); - else kputl(-(p0 - p1 + (p0 > p1? 1 : p0 < p1? -1 : 0)), str); - } else kputc('0', str); - } else kputsn("*\t0\t0", 5, str); + kputl(m->pos + 1, str); + kputc('\t', str); + if (p->rid == m->rid) + { + int64_t p0 = p->pos + (p->is_rev ? get_rlen(p->n_cigar, p->cigar) - 1 : 0); + int64_t p1 = m->pos + (m->is_rev ? get_rlen(m->n_cigar, m->cigar) - 1 : 0); + if (m->n_cigar == 0 || p->n_cigar == 0) + kputc('0', str); + else + kputl(-(p0 - p1 + (p0 > p1 ? 1 : p0 < p1 ? 
-1 + : 0)), + str); + } + else + kputc('0', str); + } + else + kputsn("*\t0\t0", 5, str); kputc('\t', str); // print SEQ and QUAL - if (p->flag & 0x100) { // for secondary alignments, don't write SEQ and QUAL + if (p->flag & 0x100) + { // for secondary alignments, don't write SEQ and QUAL kputsn("*\t*", 3, str); - } else if (!p->is_rev) { // the forward strand + } + else if (!p->is_rev) + { // the forward strand int i, qb = 0, qe = s->l_seq; - if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP) && !p->is_alt) { // have cigar && not the primary alignment && not softclip all - if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qb += p->cigar[0]>>4; - if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qe -= p->cigar[p->n_cigar-1]>>4; + if (p->n_cigar && which && !(opt->flag & MEM_F_SOFTCLIP) && !p->is_alt) + { // have cigar && not the primary alignment && not softclip all + if ((p->cigar[0] & 0xf) == 4 || (p->cigar[0] & 0xf) == 3) + qb += p->cigar[0] >> 4; + if ((p->cigar[p->n_cigar - 1] & 0xf) == 4 || (p->cigar[p->n_cigar - 1] & 0xf) == 3) + qe -= p->cigar[p->n_cigar - 1] >> 4; } ks_resize(str, str->l + (qe - qb) + 1); - for (i = qb; i < qe; ++i) str->s[str->l++] = "ACGTN"[(int)s->seq[i]]; + for (i = qb; i < qe; ++i) + str->s[str->l++] = "ACGTN"[(int)s->seq[i]]; kputc('\t', str); - if (s->qual) { // printf qual + if (s->qual) + { // printf qual ks_resize(str, str->l + (qe - qb) + 1); - for (i = qb; i < qe; ++i) str->s[str->l++] = s->qual[i]; + for (i = qb; i < qe; ++i) + str->s[str->l++] = s->qual[i]; str->s[str->l] = 0; - } else kputc('*', str); - } else { // the reverse strand + } + else + kputc('*', str); + } + else + { // the reverse strand int i, qb = 0, qe = s->l_seq; - if (p->n_cigar && which && !(opt->flag&MEM_F_SOFTCLIP) && !p->is_alt) { - if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qe -= p->cigar[0]>>4; - if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qb += p->cigar[p->n_cigar-1]>>4; + if 
(p->n_cigar && which && !(opt->flag & MEM_F_SOFTCLIP) && !p->is_alt) + { + if ((p->cigar[0] & 0xf) == 4 || (p->cigar[0] & 0xf) == 3) + qe -= p->cigar[0] >> 4; + if ((p->cigar[p->n_cigar - 1] & 0xf) == 4 || (p->cigar[p->n_cigar - 1] & 0xf) == 3) + qb += p->cigar[p->n_cigar - 1] >> 4; } ks_resize(str, str->l + (qe - qb) + 1); - for (i = qe-1; i >= qb; --i) str->s[str->l++] = "TGCAN"[(int)s->seq[i]]; + for (i = qe - 1; i >= qb; --i) + str->s[str->l++] = "TGCAN"[(int)s->seq[i]]; kputc('\t', str); - if (s->qual) { // printf qual + if (s->qual) + { // printf qual ks_resize(str, str->l + (qe - qb) + 1); - for (i = qe-1; i >= qb; --i) str->s[str->l++] = s->qual[i]; + for (i = qe - 1; i >= qb; --i) + str->s[str->l++] = s->qual[i]; str->s[str->l] = 0; - } else kputc('*', str); + } + else + kputc('*', str); } // print optional tags - if (p->n_cigar) { - kputsn("\tNM:i:", 6, str); kputw(p->NM, str); - kputsn("\tMD:Z:", 6, str); kputs((char*)(p->cigar + p->n_cigar), str); + if (p->n_cigar) + { + kputsn("\tNM:i:", 6, str); + kputw(p->NM, str); + kputsn("\tMD:Z:", 6, str); + kputs((char *)(p->cigar + p->n_cigar), str); } - if (m && m->n_cigar) { kputsn("\tMC:Z:", 6, str); add_cigar(opt, m, str, which); } - if (m) { kputsn("\tMQ:i:", 6, str); kputw(m->mapq, str);} - if (p->score >= 0) { kputsn("\tAS:i:", 6, str); kputw(p->score, str); } - if (p->sub >= 0) { kputsn("\tXS:i:", 6, str); kputw(p->sub, str); } - if (bwa_rg_id[0]) { kputsn("\tRG:Z:", 6, str); kputs(bwa_rg_id, str); } - if (!(p->flag & 0x100)) { // not multi-hit + if (m && m->n_cigar) + { + kputsn("\tMC:Z:", 6, str); + add_cigar(opt, m, str, which); + } + if (m) + { + kputsn("\tMQ:i:", 6, str); + kputw(m->mapq, str); + } + if (p->score >= 0) + { + kputsn("\tAS:i:", 6, str); + kputw(p->score, str); + } + if (p->sub >= 0) + { + kputsn("\tXS:i:", 6, str); + kputw(p->sub, str); + } + if (bwa_rg_id[0]) + { + kputsn("\tRG:Z:", 6, str); + kputs(bwa_rg_id, str); + } + if (!(p->flag & 0x100)) + { // not multi-hit for (i = 0; i < 
n; ++i) - if (i != which && !(list[i].flag&0x100)) break; - if (i < n) { // there are other primary hits; output them + if (i != which && !(list[i].flag & 0x100)) + break; + if (i < n) + { // there are other primary hits; output them kputsn("\tSA:Z:", 6, str); - for (i = 0; i < n; ++i) { + for (i = 0; i < n; ++i) + { const mem_aln_t *r = &list[i]; int k; - if (i == which || (r->flag&0x100)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit - kputs(bns->anns[r->rid].name, str); kputc(',', str); - kputl(r->pos+1, str); kputc(',', str); - kputc("+-"[r->is_rev], str); kputc(',', str); - for (k = 0; k < r->n_cigar; ++k) { - kputw(r->cigar[k]>>4, str); kputc("MIDSH"[r->cigar[k]&0xf], str); + if (i == which || (r->flag & 0x100)) + continue; // proceed if: 1) different from the current; 2) not shadowed multi hit + kputs(bns->anns[r->rid].name, str); + kputc(',', str); + kputl(r->pos + 1, str); + kputc(',', str); + kputc("+-"[r->is_rev], str); + kputc(',', str); + for (k = 0; k < r->n_cigar; ++k) + { + kputw(r->cigar[k] >> 4, str); + kputc("MIDSH"[r->cigar[k] & 0xf], str); } - kputc(',', str); kputw(r->mapq, str); - kputc(',', str); kputw(r->NM, str); + kputc(',', str); + kputw(r->mapq, str); + kputc(',', str); + kputw(r->NM, str); kputc(';', str); } } if (p->alt_sc > 0) ksprintf(str, "\tpa:f:%.3f", (double)p->score / p->alt_sc); } - if (p->XA) { - kputsn((opt->flag&MEM_F_XB)? "\tXB:Z:" : "\tXA:Z:", 6, str); + if (p->XA) + { + kputsn((opt->flag & MEM_F_XB) ? 
"\tXB:Z:" : "\tXA:Z:", 6, str); kputs(p->XA, str); } - if (s->comment) { kputc('\t', str); kputs(s->comment, str); } - if ((opt->flag&MEM_F_REF_HDR) && p->rid >= 0 && bns->anns[p->rid].anno != 0 && bns->anns[p->rid].anno[0] != 0) { + if (s->comment) + { + kputc('\t', str); + kputs(s->comment, str); + } + if ((opt->flag & MEM_F_REF_HDR) && p->rid >= 0 && bns->anns[p->rid].anno != 0 && bns->anns[p->rid].anno[0] != 0) + { int tmp; kputsn("\tXR:Z:", 6, str); tmp = str->l; kputs(bns->anns[p->rid].anno, str); for (i = tmp; i < str->l; ++i) // replace TAB in the comment to SPACE - if (str->s[i] == '\t') str->s[i] = ' '; + if (str->s[i] == '\t') + str->s[i] = ' '; } kputc('\n', str); } @@ -981,26 +1333,35 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a) { - int mapq, l, sub = a->sub? a->sub : opt->min_seed_len * opt->a; + int mapq, l, sub = a->sub ? a->sub : opt->min_seed_len * opt->a; double identity; - sub = a->csub > sub? a->csub : sub; - if (sub >= a->score) return 0; - l = a->qe - a->qb > a->re - a->rb? a->qe - a->qb : a->re - a->rb; + sub = a->csub > sub ? a->csub : sub; + if (sub >= a->score) + return 0; + l = a->qe - a->qb > a->re - a->rb ? a->qe - a->qb : a->re - a->rb; identity = 1. - (double)(l * opt->a - a->score) / (opt->a + opt->b) / l; - if (a->score == 0) { + if (a->score == 0) + { mapq = 0; - } else if (opt->mapQ_coef_len > 0) { + } + else if (opt->mapQ_coef_len > 0) + { double tmp; - tmp = l < opt->mapQ_coef_len? 1. : opt->mapQ_coef_fac / log(l); + tmp = l < opt->mapQ_coef_len ? 1. : opt->mapQ_coef_fac / log(l); tmp *= identity * identity; mapq = (int)(6.02 * (a->score - sub) / opt->a * tmp * tmp + .499); - } else { - mapq = (int)(MEM_MAPQ_COEF * (1. - (double)sub / a->score) * log(a->seedcov) + .499); - mapq = identity < 0.95? 
(int)(mapq * identity * identity + .499) : mapq; } - if (a->sub_n > 0) mapq -= (int)(4.343 * log(a->sub_n+1) + .499); - if (mapq > 60) mapq = 60; - if (mapq < 0) mapq = 0; + else + { + mapq = (int)(MEM_MAPQ_COEF * (1. - (double)sub / a->score) * log(a->seedcov) + .499); + mapq = identity < 0.95 ? (int)(mapq * identity * identity + .499) : mapq; + } + if (a->sub_n > 0) + mapq -= (int)(4.343 * log(a->sub_n + 1) + .499); + if (mapq > 60) + mapq = 60; + if (mapq < 0) + mapq = 0; mapq = (int)(mapq * (1. - a->frac_rep) + .499); return mapq; } @@ -1010,22 +1371,33 @@ void mem_reorder_primary5(int T, mem_alnreg_v *a) int k, n_pri = 0, left_st = INT_MAX, left_k = -1; mem_alnreg_t t; for (k = 0; k < a->n; ++k) - if (a->a[k].secondary < 0 && !a->a[k].is_alt && a->a[k].score >= T) ++n_pri; - if (n_pri <= 1) return; // only one alignment - for (k = 0; k < a->n; ++k) { + if (a->a[k].secondary < 0 && !a->a[k].is_alt && a->a[k].score >= T) + ++n_pri; + if (n_pri <= 1) + return; // only one alignment + for (k = 0; k < a->n; ++k) + { mem_alnreg_t *p = &a->a[k]; - if (p->secondary >= 0 || p->is_alt || p->score < T) continue; - if (p->qb < left_st) left_st = p->qb, left_k = k; + if (p->secondary >= 0 || p->is_alt || p->score < T) + continue; + if (p->qb < left_st) + left_st = p->qb, left_k = k; } assert(a->a[0].secondary < 0); - if (left_k == 0) return; // no need to reorder + if (left_k == 0) + return; // no need to reorder t = a->a[0], a->a[0] = a->a[left_k], a->a[left_k] = t; - for (k = 1; k < a->n; ++k) { // update secondary and secondary_all + for (k = 1; k < a->n; ++k) + { // update secondary and secondary_all mem_alnreg_t *p = &a->a[k]; - if (p->secondary == 0) p->secondary = left_k; - else if (p->secondary == left_k) p->secondary = 0; - if (p->secondary_all == 0) p->secondary_all = left_k; - else if (p->secondary_all == left_k) p->secondary_all = 0; + if (p->secondary == 0) + p->secondary = left_k; + else if (p->secondary == left_k) + p->secondary = 0; + if (p->secondary_all 
== 0) + p->secondary_all = left_k; + else if (p->secondary_all == left_k) + p->secondary_all = 0; } } @@ -1041,39 +1413,51 @@ void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, if (!(opt->flag & MEM_F_ALL)) XA = mem_gen_alt(opt, bns, pac, a, s->l_seq, s->seq); kv_init(aa); - str.l = str.m = 0; str.s = 0; - for (k = l = 0; k < a->n; ++k) { + str.l = str.m = 0; + str.s = 0; + for (k = l = 0; k < a->n; ++k) + { mem_alnreg_t *p = &a->a[k]; mem_aln_t *q; - if (p->score < opt->T) continue; - if (p->secondary >= 0 && (p->is_alt || !(opt->flag&MEM_F_ALL))) continue; - if (p->secondary >= 0 && p->secondary < INT_MAX && p->score < a->a[p->secondary].score * opt->drop_ratio) continue; + if (p->score < opt->T) + continue; + if (p->secondary >= 0 && (p->is_alt || !(opt->flag & MEM_F_ALL))) + continue; + if (p->secondary >= 0 && p->secondary < INT_MAX && p->score < a->a[p->secondary].score * opt->drop_ratio) + continue; q = kv_pushp(mem_aln_t, aa); *q = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p); assert(q->rid >= 0); // this should not happen with the new code - q->XA = XA? XA[k] : 0; + q->XA = XA ? XA[k] : 0; q->flag |= extra_flag; // flag secondary - if (p->secondary >= 0) q->sub = -1; // don't output sub-optimal score + if (p->secondary >= 0) + q->sub = -1; // don't output sub-optimal score if (l && p->secondary < 0) // if supplementary - q->flag |= (opt->flag&MEM_F_NO_MULTI)? 0x10000 : 0x800; + q->flag |= (opt->flag & MEM_F_NO_MULTI) ? 
0x10000 : 0x800; if (!(opt->flag & MEM_F_KEEP_SUPP_MAPQ) && l && !p->is_alt && q->mapq > aa.a[0].mapq) q->mapq = aa.a[0].mapq; // lower mapq for supplementary mappings, unless -5 or -q is applied ++l; } - if (aa.n == 0) { // no alignments good enough; then write an unaligned record + if (aa.n == 0) + { // no alignments good enough; then write an unaligned record mem_aln_t t; t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0); t.flag |= extra_flag; mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m); - } else { + } + else + { for (k = 0; k < aa.n; ++k) mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m); - for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar); + for (k = 0; k < aa.n; ++k) + free(aa.a[k].cigar); free(aa.a); } s->sam = str.s; - if (XA) { - for (k = 0; k < a->n; ++k) free(XA[k]); + if (XA) + { + for (k = 0; k < a->n; ++k) + free(XA[k]); free(XA); } } @@ -1085,30 +1469,42 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse mem_alnreg_v regs; for (i = 0; i < l_seq; ++i) // convert to 2-bit encoding if we have not done so - seq[i] = seq[i] < 4? seq[i] : nst_nt4_table[(int)seq[i]]; - - chn = mem_chain(opt, bwt, bns, l_seq, (uint8_t*)seq, buf); + seq[i] = seq[i] < 4 ? 
seq[i] : nst_nt4_table[(int)seq[i]]; +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif + chn = mem_chain(opt, bwt, bns, l_seq, (uint8_t *)seq, buf); +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_mem_chain, tmp_diff); +#endif chn.n = mem_chain_flt(opt, chn.n, chn.a); - mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chn.n, chn.a); - if (bwa_verbose >= 4) mem_print_chain(bns, &chn); + mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t *)seq, chn.n, chn.a); + if (bwa_verbose >= 4) + mem_print_chain(bns, &chn); kv_init(regs); - for (i = 0; i < chn.n; ++i) { + for (i = 0; i < chn.n; ++i) + { mem_chain_t *p = &chn.a[i]; - if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", i); - mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, ®s); + if (bwa_verbose >= 4) + err_printf("* ---> Processing chain(%d) <---\n", i); + mem_chain2aln(opt, bns, pac, l_seq, (uint8_t *)seq, p, ®s); free(chn.a[i].seeds); } free(chn.a); - regs.n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regs.n, regs.a); - if (bwa_verbose >= 4) { + regs.n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t *)seq, regs.n, regs.a); + if (bwa_verbose >= 4) + { err_printf("* %ld chains remain after removing duplicated chains\n", regs.n); - for (i = 0; i < regs.n; ++i) { + for (i = 0; i < regs.n; ++i) + { mem_alnreg_t *p = ®s.a[i]; printf("** %d, [%d,%d) <=> [%ld,%ld)\n", p->score, p->qb, p->qe, (long)p->rb, (long)p->re); } } - for (i = 0; i < regs.n; ++i) { + for (i = 0; i < regs.n; ++i) + { mem_alnreg_t *p = ®s.a[i]; if (p->rid >= 0 && bns->anns[p->rid].is_alt) p->is_alt = 1; @@ -1119,76 +1515,96 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar) { mem_aln_t a; - int i, w2, tmp, qb, qe, NM, score, is_rev, last_sc = -(1<<30), l_MD; + int i, w2, 
tmp, qb, qe, NM, score, is_rev, last_sc = -(1 << 30), l_MD; int64_t pos, rb, re; uint8_t *query; memset(&a, 0, sizeof(mem_aln_t)); - if (ar == 0 || ar->rb < 0 || ar->re < 0) { // generate an unmapped record - a.rid = -1; a.pos = -1; a.flag |= 0x4; + if (ar == 0 || ar->rb < 0 || ar->re < 0) + { // generate an unmapped record + a.rid = -1; + a.pos = -1; + a.flag |= 0x4; return a; } qb = ar->qb, qe = ar->qe; rb = ar->rb, re = ar->re; query = malloc(l_query); for (i = 0; i < l_query; ++i) // convert to the nt4 encoding - query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]]; - a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0; - if (ar->secondary >= 0) a.flag |= 0x100; // secondary alignment + query[i] = query_[i] < 5 ? query_[i] : nst_nt4_table[(int)query_[i]]; + a.mapq = ar->secondary < 0 ? mem_approx_mapq_se(opt, ar) : 0; + if (ar->secondary >= 0) + a.flag |= 0x100; // secondary alignment tmp = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_del, opt->e_del); - w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_ins, opt->e_ins); - w2 = w2 > tmp? w2 : tmp; - if (bwa_verbose >= 4) printf("* Band width: inferred=%d, cmd_opt=%d, alnreg=%d\n", w2, opt->w, ar->w); - if (w2 > opt->w) w2 = w2 < ar->w? w2 : ar->w; - i = 0; a.cigar = 0; - do { + w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_ins, opt->e_ins); + w2 = w2 > tmp ? w2 : tmp; + if (bwa_verbose >= 4) + printf("* Band width: inferred=%d, cmd_opt=%d, alnreg=%d\n", w2, opt->w, ar->w); + if (w2 > opt->w) + w2 = w2 < ar->w ? w2 : ar->w; + i = 0; + a.cigar = 0; + do + { free(a.cigar); - w2 = w2 < opt->w<<2? 
w2 : opt->w<<2; - a.cigar = bwa_gen_cigar2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, w2, bns->l_pac, pac, qe - qb, (uint8_t*)&query[qb], rb, re, &score, &a.n_cigar, &NM); - if (bwa_verbose >= 4) printf("* Final alignment: w2=%d, global_sc=%d, local_sc=%d\n", w2, score, ar->truesc); - if (score == last_sc || w2 == opt->w<<2) break; // it is possible that global alignment and local alignment give different scores + w2 = w2 < opt->w << 2 ? w2 : opt->w << 2; + a.cigar = bwa_gen_cigar2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, w2, bns->l_pac, pac, qe - qb, (uint8_t *)&query[qb], rb, re, &score, &a.n_cigar, &NM); + if (bwa_verbose >= 4) + printf("* Final alignment: w2=%d, global_sc=%d, local_sc=%d\n", w2, score, ar->truesc); + if (score == last_sc || w2 == opt->w << 2) + break; // it is possible that global alignment and local alignment give different scores last_sc = score; w2 <<= 1; } while (++i < 3 && score < ar->truesc - opt->a); - l_MD = strlen((char*)(a.cigar + a.n_cigar)) + 1; + l_MD = strlen((char *)(a.cigar + a.n_cigar)) + 1; a.NM = NM; - pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev); + pos = bns_depos(bns, rb < bns->l_pac ? rb : re - 1, &is_rev); a.is_rev = is_rev; - if (a.n_cigar > 0) { // squeeze out leading or trailing deletions - if ((a.cigar[0]&0xf) == 2) { - pos += a.cigar[0]>>4; + if (a.n_cigar > 0) + { // squeeze out leading or trailing deletions + if ((a.cigar[0] & 0xf) == 2) + { + pos += a.cigar[0] >> 4; --a.n_cigar; memmove(a.cigar, a.cigar + 1, a.n_cigar * 4 + l_MD); - } else if ((a.cigar[a.n_cigar-1]&0xf) == 2) { + } + else if ((a.cigar[a.n_cigar - 1] & 0xf) == 2) + { --a.n_cigar; memmove(a.cigar + a.n_cigar, a.cigar + a.n_cigar + 1, l_MD); // MD needs to be moved accordingly } } - if (qb != 0 || qe != l_query) { // add clipping to CIGAR + if (qb != 0 || qe != l_query) + { // add clipping to CIGAR int clip5, clip3; - clip5 = is_rev? l_query - qe : qb; - clip3 = is_rev? 
qb : l_query - qe; + clip5 = is_rev ? l_query - qe : qb; + clip3 = is_rev ? qb : l_query - qe; a.cigar = realloc(a.cigar, 4 * (a.n_cigar + 2) + l_MD); - if (clip5) { - memmove(a.cigar+1, a.cigar, a.n_cigar * 4 + l_MD); // make room for 5'-end clipping - a.cigar[0] = clip5<<4 | 3; + if (clip5) + { + memmove(a.cigar + 1, a.cigar, a.n_cigar * 4 + l_MD); // make room for 5'-end clipping + a.cigar[0] = clip5 << 4 | 3; ++a.n_cigar; } - if (clip3) { + if (clip3) + { memmove(a.cigar + a.n_cigar + 1, a.cigar + a.n_cigar, l_MD); // make room for 3'-end clipping - a.cigar[a.n_cigar++] = clip3<<4 | 3; + a.cigar[a.n_cigar++] = clip3 << 4 | 3; } } a.rid = bns_pos2rid(bns, pos); assert(a.rid == ar->rid); a.pos = pos - bns->anns[a.rid].offset; - a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub; - a.is_alt = ar->is_alt; a.alt_sc = ar->alt_sc; + a.score = ar->score; + a.sub = ar->sub > ar->csub ? ar->sub : ar->csub; + a.is_alt = ar->is_alt; + a.alt_sc = ar->alt_sc; free(query); return a; } -typedef struct { +typedef struct +{ const mem_opt_t *opt; const bwt_t *bwt; const bntseq_t *bns; @@ -1202,15 +1618,21 @@ typedef struct { static void worker1(void *data, int i, int tid) { - worker_t *w = (worker_t*)data; - if (!(w->opt->flag&MEM_F_PE)) { - if (bwa_verbose >= 4) printf("=====> Processing read '%s' <=====\n", w->seqs[i].name); + worker_t *w = (worker_t *)data; + if (!(w->opt->flag & MEM_F_PE)) + { + if (bwa_verbose >= 4) + printf("=====> Processing read '%s' <=====\n", w->seqs[i].name); w->regs[i] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i].l_seq, w->seqs[i].seq, w->aux[tid]); - } else { - if (bwa_verbose >= 4) printf("=====> Processing read '%s'/1 <=====\n", w->seqs[i<<1|0].name); - w->regs[i<<1|0] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|0].l_seq, w->seqs[i<<1|0].seq, w->aux[tid]); - if (bwa_verbose >= 4) printf("=====> Processing read '%s'/2 <=====\n", w->seqs[i<<1|1].name); - w->regs[i<<1|1] = mem_align1_core(w->opt, 
w->bwt, w->bns, w->pac, w->seqs[i<<1|1].l_seq, w->seqs[i<<1|1].seq, w->aux[tid]); + } + else + { + if (bwa_verbose >= 4) + printf("=====> Processing read '%s'/1 <=====\n", w->seqs[i << 1 | 0].name); + w->regs[i << 1 | 0] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i << 1 | 0].l_seq, w->seqs[i << 1 | 0].seq, w->aux[tid]); + if (bwa_verbose >= 4) + printf("=====> Processing read '%s'/2 <=====\n", w->seqs[i << 1 | 1].name); + w->regs[i << 1 | 1] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i << 1 | 1].l_seq, w->seqs[i << 1 | 1].seq, w->aux[tid]); } } @@ -1218,46 +1640,75 @@ static void worker2(void *data, int i, int tid) { extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]); extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a); - worker_t *w = (worker_t*)data; - if (!(w->opt->flag&MEM_F_PE)) { - if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name); + worker_t *w = (worker_t *)data; + if (!(w->opt->flag & MEM_F_PE)) + { + if (bwa_verbose >= 4) + printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name); mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i); - if (w->opt->flag & MEM_F_PRIMARY5) mem_reorder_primary5(w->opt->T, &w->regs[i]); + if (w->opt->flag & MEM_F_PRIMARY5) + mem_reorder_primary5(w->opt->T, &w->regs[i]); mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0); free(w->regs[i].a); - } else { - if (bwa_verbose >= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i<<1|0].name); - mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed>>1) + i, &w->seqs[i<<1], &w->regs[i<<1]); - free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a); + } + else + { + if (bwa_verbose >= 4) + printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i << 1 | 0].name); + mem_sam_pe(w->opt, w->bns, 
w->pac, w->pes, (w->n_processed >> 1) + i, &w->seqs[i << 1], &w->regs[i << 1]); + free(w->regs[i << 1 | 0].a); + free(w->regs[i << 1 | 1].a); } } void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0) { - extern void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n); + extern void kt_for(int n_threads, void (*func)(void *, int, int), void *data, int n); worker_t w; mem_pestat_t pes[4]; double ctime, rtime; int i; - ctime = cputime(); rtime = realtime(); + ctime = cputime(); + rtime = realtime(); global_bns = bns; w.regs = malloc(n * sizeof(mem_alnreg_v)); - w.opt = opt; w.bwt = bwt; w.bns = bns; w.pac = pac; - w.seqs = seqs; w.n_processed = n_processed; + w.opt = opt; + w.bwt = bwt; + w.bns = bns; + w.pac = pac; + w.seqs = seqs; + w.n_processed = n_processed; w.pes = &pes[0]; w.aux = malloc(opt->n_threads * sizeof(smem_aux_t)); for (i = 0; i < opt->n_threads; ++i) w.aux[i] = smem_aux_init(); - kt_for(opt->n_threads, worker1, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif + kt_for(opt->n_threads, worker1, &w, (opt->flag & MEM_F_PE) ? 
n >> 1 : n); // find mapping positions +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_worker1, tmp_diff); +#endif for (i = 0; i < opt->n_threads; ++i) smem_aux_destroy(w.aux[i]); free(w.aux); - if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided - if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0 - else mem_pestat(opt, bns->l_pac, n, w.regs, pes); // otherwise, infer the insert size distribution from data + if (opt->flag & MEM_F_PE) + { // infer insert sizes if not provided + if (pes0) + memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0 + else + mem_pestat(opt, bns->l_pac, n, w.regs, pes); // otherwise, infer the insert size distribution from data } - kt_for(opt->n_threads, worker2, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment +#ifdef SHOW_PERF + start_time = get_mseconds(); +#endif + kt_for(opt->n_threads, worker2, &w, (opt->flag & MEM_F_PE) ? n >> 1 : n); // generate alignment +#ifdef SHOW_PERF + tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_worker2, tmp_diff); +#endif free(w.regs); if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime); diff --git a/bwamem_pair.c b/bwamem_pair.c index ef79521..130fe14 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -1,8 +1,8 @@ /* The MIT License Copyright (c) 2018- Dana-Farber Cancer Institute - 2009-2018 Broad Institute, Inc. - 2008-2009 Genome Research Ltd. (GRL) + 2009-2018 Broad Institute, Inc. + 2008-2009 Genome Research Ltd. 
(GRL) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -35,38 +35,45 @@ #include "ksw.h" #ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" +#include "malloc_wrap.h" #endif +#ifdef SHOW_PERF +extern int64_t get_mseconds(); +extern int64_t time_ksw_align2; +#endif -#define MIN_RATIO 0.8 -#define MIN_DIR_CNT 10 +#define MIN_RATIO 0.8 +#define MIN_DIR_CNT 10 #define MIN_DIR_RATIO 0.05 #define OUTLIER_BOUND 2.0 #define MAPPING_BOUND 3.0 -#define MAX_STDDEV 4.0 +#define MAX_STDDEV 4.0 static inline int mem_infer_dir(int64_t l_pac, int64_t b1, int64_t b2, int64_t *dist) { int64_t p2; int r1 = (b1 >= l_pac), r2 = (b2 >= l_pac); - p2 = r1 == r2? b2 : (l_pac<<1) - 1 - b2; // p2 is the coordinate of read 2 on the read 1 strand - *dist = p2 > b1? p2 - b1 : b1 - p2; - return (r1 == r2? 0 : 1) ^ (p2 > b1? 0 : 3); + p2 = r1 == r2 ? b2 : (l_pac << 1) - 1 - b2; // p2 is the coordinate of read 2 on the read 1 strand + *dist = p2 > b1 ? p2 - b1 : b1 - p2; + return (r1 == r2 ? 0 : 1) ^ (p2 > b1 ? 0 : 3); } static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r) { int j; - for (j = 1; j < r->n; ++j) { // choose unique alignment - int b_max = r->a[j].qb > r->a[0].qb? r->a[j].qb : r->a[0].qb; - int e_min = r->a[j].qe < r->a[0].qe? r->a[j].qe : r->a[0].qe; - if (e_min > b_max) { // have overlap - int min_l = r->a[j].qe - r->a[j].qb < r->a[0].qe - r->a[0].qb? r->a[j].qe - r->a[j].qb : r->a[0].qe - r->a[0].qb; - if (e_min - b_max >= min_l * opt->mask_level) break; // significant overlap + for (j = 1; j < r->n; ++j) + { // choose unique alignment + int b_max = r->a[j].qb > r->a[0].qb ? r->a[j].qb : r->a[0].qb; + int e_min = r->a[j].qe < r->a[0].qe ? r->a[j].qe : r->a[0].qe; + if (e_min > b_max) + { // have overlap + int min_l = r->a[j].qe - r->a[j].qb < r->a[0].qe - r->a[0].qb ? 
r->a[j].qe - r->a[j].qb : r->a[0].qe - r->a[0].qb; + if (e_min - b_max >= min_l * opt->mask_level) + break; // significant overlap } } - return j < r->n? r->a[j].score : opt->min_seed_len * opt->a; + return j < r->n ? r->a[j].score : opt->min_seed_len * opt->a; } void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]) @@ -75,36 +82,48 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v * uint64_v isize[4]; memset(pes, 0, 4 * sizeof(mem_pestat_t)); memset(isize, 0, sizeof(kvec_t(int)) * 4); - for (i = 0; i < n>>1; ++i) { + for (i = 0; i < n >> 1; ++i) + { int dir; int64_t is; mem_alnreg_v *r[2]; - r[0] = (mem_alnreg_v*)®s[i<<1|0]; - r[1] = (mem_alnreg_v*)®s[i<<1|1]; - if (r[0]->n == 0 || r[1]->n == 0) continue; - if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score) continue; - if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score) continue; - if (r[0]->a[0].rid != r[1]->a[0].rid) continue; // not on the same chr + r[0] = (mem_alnreg_v *)®s[i << 1 | 0]; + r[1] = (mem_alnreg_v *)®s[i << 1 | 1]; + if (r[0]->n == 0 || r[1]->n == 0) + continue; + if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score) + continue; + if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score) + continue; + if (r[0]->a[0].rid != r[1]->a[0].rid) + continue; // not on the same chr dir = mem_infer_dir(l_pac, r[0]->a[0].rb, r[1]->a[0].rb, &is); - if (is && is <= opt->max_ins) kv_push(uint64_t, isize[dir], is); + if (is && is <= opt->max_ins) + kv_push(uint64_t, isize[dir], is); } - if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n); - for (d = 0; d < 4; ++d) { // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two. 
+ if (bwa_verbose >= 3) + fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n); + for (d = 0; d < 4; ++d) + { // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two. mem_pestat_t *r = &pes[d]; uint64_v *q = &isize[d]; int p25, p50, p75, x; - if (q->n < MIN_DIR_CNT) { - fprintf(stderr, "[M::%s] skip orientation %c%c as there are not enough pairs\n", __func__, "FR"[d>>1&1], "FR"[d&1]); + if (q->n < MIN_DIR_CNT) + { + fprintf(stderr, "[M::%s] skip orientation %c%c as there are not enough pairs\n", __func__, "FR"[d >> 1 & 1], "FR"[d & 1]); r->failed = 1; free(q->a); continue; - } else fprintf(stderr, "[M::%s] analyzing insert size distribution for orientation %c%c...\n", __func__, "FR"[d>>1&1], "FR"[d&1]); + } + else + fprintf(stderr, "[M::%s] analyzing insert size distribution for orientation %c%c...\n", __func__, "FR"[d >> 1 & 1], "FR"[d & 1]); ks_introsort_64(q->n, q->a); p25 = q->a[(int)(.25 * q->n + .499)]; p50 = q->a[(int)(.50 * q->n + .499)]; p75 = q->a[(int)(.75 * q->n + .499)]; - r->low = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499); - if (r->low < 1) r->low = 1; + r->low = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499); + if (r->low < 1) + r->low = 1; r->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499); fprintf(stderr, "[M::%s] (25, 50, 75) percentile: (%d, %d, %d)\n", __func__, p25, p50, p75); fprintf(stderr, "[M::%s] low and high boundaries for computing mean and std.dev: (%d, %d)\n", __func__, r->low, r->high); @@ -117,20 +136,24 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v * r->std += (q->a[i] - r->avg) * (q->a[i] - r->avg); r->std = sqrt(r->std / x); fprintf(stderr, "[M::%s] mean and std.dev: (%.2f, %.2f)\n", __func__, r->avg, r->std); - r->low = (int)(p25 - MAPPING_BOUND * (p75 - p25) + .499); + r->low = (int)(p25 - MAPPING_BOUND * (p75 - p25) + .499); 
r->high = (int)(p75 + MAPPING_BOUND * (p75 - p25) + .499); - if (r->low > r->avg - MAX_STDDEV * r->std) r->low = (int)(r->avg - MAX_STDDEV * r->std + .499); - if (r->high < r->avg + MAX_STDDEV * r->std) r->high = (int)(r->avg + MAX_STDDEV * r->std + .499); - if (r->low < 1) r->low = 1; + if (r->low > r->avg - MAX_STDDEV * r->std) + r->low = (int)(r->avg - MAX_STDDEV * r->std + .499); + if (r->high < r->avg + MAX_STDDEV * r->std) + r->high = (int)(r->avg + MAX_STDDEV * r->std + .499); + if (r->low < 1) + r->low = 1; fprintf(stderr, "[M::%s] low and high boundaries for proper pairs: (%d, %d)\n", __func__, r->low, r->high); free(q->a); } for (d = 0, max = 0; d < 4; ++d) - max = max > isize[d].n? max : isize[d].n; + max = max > isize[d].n ? max : isize[d].n; for (d = 0; d < 4; ++d) - if (pes[d].failed == 0 && isize[d].n < max * MIN_DIR_RATIO) { + if (pes[d].failed == 0 && isize[d].n < max * MIN_DIR_RATIO) + { pes[d].failed = 1; - fprintf(stderr, "[M::%s] skip orientation %c%c\n", __func__, "FR"[d>>1&1], "FR"[d&1]); + fprintf(stderr, "[M::%s] skip orientation %c%c\n", __func__, "FR"[d >> 1 & 1], "FR"[d & 1]); } } @@ -140,66 +163,93 @@ int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co int64_t l_pac = bns->l_pac; int i, r, skip[4], n = 0, rid; for (r = 0; r < 4; ++r) - skip[r] = pes[r].failed? 1 : 0; - for (i = 0; i < ma->n; ++i) { // check which orinentation has been found + skip[r] = pes[r].failed ? 
1 : 0; + for (i = 0; i < ma->n; ++i) + { // check which orinentation has been found int64_t dist; r = mem_infer_dir(l_pac, a->rb, ma->a[i].rb, &dist); if (dist >= pes[r].low && dist <= pes[r].high) skip[r] = 1; } - if (skip[0] + skip[1] + skip[2] + skip[3] == 4) return 0; // consistent pair exist; no need to perform SW - for (r = 0; r < 4; ++r) { + if (skip[0] + skip[1] + skip[2] + skip[3] == 4) + return 0; // consistent pair exist; no need to perform SW + for (r = 0; r < 4; ++r) + { int is_rev, is_larger; uint8_t *seq, *rev = 0, *ref = 0; int64_t rb, re; - if (skip[r]) continue; - is_rev = (r>>1 != (r&1)); // whether to reverse complement the mate - is_larger = !(r>>1); // whether the mate has larger coordinate - if (is_rev) { + if (skip[r]) + continue; + is_rev = (r >> 1 != (r & 1)); // whether to reverse complement the mate + is_larger = !(r >> 1); // whether the mate has larger coordinate + if (is_rev) + { rev = malloc(l_ms); // this is the reverse complement of $ms - for (i = 0; i < l_ms; ++i) rev[l_ms - 1 - i] = ms[i] < 4? 3 - ms[i] : 4; + for (i = 0; i < l_ms; ++i) + rev[l_ms - 1 - i] = ms[i] < 4 ? 3 - ms[i] : 4; seq = rev; - } else seq = (uint8_t*)ms; - if (!is_rev) { - rb = is_larger? a->rb + pes[r].low : a->rb - pes[r].high; - re = (is_larger? a->rb + pes[r].high: a->rb - pes[r].low) + l_ms; // if on the same strand, end position should be larger to make room for the seq length - } else { - rb = (is_larger? a->rb + pes[r].low : a->rb - pes[r].high) - l_ms; // similarly on opposite strands - re = is_larger? a->rb + pes[r].high: a->rb - pes[r].low; } - if (rb < 0) rb = 0; - if (re > l_pac<<1) re = l_pac<<1; - if (rb < re) ref = bns_fetch_seq(bns, pac, &rb, (rb+re)>>1, &re, &rid); - if (a->rid == rid && re - rb >= opt->min_seed_len) { // no funny things happening + else + seq = (uint8_t *)ms; + if (!is_rev) + { + rb = is_larger ? a->rb + pes[r].low : a->rb - pes[r].high; + re = (is_larger ? 
a->rb + pes[r].high : a->rb - pes[r].low) + l_ms; // if on the same strand, end position should be larger to make room for the seq length + } + else + { + rb = (is_larger ? a->rb + pes[r].low : a->rb - pes[r].high) - l_ms; // similarly on opposite strands + re = is_larger ? a->rb + pes[r].high : a->rb - pes[r].low; + } + if (rb < 0) + rb = 0; + if (re > l_pac << 1) + re = l_pac << 1; + if (rb < re) + ref = bns_fetch_seq(bns, pac, &rb, (rb + re) >> 1, &re, &rid); + if (a->rid == rid && re - rb >= opt->min_seed_len) + { // no funny things happening kswr_t aln; mem_alnreg_t b; - int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a); + int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250 ? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a); +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif aln = ksw_align2(l_ms, seq, re - rb, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0); +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_ksw_align2, tmp_diff); +#endif memset(&b, 0, sizeof(mem_alnreg_t)); - if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0 + if (aln.score >= opt->min_seed_len && aln.qb >= 0) + { // something goes wrong if aln.qb < 0 b.rid = a->rid; b.is_alt = a->is_alt; - b.qb = is_rev? l_ms - (aln.qe + 1) : aln.qb; - b.qe = is_rev? l_ms - aln.qb : aln.qe + 1; - b.rb = is_rev? (l_pac<<1) - (rb + aln.te + 1) : rb + aln.tb; - b.re = is_rev? (l_pac<<1) - (rb + aln.tb) : rb + aln.te + 1; + b.qb = is_rev ? l_ms - (aln.qe + 1) : aln.qb; + b.qe = is_rev ? l_ms - aln.qb : aln.qe + 1; + b.rb = is_rev ? (l_pac << 1) - (rb + aln.te + 1) : rb + aln.tb; + b.re = is_rev ? (l_pac << 1) - (rb + aln.tb) : rb + aln.te + 1; b.score = aln.score; b.csub = aln.score2; b.secondary = -1; - b.seedcov = (b.re - b.rb < b.qe - b.qb? 
b.re - b.rb : b.qe - b.qb) >> 1; -// printf("*** %d, [%lld,%lld], %d:%d, (%lld,%lld), (%lld,%lld) == (%lld,%lld)\n", aln.score, rb, re, is_rev, is_larger, a->rb, a->re, ma->a[0].rb, ma->a[0].re, b.rb, b.re); + b.seedcov = (b.re - b.rb < b.qe - b.qb ? b.re - b.rb : b.qe - b.qb) >> 1; + // printf("*** %d, [%lld,%lld], %d:%d, (%lld,%lld), (%lld,%lld) == (%lld,%lld)\n", aln.score, rb, re, is_rev, is_larger, a->rb, a->re, ma->a[0].rb, ma->a[0].re, b.rb, b.re); kv_push(mem_alnreg_t, *ma, b); // make room for a new element // move b s.t. ma is sorted for (i = 0; i < ma->n - 1; ++i) // find the insertion point - if (ma->a[i].score < b.score) break; + if (ma->a[i].score < b.score) + break; tmp = i; - for (i = ma->n - 1; i > tmp; --i) ma->a[i] = ma->a[i-1]; + for (i = ma->n - 1; i > tmp; --i) + ma->a[i] = ma->a[i - 1]; ma->a[i] = b; } ++n; } - if (n) ma->n = mem_sort_dedup_patch(opt, 0, 0, 0, ma->n, ma->a); - if (rev) free(rev); + if (n) + ma->n = mem_sort_dedup_patch(opt, 0, 0, 0, ma->n, ma->a); + if (rev) + free(rev); free(ref); } return n; @@ -210,61 +260,79 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons pair64_v v, u; int r, i, k, y[4], ret; // y[] keeps the last hit int64_t l_pac = bns->l_pac; - kv_init(v); kv_init(u); - for (r = 0; r < 2; ++r) { // loop through read number - for (i = 0; i < n_pri[r]; ++i) { + kv_init(v); + kv_init(u); + for (r = 0; r < 2; ++r) + { // loop through read number + for (i = 0; i < n_pri[r]; ++i) + { pair64_t key; mem_alnreg_t *e = &a[r].a[i]; - key.x = e->rb < l_pac? e->rb : (l_pac<<1) - 1 - e->rb; // forward position - key.x = (uint64_t)e->rid<<32 | (key.x - bns->anns[e->rid].offset); - key.y = (uint64_t)e->score << 32 | i << 2 | (e->rb >= l_pac)<<1 | r; + key.x = e->rb < l_pac ? 
e->rb : (l_pac << 1) - 1 - e->rb; // forward position + key.x = (uint64_t)e->rid << 32 | (key.x - bns->anns[e->rid].offset); + key.y = (uint64_t)e->score << 32 | i << 2 | (e->rb >= l_pac) << 1 | r; kv_push(pair64_t, v, key); } } ks_introsort_128(v.n, v.a); y[0] = y[1] = y[2] = y[3] = -1; - //for (i = 0; i < v.n; ++i) printf("[%d]\t%d\t%c%ld\n", i, (int)(v.a[i].y&1)+1, "+-"[v.a[i].y>>1&1], (long)v.a[i].x); - for (i = 0; i < v.n; ++i) { - for (r = 0; r < 2; ++r) { // loop through direction - int dir = r<<1 | (v.a[i].y>>1&1), which; - if (pes[dir].failed) continue; // invalid orientation - which = r<<1 | ((v.a[i].y&1)^1); - if (y[which] < 0) continue; // no previous hits - for (k = y[which]; k >= 0; --k) { // TODO: this is a O(n^2) solution in the worst case; remember to check if this loop takes a lot of time (I doubt) + // for (i = 0; i < v.n; ++i) printf("[%d]\t%d\t%c%ld\n", i, (int)(v.a[i].y&1)+1, "+-"[v.a[i].y>>1&1], (long)v.a[i].x); + for (i = 0; i < v.n; ++i) + { + for (r = 0; r < 2; ++r) + { // loop through direction + int dir = r << 1 | (v.a[i].y >> 1 & 1), which; + if (pes[dir].failed) + continue; // invalid orientation + which = r << 1 | ((v.a[i].y & 1) ^ 1); + if (y[which] < 0) + continue; // no previous hits + for (k = y[which]; k >= 0; --k) + { // TODO: this is a O(n^2) solution in the worst case; remember to check if this loop takes a lot of time (I doubt) int64_t dist; int q; double ns; pair64_t *p; - if ((v.a[k].y&3) != which) continue; + if ((v.a[k].y & 3) != which) + continue; dist = (int64_t)v.a[i].x - v.a[k].x; - //printf("%d: %lld\n", k, dist); - if (dist > pes[dir].high) break; - if (dist < pes[dir].low) continue; + // printf("%d: %lld\n", k, dist); + if (dist > pes[dir].high) + break; + if (dist < pes[dir].low) + continue; ns = (dist - pes[dir].avg) / pes[dir].std; - q = (int)((v.a[i].y>>32) + (v.a[k].y>>32) + .721 * log(2. 
* erfc(fabs(ns) * M_SQRT1_2)) * opt->a + .499); // .721 = 1/log(4) - if (q < 0) q = 0; + q = (int)((v.a[i].y >> 32) + (v.a[k].y >> 32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) * opt->a + .499); // .721 = 1/log(4) + if (q < 0) + q = 0; p = kv_pushp(pair64_t, u); - p->y = (uint64_t)k<<32 | i; - p->x = (uint64_t)q<<32 | (hash_64(p->y ^ id<<8) & 0xffffffffU); - //printf("[%lld,%lld]\t%d\tdist=%ld\n", v.a[k].x, v.a[i].x, q, (long)dist); + p->y = (uint64_t)k << 32 | i; + p->x = (uint64_t)q << 32 | (hash_64(p->y ^ id << 8) & 0xffffffffU); + // printf("[%lld,%lld]\t%d\tdist=%ld\n", v.a[k].x, v.a[i].x, q, (long)dist); } } - y[v.a[i].y&3] = i; + y[v.a[i].y & 3] = i; } - if (u.n) { // found at least one proper pair + if (u.n) + { // found at least one proper pair int tmp = opt->a + opt->b; - tmp = tmp > opt->o_del + opt->e_del? tmp : opt->o_del + opt->e_del; - tmp = tmp > opt->o_ins + opt->e_ins? tmp : opt->o_ins + opt->e_ins; + tmp = tmp > opt->o_del + opt->e_del ? tmp : opt->o_del + opt->e_del; + tmp = tmp > opt->o_ins + opt->e_ins ? tmp : opt->o_ins + opt->e_ins; ks_introsort_128(u.n, u.a); - i = u.a[u.n-1].y >> 32; k = u.a[u.n-1].y << 32 >> 32; - z[v.a[i].y&1] = v.a[i].y<<32>>34; // index of the best pair - z[v.a[k].y&1] = v.a[k].y<<32>>34; - ret = u.a[u.n-1].x >> 32; - *sub = u.n > 1? u.a[u.n-2].x>>32 : 0; + i = u.a[u.n - 1].y >> 32; + k = u.a[u.n - 1].y << 32 >> 32; + z[v.a[i].y & 1] = v.a[i].y << 32 >> 34; // index of the best pair + z[v.a[k].y & 1] = v.a[k].y << 32 >> 34; + ret = u.a[u.n - 1].x >> 32; + *sub = u.n > 1 ? 
u.a[u.n - 2].x >> 32 : 0; for (i = (long)u.n - 2, *n_sub = 0; i >= 0; --i) - if (*sub - (int)(u.a[i].x>>32) <= tmp) ++*n_sub; - } else ret = 0, *sub = 0, *n_sub = 0; - free(u.a); free(v.a); + if (*sub - (int)(u.a[i].x >> 32) <= tmp) + ++*n_sub; + } + else + ret = 0, *sub = 0, *n_sub = 0; + free(u.a); + free(v.a); return ret; } @@ -284,72 +352,94 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co kstring_t str; mem_aln_t h[2], g[2], aa[2][2]; - str.l = str.m = 0; str.s = 0; + str.l = str.m = 0; + str.s = 0; memset(h, 0, sizeof(mem_aln_t) * 2); memset(g, 0, sizeof(mem_aln_t) * 2); n_aa[0] = n_aa[1] = 0; - if (!(opt->flag & MEM_F_NO_RESCUE)) { // then perform SW for the best alignment + if (!(opt->flag & MEM_F_NO_RESCUE)) + { // then perform SW for the best alignment mem_alnreg_v b[2]; - kv_init(b[0]); kv_init(b[1]); + kv_init(b[0]); + kv_init(b[1]); for (i = 0; i < 2; ++i) for (j = 0; j < a[i].n; ++j) - if (a[i].a[j].score >= a[i].a[0].score - opt->pen_unpaired) + if (a[i].a[j].score >= a[i].a[0].score - opt->pen_unpaired) kv_push(mem_alnreg_t, b[i], a[i].a[j]); for (i = 0; i < 2; ++i) for (j = 0; j < b[i].n && j < opt->max_matesw; ++j) - n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]); - free(b[0].a); free(b[1].a); + n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t *)s[!i].seq, &a[!i]); + free(b[0].a); + free(b[1].a); } - n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0); - n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id<<1|1); - if (opt->flag & MEM_F_PRIMARY5) { + n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id << 1 | 0); + n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id << 1 | 1); + if (opt->flag & MEM_F_PRIMARY5) + { mem_reorder_primary5(opt->T, &a[0]); mem_reorder_primary5(opt->T, &a[1]); } - if (opt->flag&MEM_F_NOPAIRING) goto no_pairing; + if (opt->flag & MEM_F_NOPAIRING) + goto no_pairing; // pairing single-end hits - if (n_pri[0] && 
n_pri[1] && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z, n_pri)) > 0) { + if (n_pri[0] && n_pri[1] && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z, n_pri)) > 0) + { int is_multi[2], q_pe, score_un, q_se[2]; char **XA[2]; // check if an end has multiple hits even after mate-SW - for (i = 0; i < 2; ++i) { + for (i = 0; i < 2; ++i) + { for (j = 1; j < n_pri[i]; ++j) - if (a[i].a[j].secondary < 0 && a[i].a[j].score >= opt->T) break; - is_multi[i] = j < n_pri[i]? 1 : 0; + if (a[i].a[j].secondary < 0 && a[i].a[j].score >= opt->T) + break; + is_multi[i] = j < n_pri[i] ? 1 : 0; } - if (is_multi[0] || is_multi[1]) goto no_pairing; // TODO: in rare cases, the true hit may be long but with low score + if (is_multi[0] || is_multi[1]) + goto no_pairing; // TODO: in rare cases, the true hit may be long but with low score // compute mapQ for the best SE hit score_un = a[0].a[0].score + a[1].a[0].score - opt->pen_unpaired; - //q_pe = o && subo < o? (int)(MEM_MAPQ_COEF * (1. - (double)subo / o) * log(a[0].a[z[0]].seedcov + a[1].a[z[1]].seedcov) + .499) : 0; - subo = subo > score_un? subo : score_un; + // q_pe = o && subo < o? (int)(MEM_MAPQ_COEF * (1. - (double)subo / o) * log(a[0].a[z[0]].seedcov + a[1].a[z[1]].seedcov) + .499) : 0; + subo = subo > score_un ? subo : score_un; q_pe = raw_mapq(o - subo, opt->a); - if (n_sub > 0) q_pe -= (int)(4.343 * log(n_sub+1) + .499); - if (q_pe < 0) q_pe = 0; - if (q_pe > 60) q_pe = 60; + if (n_sub > 0) + q_pe -= (int)(4.343 * log(n_sub + 1) + .499); + if (q_pe < 0) + q_pe = 0; + if (q_pe > 60) + q_pe = 60; q_pe = (int)(q_pe * (1. 
- .5 * (a[0].a[0].frac_rep + a[1].a[0].frac_rep)) + .499); // the following assumes no split hits - if (o > score_un) { // paired alignment is preferred + if (o > score_un) + { // paired alignment is preferred mem_alnreg_t *c[2]; - c[0] = &a[0].a[z[0]]; c[1] = &a[1].a[z[1]]; - for (i = 0; i < 2; ++i) { + c[0] = &a[0].a[z[0]]; + c[1] = &a[1].a[z[1]]; + for (i = 0; i < 2; ++i) + { if (c[i]->secondary >= 0) c[i]->sub = a[i].a[c[i]->secondary].score, c[i]->secondary = -2; q_se[i] = mem_approx_mapq_se(opt, c[i]); } - q_se[0] = q_se[0] > q_pe? q_se[0] : q_pe < q_se[0] + 40? q_pe : q_se[0] + 40; - q_se[1] = q_se[1] > q_pe? q_se[1] : q_pe < q_se[1] + 40? q_pe : q_se[1] + 40; + q_se[0] = q_se[0] > q_pe ? q_se[0] : q_pe < q_se[0] + 40 ? q_pe + : q_se[0] + 40; + q_se[1] = q_se[1] > q_pe ? q_se[1] : q_pe < q_se[1] + 40 ? q_pe + : q_se[1] + 40; extra_flag |= 2; // cap at the tandem repeat score - q_se[0] = q_se[0] < raw_mapq(c[0]->score - c[0]->csub, opt->a)? q_se[0] : raw_mapq(c[0]->score - c[0]->csub, opt->a); - q_se[1] = q_se[1] < raw_mapq(c[1]->score - c[1]->csub, opt->a)? q_se[1] : raw_mapq(c[1]->score - c[1]->csub, opt->a); - } else { // the unpaired alignment is preferred + q_se[0] = q_se[0] < raw_mapq(c[0]->score - c[0]->csub, opt->a) ? q_se[0] : raw_mapq(c[0]->score - c[0]->csub, opt->a); + q_se[1] = q_se[1] < raw_mapq(c[1]->score - c[1]->csub, opt->a) ? 
q_se[1] : raw_mapq(c[1]->score - c[1]->csub, opt->a); + } + else + { // the unpaired alignment is preferred z[0] = z[1] = 0; q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]); q_se[1] = mem_approx_mapq_se(opt, &a[1].a[0]); } - for (i = 0; i < 2; ++i) { + for (i = 0; i < 2; ++i) + { int k = a[i].a[z[i]].secondary_all; - if (k >= 0 && k < n_pri[i]) { // switch secondary and primary if both of them are non-ALT + if (k >= 0 && k < n_pri[i]) + { // switch secondary and primary if both of them are non-ALT assert(a[i].a[k].secondary_all < 0); for (j = 0; j < a[i].n; ++j) if (a[i].a[j].secondary_all == k || j == k) @@ -357,63 +447,86 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co a[i].a[z[i]].secondary_all = -1; } } - if (!(opt->flag & MEM_F_ALL)) { + if (!(opt->flag & MEM_F_ALL)) + { for (i = 0; i < 2; ++i) XA[i] = mem_gen_alt(opt, bns, pac, &a[i], s[i].l_seq, s[i].seq); - } else XA[0] = XA[1] = 0; + } + else + XA[0] = XA[1] = 0; // write SAM - for (i = 0; i < 2; ++i) { + for (i = 0; i < 2; ++i) + { h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[z[i]]); h[i].mapq = q_se[i]; - h[i].flag |= 0x40<score < opt->T || p->secondary >= 0 || !p->is_alt) continue; + if (p->score < opt->T || p->secondary >= 0 || !p->is_alt) + continue; g[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, p); - g[i].flag |= 0x800 | 0x40<= opt->T) which = 0; + if (a[i].n) + { + if (a[i].a[0].score >= opt->T) + which = 0; else if (n_pri[i] < a[i].n && a[i].a[n_pri[i]].score >= opt->T) which = n_pri[i]; } - if (which >= 0) h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[which]); - else h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, 0); + if (which >= 0) + h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[which]); + else + h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, 0); } - if (!(opt->flag & MEM_F_NOPAIRING) && h[0].rid == h[1].rid && h[0].rid >= 0) { // if the top hits from the two ends constitute a 
proper pair, flag it. + if (!(opt->flag & MEM_F_NOPAIRING) && h[0].rid == h[1].rid && h[0].rid >= 0) + { // if the top hits from the two ends constitute a proper pair, flag it. int64_t dist; int d; d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist); - if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) extra_flag |= 2; + if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) + extra_flag |= 2; } - mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]); - mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]); - if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); - free(h[0].cigar); free(h[1].cigar); + mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41 | extra_flag, &h[1]); + mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81 | extra_flag, &h[0]); + if (strcmp(s[0].name, s[1].name) != 0) + err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); + free(h[0].cigar); + free(h[1].cigar); return n; } diff --git a/bwt.c b/bwt.c index 9083654..0229555 100644 --- a/bwt.c +++ b/bwt.c @@ -36,16 +36,24 @@ #include "kvec.h" #ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" +#include "malloc_wrap.h" +#endif + +#ifdef SHOW_PERF +extern int64_t get_mseconds(); +extern int64_t time_bwt_sa, + time_bwt_occ4, + time_bwt_smem1a; #endif void bwt_gen_cnt_table(bwt_t *bwt) { int i, j; - for (i = 0; i != 256; ++i) { + for (i = 0; i != 256; ++i) + { uint32_t x = 0; for (j = 0; j != 4; ++j) - x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3); + x |= (((i & 3) == j) + ((i >> 2 & 3) == j) + ((i >> 4 & 3) == j) + (i >> 6 == j)) << (j << 3); bwt->cnt_table[i] = x; } } @@ -55,7 +63,7 @@ static inline bwtint_t bwt_invPsi(const bwt_t *bwt, bwtint_t k) // compute inver bwtint_t x = k - (k > bwt->primary); x = bwt_B0(bwt, x); x = bwt->L2[x] + bwt_occ(bwt, k, x); - return k == bwt->primary? 
0 : x; + return k == bwt->primary ? 0 : x; } // bwt->bwt and bwt->occ must be precalculated @@ -68,37 +76,50 @@ void bwt_cal_sa(bwt_t *bwt, int intv) xassert(intv_round == intv, "SA sample interval is not a power of 2."); xassert(bwt->bwt, "bwt_t::bwt is not initialized."); - if (bwt->sa) free(bwt->sa); + if (bwt->sa) + free(bwt->sa); bwt->sa_intv = intv; bwt->n_sa = (bwt->seq_len + intv) / intv; - bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); + bwt->sa = (bwtint_t *)calloc(bwt->n_sa, sizeof(bwtint_t)); // calculate SA value - isa = 0; sa = bwt->seq_len; - for (i = 0; i < bwt->seq_len; ++i) { - if (isa % intv == 0) bwt->sa[isa/intv] = sa; + isa = 0; + sa = bwt->seq_len; + for (i = 0; i < bwt->seq_len; ++i) + { + if (isa % intv == 0) + bwt->sa[isa / intv] = sa; --sa; isa = bwt_invPsi(bwt, isa); } - if (isa % intv == 0) bwt->sa[isa/intv] = sa; + if (isa % intv == 0) + bwt->sa[isa / intv] = sa; bwt->sa[0] = (bwtint_t)-1; // before this line, bwt->sa[0] = bwt->seq_len } bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k) { +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif bwtint_t sa = 0, mask = bwt->sa_intv - 1; - while (k & mask) { + while (k & mask) + { ++sa; k = bwt_invPsi(bwt, k); } /* without setting bwt->sa[0] = -1, the following line should be changed to (sa + bwt->sa[k/bwt->sa_intv]) % (bwt->seq_len + 1) */ - return sa + bwt->sa[k/bwt->sa_intv]; +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_bwt_sa, tmp_diff); +#endif + return sa + bwt->sa[k / bwt->sa_intv]; } static inline int __occ_aux(uint64_t y, int c) { // reduce nucleotide counting to bits counting - y = ((c&2)? y : ~y) >> 1 & ((c&1)? y : ~y) & 0x5555555555555555ull; + y = ((c & 2) ? y : ~y) >> 1 & ((c & 1) ? 
y : ~y) & 0x5555555555555555ull; // count the number of 1s in y y = (y & 0x3333333333333333ull) + (y >> 2 & 0x3333333333333333ull); return ((y + (y >> 4)) & 0xf0f0f0f0f0f0f0full) * 0x101010101010101ull >> 56; @@ -109,21 +130,25 @@ bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c) bwtint_t n; uint32_t *p, *end; - if (k == bwt->seq_len) return bwt->L2[c+1] - bwt->L2[c]; - if (k == (bwtint_t)(-1)) return 0; + if (k == bwt->seq_len) + return bwt->L2[c + 1] - bwt->L2[c]; + if (k == (bwtint_t)(-1)) + return 0; k -= (k >= bwt->primary); // because $ is not in bwt // retrieve Occ at k/OCC_INTERVAL - n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c]; + n = ((bwtint_t *)(p = bwt_occ_intv(bwt, k)))[c]; p += sizeof(bwtint_t); // jump to the start of the first BWT cell // calculate Occ up to the last k/32 - end = p + (((k>>5) - ((k&~OCC_INTV_MASK)>>5))<<1); - for (; p < end; p += 2) n += __occ_aux((uint64_t)p[0]<<32 | p[1], c); + end = p + (((k >> 5) - ((k & ~OCC_INTV_MASK) >> 5)) << 1); + for (; p < end; p += 2) + n += __occ_aux((uint64_t)p[0] << 32 | p[1], c); // calculate Occ - n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c); - if (c == 0) n -= ~k&31; // corrected for the masked bits + n += __occ_aux(((uint64_t)p[0] << 32 | p[1]) & ~((1ull << ((~k & 31) << 1)) - 1), c); + if (c == 0) + n -= ~k & 31; // corrected for the masked bits return n; } @@ -132,69 +157,86 @@ bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c) void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol) { bwtint_t _k, _l; - _k = (k >= bwt->primary)? k-1 : k; - _l = (l >= bwt->primary)? l-1 : l; - if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) { + _k = (k >= bwt->primary) ? k - 1 : k; + _l = (l >= bwt->primary) ? 
l - 1 : l; + if (_l / OCC_INTERVAL != _k / OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) + { *ok = bwt_occ(bwt, k, c); *ol = bwt_occ(bwt, l, c); - } else { + } + else + { bwtint_t m, n, i, j; uint32_t *p; - if (k >= bwt->primary) --k; - if (l >= bwt->primary) --l; - n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c]; + if (k >= bwt->primary) + --k; + if (l >= bwt->primary) + --l; + n = ((bwtint_t *)(p = bwt_occ_intv(bwt, k)))[c]; p += sizeof(bwtint_t); // calculate *ok j = k >> 5 << 5; - for (i = k/OCC_INTERVAL*OCC_INTERVAL; i < j; i += 32, p += 2) - n += __occ_aux((uint64_t)p[0]<<32 | p[1], c); + for (i = k / OCC_INTERVAL * OCC_INTERVAL; i < j; i += 32, p += 2) + n += __occ_aux((uint64_t)p[0] << 32 | p[1], c); m = n; - n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c); - if (c == 0) n -= ~k&31; // corrected for the masked bits + n += __occ_aux(((uint64_t)p[0] << 32 | p[1]) & ~((1ull << ((~k & 31) << 1)) - 1), c); + if (c == 0) + n -= ~k & 31; // corrected for the masked bits *ok = n; // calculate *ol j = l >> 5 << 5; for (; i < j; i += 32, p += 2) - m += __occ_aux((uint64_t)p[0]<<32 | p[1], c); - m += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~l&31)<<1)) - 1), c); - if (c == 0) m -= ~l&31; // corrected for the masked bits + m += __occ_aux((uint64_t)p[0] << 32 | p[1], c); + m += __occ_aux(((uint64_t)p[0] << 32 | p[1]) & ~((1ull << ((~l & 31) << 1)) - 1), c); + if (c == 0) + m -= ~l & 31; // corrected for the masked bits *ol = m; } } -#define __occ_aux4(bwt, b) \ - ((bwt)->cnt_table[(b)&0xff] + (bwt)->cnt_table[(b)>>8&0xff] \ - + (bwt)->cnt_table[(b)>>16&0xff] + (bwt)->cnt_table[(b)>>24]) +#define __occ_aux4(bwt, b) \ + ((bwt)->cnt_table[(b)&0xff] + (bwt)->cnt_table[(b) >> 8 & 0xff] + (bwt)->cnt_table[(b) >> 16 & 0xff] + (bwt)->cnt_table[(b) >> 24]) void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]) { bwtint_t x; uint32_t *p, tmp, *end; - if (k == (bwtint_t)(-1)) { + if (k == (bwtint_t)(-1)) + { memset(cnt, 0, 
4 * sizeof(bwtint_t)); return; } k -= (k >= bwt->primary); // because $ is not in bwt p = bwt_occ_intv(bwt, k); memcpy(cnt, p, 4 * sizeof(bwtint_t)); - p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t)) - end = p + ((k>>4) - ((k&~OCC_INTV_MASK)>>4)); // this is the end point of the following loop - for (x = 0; p < end; ++p) x += __occ_aux4(bwt, *p); - tmp = *p & ~((1U<<((~k&15)<<1)) - 1); - x += __occ_aux4(bwt, tmp) - (~k&15); - cnt[0] += x&0xff; cnt[1] += x>>8&0xff; cnt[2] += x>>16&0xff; cnt[3] += x>>24; + p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t)) + end = p + ((k >> 4) - ((k & ~OCC_INTV_MASK) >> 4)); // this is the end point of the following loop + for (x = 0; p < end; ++p) + x += __occ_aux4(bwt, *p); + tmp = *p & ~((1U << ((~k & 15) << 1)) - 1); + x += __occ_aux4(bwt, tmp) - (~k & 15); + cnt[0] += x & 0xff; + cnt[1] += x >> 8 & 0xff; + cnt[2] += x >> 16 & 0xff; + cnt[3] += x >> 24; } // an analogy to bwt_occ4() but more efficient, requiring k <= l void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4]) { +#ifdef SHOW_PERF +// int64_t start_time = get_mseconds(); +#endif bwtint_t _k, _l; _k = k - (k >= bwt->primary); _l = l - (l >= bwt->primary); - if (_l>>OCC_INTV_SHIFT != _k>>OCC_INTV_SHIFT || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) { + if (_l >> OCC_INTV_SHIFT != _k >> OCC_INTV_SHIFT || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) + { bwt_occ4(bwt, k, cntk); bwt_occ4(bwt, l, cntl); - } else { + } + else + { bwtint_t x, y; uint32_t *p, tmp, *endk, *endl; k -= (k >= bwt->primary); // because $ is not in bwt @@ -203,38 +245,57 @@ void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtin memcpy(cntk, p, 4 * sizeof(bwtint_t)); p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t)) // prepare cntk[] - endk = p + ((k>>4) - ((k&~OCC_INTV_MASK)>>4)); - endl = p + ((l>>4) - ((l&~OCC_INTV_MASK)>>4)); - for (x = 
0; p < endk; ++p) x += __occ_aux4(bwt, *p); + endk = p + ((k >> 4) - ((k & ~OCC_INTV_MASK) >> 4)); + endl = p + ((l >> 4) - ((l & ~OCC_INTV_MASK) >> 4)); + for (x = 0; p < endk; ++p) + x += __occ_aux4(bwt, *p); y = x; - tmp = *p & ~((1U<<((~k&15)<<1)) - 1); - x += __occ_aux4(bwt, tmp) - (~k&15); + tmp = *p & ~((1U << ((~k & 15) << 1)) - 1); + x += __occ_aux4(bwt, tmp) - (~k & 15); // calculate cntl[] and finalize cntk[] - for (; p < endl; ++p) y += __occ_aux4(bwt, *p); - tmp = *p & ~((1U<<((~l&15)<<1)) - 1); - y += __occ_aux4(bwt, tmp) - (~l&15); + for (; p < endl; ++p) + y += __occ_aux4(bwt, *p); + tmp = *p & ~((1U << ((~l & 15) << 1)) - 1); + y += __occ_aux4(bwt, tmp) - (~l & 15); memcpy(cntl, cntk, 4 * sizeof(bwtint_t)); - cntk[0] += x&0xff; cntk[1] += x>>8&0xff; cntk[2] += x>>16&0xff; cntk[3] += x>>24; - cntl[0] += y&0xff; cntl[1] += y>>8&0xff; cntl[2] += y>>16&0xff; cntl[3] += y>>24; + cntk[0] += x & 0xff; + cntk[1] += x >> 8 & 0xff; + cntk[2] += x >> 16 & 0xff; + cntk[3] += x >> 24; + cntl[0] += y & 0xff; + cntl[1] += y >> 8 & 0xff; + cntl[2] += y >> 16 & 0xff; + cntl[3] += y >> 24; } +#ifdef SHOW_PERF +// int64_t tmp_diff = get_mseconds() - start_time; +// __sync_fetch_and_add(&time_bwt_occ4, tmp_diff); +#endif } int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end) { bwtint_t k, l, ok, ol; int i; - k = 0; l = bwt->seq_len; - for (i = len - 1; i >= 0; --i) { + k = 0; + l = bwt->seq_len; + for (i = len - 1; i >= 0; --i) + { ubyte_t c = str[i]; - if (c > 3) return 0; // no match + if (c > 3) + return 0; // no match bwt_2occ(bwt, k - 1, l, c, &ok, &ol); k = bwt->L2[c] + ok + 1; l = bwt->L2[c] + ol; - if (k > l) break; // no match + if (k > l) + break; // no match } - if (k > l) return 0; // no match - if (sa_begin) *sa_begin = k; - if (sa_end) *sa_end = l; + if (k > l) + return 0; // no match + if (sa_begin) + *sa_begin = k; + if (sa_end) + *sa_end = l; return l - k + 1; } @@ -242,16 +303,21 @@ int 
bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t { int i; bwtint_t k, l, ok, ol; - k = *k0; l = *l0; - for (i = len - 1; i >= 0; --i) { + k = *k0; + l = *l0; + for (i = len - 1; i >= 0; --i) + { ubyte_t c = str[i]; - if (c > 3) return 0; // there is an N here. no match + if (c > 3) + return 0; // there is an N here. no match bwt_2occ(bwt, k - 1, l, c, &ok, &ol); k = bwt->L2[c] + ok + 1; l = bwt->L2[c] + ol; - if (k > l) return 0; // no match + if (k > l) + return 0; // no match } - *k0 = k; *l0 = l; + *k0 = k; + *l0 = l; return l - k + 1; } @@ -264,7 +330,8 @@ void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_b bwtint_t tk[4], tl[4]; int i; bwt_2occ4(bwt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], tk, tl); - for (i = 0; i != 4; ++i) { + for (i = 0; i != 4; ++i) + { ok[i].x[!is_back] = bwt->L2[i] + 1 + tk[i]; ok[i].x[2] = tl[i] - tk[i]; } @@ -276,9 +343,11 @@ void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_b static void bwt_reverse_intvs(bwtintv_v *p) { - if (p->n > 1) { + if (p->n > 1) + { int j; - for (j = 0; j < p->n>>1; ++j) { + for (j = 0; j < p->n >> 1; ++j) + { bwtintv_t tmp = p->a[p->n - 1 - j]; p->a[p->n - 1 - j] = p->a[j]; p->a[j] = tmp; @@ -288,65 +357,102 @@ static void bwt_reverse_intvs(bwtintv_v *p) // NOTE: $max_intv is not currently used in BWA-MEM int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]) { +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif int i, j, c, ret; bwtintv_t ik, ok[4]; bwtintv_v a[2], *prev, *curr, *swap; mem->n = 0; - if (q[x] > 3) return x + 1; - if (min_intv < 1) min_intv = 1; // the interval size should be at least 1 - kv_init(a[0]); kv_init(a[1]); - prev = tmpvec && tmpvec[0]? tmpvec[0] : &a[0]; // use the temporary vector if provided - curr = tmpvec && tmpvec[1]? 
tmpvec[1] : &a[1]; + if (q[x] > 3) + return x + 1; + if (min_intv < 1) + min_intv = 1; // the interval size should be at least 1 + kv_init(a[0]); + kv_init(a[1]); + prev = tmpvec && tmpvec[0] ? tmpvec[0] : &a[0]; // use the temporary vector if provided + curr = tmpvec && tmpvec[1] ? tmpvec[1] : &a[1]; bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base ik.info = x + 1; - for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search - if (ik.x[2] < max_intv) { // an interval small enough + for (i = x + 1, curr->n = 0; i < len; ++i) + { // forward search + if (ik.x[2] < max_intv) + { // an interval small enough kv_push(bwtintv_t, *curr, ik); break; - } else if (q[i] < 4) { // an A/C/G/T base + } + else if (q[i] < 4) + { // an A/C/G/T base c = 3 - q[i]; // complement of q[i] bwt_extend(bwt, &ik, ok, 0); - if (ok[c].x[2] != ik.x[2]) { // change of the interval size + if (ok[c].x[2] != ik.x[2]) + { // change of the interval size kv_push(bwtintv_t, *curr, ik); - if (ok[c].x[2] < min_intv) break; // the interval size is too small to be extended further + if (ok[c].x[2] < min_intv) + break; // the interval size is too small to be extended further } - ik = ok[c]; ik.info = i + 1; - } else { // an ambiguous base + ik = ok[c]; + ik.info = i + 1; + } + else + { // an ambiguous base kv_push(bwtintv_t, *curr, ik); break; // always terminate extension at an ambiguous base; in this case, ia[0].info; // this will be the returned value - swap = curr; curr = prev; prev = swap; + if (i == len) + kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end + bwt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first + ret = curr->a[0].info; // this will be the returned value + swap = curr; + curr = prev; + prev = swap; - for (i = x - 1; i >= -1; --i) { // backward search for MEMs - c = i < 0? -1 : q[i] < 4? 
q[i] : -1; // c==-1 if i<0 or q[i] is an ambiguous base - for (j = 0, curr->n = 0; j < prev->n; ++j) { + for (i = x - 1; i >= -1; --i) + { // backward search for MEMs + c = i < 0 ? -1 : q[i] < 4 ? q[i] + : -1; // c==-1 if i<0 or q[i] is an ambiguous base + for (j = 0, curr->n = 0; j < prev->n; ++j) + { bwtintv_t *p = &prev->a[j]; - if (c >= 0 && ik.x[2] >= max_intv) bwt_extend(bwt, p, ok, 1); - if (c < 0 || ik.x[2] < max_intv || ok[c].x[2] < min_intv) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough - if (curr->n == 0) { // test curr->n>0 to make sure there are no longer matches - if (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches - ik = *p; ik.info |= (uint64_t)(i + 1)<<32; + if (c >= 0 && ik.x[2] >= max_intv) + bwt_extend(bwt, p, ok, 1); + if (c < 0 || ik.x[2] < max_intv || ok[c].x[2] < min_intv) + { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough + if (curr->n == 0) + { // test curr->n>0 to make sure there are no longer matches + if (mem->n == 0 || i + 1 < mem->a[mem->n - 1].info >> 32) + { // skip contained matches + ik = *p; + ik.info |= (uint64_t)(i + 1) << 32; kv_push(bwtintv_t, *mem, ik); } } // otherwise the match is contained in another longer match - } else if (curr->n == 0 || ok[c].x[2] != curr->a[curr->n-1].x[2]) { + } + else if (curr->n == 0 || ok[c].x[2] != curr->a[curr->n - 1].x[2]) + { ok[c].info = p->info; kv_push(bwtintv_t, *curr, ok[c]); } } - if (curr->n == 0) break; - swap = curr; curr = prev; prev = swap; + if (curr->n == 0) + break; + swap = curr; + curr = prev; + prev = swap; } bwt_reverse_intvs(mem); // s.t. 
sorted by the start coordinate - if (tmpvec == 0 || tmpvec[0] == 0) free(a[0].a); - if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a); + if (tmpvec == 0 || tmpvec[0] == 0) + free(a[0].a); + if (tmpvec == 0 || tmpvec[1] == 0) + free(a[1].a); +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_bwt_smem1a, tmp_diff); +#endif return ret; } @@ -361,19 +467,25 @@ int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int m bwtintv_t ik, ok[4]; memset(mem, 0, sizeof(bwtintv_t)); - if (q[x] > 3) return x + 1; + if (q[x] > 3) + return x + 1; bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base - for (i = x + 1; i < len; ++i) { // forward search - if (q[i] < 4) { // an A/C/G/T base + for (i = x + 1; i < len; ++i) + { // forward search + if (q[i] < 4) + { // an A/C/G/T base c = 3 - q[i]; // complement of q[i] bwt_extend(bwt, &ik, ok, 0); - if (ok[c].x[2] < max_intv && i - x >= min_len) { + if (ok[c].x[2] < max_intv && i - x >= min_len) + { *mem = ok[c]; - mem->info = (uint64_t)x<<32 | (i + 1); + mem->info = (uint64_t)x << 32 | (i + 1); return i + 1; } ik = ok[c]; - } else return i + 1; + } + else + return i + 1; } return len; } @@ -387,7 +499,7 @@ void bwt_dump_bwt(const char *fn, const bwt_t *bwt) FILE *fp; fp = xopen(fn, "wb"); err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); - err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); + err_fwrite(bwt->L2 + 1, sizeof(bwtint_t), 4, fp); err_fwrite(bwt->bwt, 4, bwt->bwt_size, fp); err_fflush(fp); err_fclose(fp); @@ -398,7 +510,7 @@ void bwt_dump_sa(const char *fn, const bwt_t *bwt) FILE *fp; fp = xopen(fn, "wb"); err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); - err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); + err_fwrite(bwt->L2 + 1, sizeof(bwtint_t), 4, fp); err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); err_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); @@ -407,13 +519,16 @@ void 
bwt_dump_sa(const char *fn, const bwt_t *bwt) } static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a) -{ // Mac/Darwin has a bug when reading data longer than 2GB. This function fixes this issue by reading data in small chunks +{ // Mac/Darwin has a bug when reading data longer than 2GB. This function fixes this issue by reading data in small chunks const int bufsize = 0x1000000; // 16M block bwtint_t offset = 0; - while (size) { - int x = bufsize < size? bufsize : size; - if ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0) break; - size -= x; offset += x; + while (size) + { + int x = bufsize < size ? bufsize : size; + if ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0) + break; + size -= x; + offset += x; } return offset; } @@ -433,7 +548,7 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt) xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same."); bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv; - bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); + bwt->sa = (bwtint_t *)calloc(bwt->n_sa, sizeof(bwtint_t)); bwt->sa[0] = -1; fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1); @@ -445,15 +560,15 @@ bwt_t *bwt_restore_bwt(const char *fn) bwt_t *bwt; FILE *fp; - bwt = (bwt_t*)calloc(1, sizeof(bwt_t)); + bwt = (bwt_t *)calloc(1, sizeof(bwt_t)); fp = xopen(fn, "rb"); err_fseek(fp, 0, SEEK_END); bwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2; - bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); + bwt->bwt = (uint32_t *)calloc(bwt->bwt_size, 4); err_fseek(fp, 0, SEEK_SET); err_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp); - err_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fread_fix(fp, bwt->bwt_size<<2, bwt->bwt); + err_fread_noeof(bwt->L2 + 1, sizeof(bwtint_t), 4, fp); + fread_fix(fp, bwt->bwt_size << 2, bwt->bwt); bwt->seq_len = bwt->L2[4]; err_fclose(fp); bwt_gen_cnt_table(bwt); @@ -463,7 +578,9 @@ bwt_t *bwt_restore_bwt(const char *fn) void bwt_destroy(bwt_t *bwt) { - if (bwt == 0) 
return; - free(bwt->sa); free(bwt->bwt); + if (bwt == 0) + return; + free(bwt->sa); + free(bwt->bwt); free(bwt); } diff --git a/bwtindex.c b/bwtindex.c index 6a27ae1..941be28 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -1,8 +1,8 @@ /* The MIT License Copyright (c) 2018- Dana-Farber Cancer Institute - 2009-2018 Broad Institute, Inc. - 2008-2009 Genome Research Ltd. (GRL) + 2009-2018 Broad Institute, Inc. + 2008-2009 Genome Research Ltd. (GRL) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -42,10 +42,9 @@ #endif #ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" +#include "malloc_wrap.h" #endif - int is_bwt(ubyte_t *T, int n); int64_t bwa_seq_len(const char *fn_pac) @@ -69,46 +68,55 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) FILE *fp; // initialization - bwt = (bwt_t*)calloc(1, sizeof(bwt_t)); + bwt = (bwt_t *)calloc(1, sizeof(bwt_t)); bwt->seq_len = bwa_seq_len(fn_pac); bwt->bwt_size = (bwt->seq_len + 15) >> 4; fp = xopen(fn_pac, "rb"); // prepare sequence - pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1); - buf2 = (ubyte_t*)calloc(pac_size, 1); + pac_size = (bwt->seq_len >> 2) + ((bwt->seq_len & 3) == 0 ? 
0 : 1); + buf2 = (ubyte_t *)calloc(pac_size, 1); err_fread_noeof(buf2, 1, pac_size, fp); err_fclose(fp); memset(bwt->L2, 0, 5 * 4); - buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1); - for (i = 0; i < bwt->seq_len; ++i) { - buf[i] = buf2[i>>2] >> ((3 - (i&3)) << 1) & 3; - ++bwt->L2[1+buf[i]]; + buf = (ubyte_t *)calloc(bwt->seq_len + 1, 1); + for (i = 0; i < bwt->seq_len; ++i) + { + buf[i] = buf2[i >> 2] >> ((3 - (i & 3)) << 1) & 3; + ++bwt->L2[1 + buf[i]]; } - for (i = 2; i <= 4; ++i) bwt->L2[i] += bwt->L2[i-1]; + for (i = 2; i <= 4; ++i) + bwt->L2[i] += bwt->L2[i - 1]; free(buf2); // Burrows-Wheeler Transform - if (use_is) { + if (use_is) + { bwt->primary = is_bwt(buf, bwt->seq_len); - } else { + } + else + { rope_t *r; int64_t x; rpitr_t itr; const uint8_t *blk; r = rope_init(ROPE_DEF_MAX_NODES, ROPE_DEF_BLOCK_LEN); - for (i = bwt->seq_len - 1, x = 0; i >= 0; --i) { + for (i = bwt->seq_len - 1, x = 0; i >= 0; --i) + { int c = buf[i] + 1; x = rope_insert_run(r, x, c, 1, 0) + 1; - while (--c >= 0) x += r->c[c]; + while (--c >= 0) + x += r->c[c]; } bwt->primary = x; rope_itr_first(r, &itr); x = 0; - while ((blk = rope_itr_next_block(&itr)) != 0) { + while ((blk = rope_itr_next_block(&itr)) != 0) + { const uint8_t *q = blk + 2, *end = blk + 2 + *rle_nptr(blk); - while (q < end) { + while (q < end) + { int c = 0; int64_t l; rle_dec1(q, c, l); @@ -118,9 +126,9 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) } rope_destroy(r); } - bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); + bwt->bwt = (uint32_t *)calloc(bwt->bwt_size, 4); for (i = 0; i < bwt->seq_len; ++i) - bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1); + bwt->bwt[i >> 4] |= buf[i] << ((15 - (i & 15)) << 1); free(buf); return bwt; } @@ -129,23 +137,29 @@ int bwa_pac2bwt(int argc, char *argv[]) // the "pac2bwt" command; IMPORTANT: bwt { bwt_t *bwt; int c, use_is = 1; - while ((c = getopt(argc, argv, "d")) >= 0) { - switch (c) { - case 'd': use_is = 0; break; - default: return 1; + while ((c = getopt(argc, 
argv, "d")) >= 0) + { + switch (c) + { + case 'd': + use_is = 0; + break; + default: + return 1; } } - if (optind + 2 > argc) { + if (optind + 2 > argc) + { fprintf(stderr, "Usage: bwa pac2bwt [-d] \n"); return 1; } bwt = bwt_pac2bwt(argv[optind], use_is); - bwt_dump_bwt(argv[optind+1], bwt); + bwt_dump_bwt(argv[optind + 1], bwt); bwt_destroy(bwt); return 0; } -#define bwt_B00(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3) +#define bwt_B00(b, k) ((b)->bwt[(k) >> 4] >> ((~(k)&0xf) << 1) & 3) void bwt_bwtupdate_core(bwt_t *bwt) { @@ -153,28 +167,33 @@ void bwt_bwtupdate_core(bwt_t *bwt) uint32_t *buf; n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1; - bwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size - buf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt + bwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size + buf = (uint32_t *)calloc(bwt->bwt_size, 4); // will be the new bwt c[0] = c[1] = c[2] = c[3] = 0; - for (i = k = 0; i < bwt->seq_len; ++i) { - if (i % OCC_INTERVAL == 0) { + for (i = k = 0; i < bwt->seq_len; ++i) + { + if (i % OCC_INTERVAL == 0) + { memcpy(buf + k, c, sizeof(bwtint_t) * 4); k += sizeof(bwtint_t); // in fact: sizeof(bwtint_t)=4*(sizeof(bwtint_t)/4) } - if (i % 16 == 0) buf[k++] = bwt->bwt[i/16]; // 16 == sizeof(uint32_t)/2 + if (i % 16 == 0) + buf[k++] = bwt->bwt[i / 16]; // 16 == sizeof(uint32_t)/2 ++c[bwt_B00(bwt, i)]; } // the last element memcpy(buf + k, c, sizeof(bwtint_t) * 4); xassert(k + sizeof(bwtint_t) == bwt->bwt_size, "inconsistent bwt_size"); // update bwt - free(bwt->bwt); bwt->bwt = buf; + free(bwt->bwt); + bwt->bwt = buf; } int bwa_bwtupdate(int argc, char *argv[]) // the "bwtupdate" command { bwt_t *bwt; - if (argc != 2) { + if (argc != 2) + { fprintf(stderr, "Usage: bwa bwtupdate \n"); return 1; } @@ -189,19 +208,25 @@ int bwa_bwt2sa(int argc, char *argv[]) // the "bwt2sa" command { bwt_t *bwt; int c, sa_intv = 32; - while ((c = getopt(argc, argv, "i:")) >= 0) { - switch (c) { - case 'i': 
sa_intv = atoi(optarg); break; - default: return 1; + while ((c = getopt(argc, argv, "i:")) >= 0) + { + switch (c) + { + case 'i': + sa_intv = atoi(optarg); + break; + default: + return 1; } } - if (optind + 2 > argc) { + if (optind + 2 > argc) + { fprintf(stderr, "Usage: bwa bwt2sa [-i %d] \n", sa_intv); return 1; } bwt = bwt_restore_bwt(argv[optind]); bwt_cal_sa(bwt, sa_intv); - bwt_dump_sa(argv[optind+1], bwt); + bwt_dump_sa(argv[optind + 1], bwt); bwt_destroy(bwt); return 0; } @@ -210,27 +235,42 @@ int bwa_index(int argc, char *argv[]) // the "index" command { int c, algo_type = BWTALGO_AUTO, is_64 = 0, block_size = 10000000; char *prefix = 0, *str; - while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) { - switch (c) { + while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) + { + switch (c) + { case 'a': // if -a is not set, algo_type will be determined later - if (strcmp(optarg, "rb2") == 0) algo_type = BWTALGO_RB2; - else if (strcmp(optarg, "bwtsw") == 0) algo_type = BWTALGO_BWTSW; - else if (strcmp(optarg, "is") == 0) algo_type = BWTALGO_IS; - else err_fatal(__func__, "unknown algorithm: '%s'.", optarg); + if (strcmp(optarg, "rb2") == 0) + algo_type = BWTALGO_RB2; + else if (strcmp(optarg, "bwtsw") == 0) + algo_type = BWTALGO_BWTSW; + else if (strcmp(optarg, "is") == 0) + algo_type = BWTALGO_IS; + else + err_fatal(__func__, "unknown algorithm: '%s'.", optarg); + break; + case 'p': + prefix = strdup(optarg); + break; + case '6': + is_64 = 1; break; - case 'p': prefix = strdup(optarg); break; - case '6': is_64 = 1; break; case 'b': block_size = strtol(optarg, &str, 10); - if (*str == 'G' || *str == 'g') block_size *= 1024 * 1024 * 1024; - else if (*str == 'M' || *str == 'm') block_size *= 1024 * 1024; - else if (*str == 'K' || *str == 'k') block_size *= 1024; + if (*str == 'G' || *str == 'g') + block_size *= 1024 * 1024 * 1024; + else if (*str == 'M' || *str == 'm') + block_size *= 1024 * 1024; + else if (*str == 'K' || *str == 'k') + block_size *= 1024; break; - 
default: return 1; + default: + return 1; } } - if (optind + 1 > argc) { + if (optind + 1 > argc) + { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa index [options] \n\n"); fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw, is or rb2 [auto]\n"); @@ -238,14 +278,16 @@ int bwa_index(int argc, char *argv[]) // the "index" command fprintf(stderr, " -b INT block size for the bwtsw algorithm (effective with -a bwtsw) [%d]\n", block_size); fprintf(stderr, " -6 index files named as .64.* instead of .* \n"); fprintf(stderr, "\n"); - fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n"); + fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n"); fprintf(stderr, " `-a div' do not work not for long genomes.\n\n"); return 1; } - if (prefix == 0) { + if (prefix == 0) + { prefix = malloc(strlen(argv[optind]) + 4); strcpy(prefix, argv[optind]); - if (is_64) strcat(prefix, ".64"); + if (is_64) + strcat(prefix, ".64"); } bwa_idx_build(argv[optind], prefix, algo_type, block_size); free(prefix); @@ -260,64 +302,84 @@ int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_s clock_t t; int64_t l_pac; - str = (char*)calloc(strlen(prefix) + 10, 1); - str2 = (char*)calloc(strlen(prefix) + 10, 1); - str3 = (char*)calloc(strlen(prefix) + 10, 1); + str = (char *)calloc(strlen(prefix) + 10, 1); + str2 = (char *)calloc(strlen(prefix) + 10, 1); + str3 = (char *)calloc(strlen(prefix) + 10, 1); { // nucleotide indexing gzFile fp = xzopen(fa, "r"); t = clock(); - if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack FASTA... "); + if (bwa_verbose >= 3) + fprintf(stderr, "[bwa_index] Pack FASTA... 
"); l_pac = bns_fasta2bntseq(fp, prefix, 0); - if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); + if (bwa_verbose >= 3) + fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); err_gzclose(fp); } - if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT + if (algo_type == 0) + algo_type = l_pac > 50000000 ? 2 : 3; // set the algorithm for generating BWT { - strcpy(str, prefix); strcat(str, ".pac"); - strcpy(str2, prefix); strcat(str2, ".bwt"); + strcpy(str, prefix); + strcat(str, ".pac"); + strcpy(str2, prefix); + strcat(str2, ".bwt"); t = clock(); - if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n"); - if (algo_type == 2) bwt_bwtgen2(str, str2, block_size); - else if (algo_type == 1 || algo_type == 3) { + if (bwa_verbose >= 3) + fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n"); + if (algo_type == 2) + bwt_bwtgen2(str, str2, block_size); + else if (algo_type == 1 || algo_type == 3) + { bwt_t *bwt; bwt = bwt_pac2bwt(str, algo_type == 3); bwt_dump_bwt(str2, bwt); bwt_destroy(bwt); } - if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC); + if (bwa_verbose >= 3) + fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC); } { bwt_t *bwt; - strcpy(str, prefix); strcat(str, ".bwt"); + strcpy(str, prefix); + strcat(str, ".bwt"); t = clock(); - if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Update BWT... "); + if (bwa_verbose >= 3) + fprintf(stderr, "[bwa_index] Update BWT... 
"); bwt = bwt_restore_bwt(str); bwt_bwtupdate_core(bwt); bwt_dump_bwt(str, bwt); bwt_destroy(bwt); - if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); + if (bwa_verbose >= 3) + fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } { gzFile fp = xzopen(fa, "r"); t = clock(); - if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack forward-only FASTA... "); + if (bwa_verbose >= 3) + fprintf(stderr, "[bwa_index] Pack forward-only FASTA... "); l_pac = bns_fasta2bntseq(fp, prefix, 1); - if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); + if (bwa_verbose >= 3) + fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); err_gzclose(fp); } { bwt_t *bwt; - strcpy(str, prefix); strcat(str, ".bwt"); - strcpy(str3, prefix); strcat(str3, ".sa"); + strcpy(str, prefix); + strcat(str, ".bwt"); + strcpy(str3, prefix); + strcat(str3, ".sa"); t = clock(); - if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... "); + if (bwa_verbose >= 3) + fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... "); bwt = bwt_restore_bwt(str); bwt_cal_sa(bwt, 32); bwt_dump_sa(str3, bwt); bwt_destroy(bwt); - if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); + if (bwa_verbose >= 3) + fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } - free(str3); free(str2); free(str); + free(str3); + free(str2); + free(str); return 0; } diff --git a/fastmap.c b/fastmap.c index be7ba0e..9514aee 100644 --- a/fastmap.c +++ b/fastmap.c @@ -1,8 +1,8 @@ /* The MIT License Copyright (c) 2018- Dana-Farber Cancer Institute - 2009-2018 Broad Institute, Inc. - 2008-2009 Genome Research Ltd. (GRL) + 2009-2018 Broad Institute, Inc. + 2008-2009 Genome Research Ltd. 
(GRL) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -40,13 +40,48 @@ #include "kseq.h" KSEQ_DECLARE(gzFile) +FILE *query_f = 0, *target_f = 0, *info_f = 0; + +// 记录运行时间的变量 +#ifdef SHOW_PERF +// 用来调试,计算感兴趣部分的运行时间 +#include "sys/time.h" +// 获取当前毫秒数 +int64_t get_mseconds() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (int64_t)1000 * (tv.tv_sec + ((1e-6) * tv.tv_usec)); +} +int64_t time_ksw_extend2 = 0, + time_ksw_global2 = 0, + time_ksw_align2 = 0, + time_bwt_smem1a = 0, + time_bwt_occ4 = 0, + time_bwt_sa = 0, + time_mem_chain = 0, + time_worker1 = 0, + time_worker2 = 0, + time_process_step0 = 0, + time_process_step1 = 0, + time_process_step2 = 0, + time_before_process = 0, + time_process = 0, + time_after_process = 0, + count_process_step2 = 0, + count_ksw_extend2 = 0; + +#endif +////////////////////////////////// + extern unsigned char nst_nt4_table[256]; void *kopen(const char *fn, int *_fd); int kclose(void *a); -void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps); +void kt_pipeline(int n_threads, void *(*func)(void *, int, void *), void *shared_data, int n_steps); -typedef struct { +typedef struct +{ kseq_t *ks, *ks2; mem_opt_t *opt; mem_pestat_t *pes0; @@ -55,7 +90,8 @@ typedef struct { bwaidx_t *idx; } ktp_aux_t; -typedef struct { +typedef struct +{ ktp_aux_t *aux; int n_seqs; bseq1_t *seqs; @@ -63,60 +99,106 @@ typedef struct { static void *process(void *shared, int step, void *_data) { - ktp_aux_t *aux = (ktp_aux_t*)shared; - ktp_data_t *data = (ktp_data_t*)_data; + ktp_aux_t *aux = (ktp_aux_t *)shared; + ktp_data_t *data = (ktp_data_t *)_data; int i; - if (step == 0) { + if (step == 0) + { +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif ktp_data_t *ret; int64_t size = 0; ret = calloc(1, sizeof(ktp_data_t)); ret->seqs = bseq_read(aux->actual_chunk_size, &ret->n_seqs, aux->ks, aux->ks2); - if 
(ret->seqs == 0) { + if (ret->seqs == 0) + { free(ret); +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_process_step0, tmp_diff); +#endif return 0; } if (!aux->copy_comment) - for (i = 0; i < ret->n_seqs; ++i) { + for (i = 0; i < ret->n_seqs; ++i) + { free(ret->seqs[i].comment); ret->seqs[i].comment = 0; } - for (i = 0; i < ret->n_seqs; ++i) size += ret->seqs[i].l_seq; + for (i = 0; i < ret->n_seqs; ++i) + size += ret->seqs[i].l_seq; if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, ret->n_seqs, (long)size); +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_process_step0, tmp_diff); +#endif return ret; - } else if (step == 1) { + } + else if (step == 1) + { +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif const mem_opt_t *opt = aux->opt; const bwaidx_t *idx = aux->idx; - if (opt->flag & MEM_F_SMARTPE) { + if (opt->flag & MEM_F_SMARTPE) + { bseq1_t *sep[2]; int n_sep[2]; mem_opt_t tmp_opt = *opt; bseq_classify(data->n_seqs, data->seqs, n_sep, sep); if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] %d single-end sequences; %d paired-end sequences\n", __func__, n_sep[0], n_sep[1]); - if (n_sep[0]) { + if (n_sep[0]) + { tmp_opt.flag &= ~MEM_F_PE; mem_process_seqs(&tmp_opt, idx->bwt, idx->bns, idx->pac, aux->n_processed, n_sep[0], sep[0], 0); for (i = 0; i < n_sep[0]; ++i) data->seqs[sep[0][i].id].sam = sep[0][i].sam; } - if (n_sep[1]) { + if (n_sep[1]) + { tmp_opt.flag |= MEM_F_PE; mem_process_seqs(&tmp_opt, idx->bwt, idx->bns, idx->pac, aux->n_processed + n_sep[0], n_sep[1], sep[1], aux->pes0); for (i = 0; i < n_sep[1]; ++i) data->seqs[sep[1][i].id].sam = sep[1][i].sam; } - free(sep[0]); free(sep[1]); - } else mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, aux->n_processed, data->n_seqs, data->seqs, aux->pes0); - aux->n_processed += data->n_seqs; - return data; - } else if (step == 2) { - for (i = 0; i < 
data->n_seqs; ++i) { - if (data->seqs[i].sam) err_fputs(data->seqs[i].sam, stdout); - free(data->seqs[i].name); free(data->seqs[i].comment); - free(data->seqs[i].seq); free(data->seqs[i].qual); free(data->seqs[i].sam); + free(sep[0]); + free(sep[1]); } - free(data->seqs); free(data); + else + mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, aux->n_processed, data->n_seqs, data->seqs, aux->pes0); + aux->n_processed += data->n_seqs; +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_process_step1, tmp_diff); + __sync_fetch_and_add(&count_process_step2, 1); +#endif + return data; + } + else if (step == 2) + { +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif + for (i = 0; i < data->n_seqs; ++i) + { + if (data->seqs[i].sam) + err_fputs(data->seqs[i].sam, stdout); + free(data->seqs[i].name); + free(data->seqs[i].comment); + free(data->seqs[i].seq); + free(data->seqs[i].qual); + free(data->seqs[i].sam); + } + free(data->seqs); + free(data); +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_process_step2, tmp_diff); +#endif return 0; } return 0; @@ -124,22 +206,36 @@ static void *process(void *shared, int step, void *_data) static void update_a(mem_opt_t *opt, const mem_opt_t *opt0) { - if (opt0->a) { // matching score is changed - if (!opt0->b) opt->b *= opt->a; - if (!opt0->T) opt->T *= opt->a; - if (!opt0->o_del) opt->o_del *= opt->a; - if (!opt0->e_del) opt->e_del *= opt->a; - if (!opt0->o_ins) opt->o_ins *= opt->a; - if (!opt0->e_ins) opt->e_ins *= opt->a; - if (!opt0->zdrop) opt->zdrop *= opt->a; - if (!opt0->pen_clip5) opt->pen_clip5 *= opt->a; - if (!opt0->pen_clip3) opt->pen_clip3 *= opt->a; - if (!opt0->pen_unpaired) opt->pen_unpaired *= opt->a; + if (opt0->a) + { // matching score is changed + if (!opt0->b) + opt->b *= opt->a; + if (!opt0->T) + opt->T *= opt->a; + if (!opt0->o_del) + opt->o_del *= opt->a; + if (!opt0->e_del) + opt->e_del *= opt->a; + 
if (!opt0->o_ins) + opt->o_ins *= opt->a; + if (!opt0->e_ins) + opt->e_ins *= opt->a; + if (!opt0->zdrop) + opt->zdrop *= opt->a; + if (!opt0->pen_clip5) + opt->pen_clip5 *= opt->a; + if (!opt0->pen_clip3) + opt->pen_clip3 *= opt->a; + if (!opt0->pen_unpaired) + opt->pen_unpaired *= opt->a; } } int main_mem(int argc, char *argv[]) { +#ifdef SHOW_PERF + int64_t start_time = get_mseconds(); +#endif mem_opt_t *opt, opt0; int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0; int fixed_chunk_size = -1; @@ -152,119 +248,187 @@ int main_mem(int argc, char *argv[]) memset(&aux, 0, sizeof(ktp_aux_t)); memset(pes, 0, 4 * sizeof(mem_pestat_t)); - for (i = 0; i < 4; ++i) pes[i].failed = 1; + for (i = 0; i < 4; ++i) + pes[i].failed = 1; + + query_f = fopen("query.fa", "w"); + target_f = fopen("target.fa", "w"); + info_f = fopen("info.txt", "w"); aux.opt = opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:")) >= 0) { - if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; - else if (c == '1') no_mt_io = 1; - else if (c == 'x') mode = optarg; - else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1; - else if (c == 'A') opt->a = atoi(optarg), opt0.a = 1; - else if (c == 'B') opt->b = atoi(optarg), opt0.b = 1; - else if (c == 'T') opt->T = atoi(optarg), opt0.T = 1; - else if (c == 'U') opt->pen_unpaired = atoi(optarg), opt0.pen_unpaired = 1; - else if (c == 't') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? 
opt->n_threads : 1; - else if (c == 'P') opt->flag |= MEM_F_NOPAIRING; - else if (c == 'a') opt->flag |= MEM_F_ALL; - else if (c == 'p') opt->flag |= MEM_F_PE | MEM_F_SMARTPE; - else if (c == 'M') opt->flag |= MEM_F_NO_MULTI; - else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE; - else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP; - else if (c == 'V') opt->flag |= MEM_F_REF_HDR; - else if (c == '5') opt->flag |= MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ; // always apply MEM_F_KEEP_SUPP_MAPQ with -5 - else if (c == 'q') opt->flag |= MEM_F_KEEP_SUPP_MAPQ; - else if (c == 'u') opt->flag |= MEM_F_XB; - else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1; - else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1; - else if (c == 'v') bwa_verbose = atoi(optarg); - else if (c == 'j') ignore_alt = 1; - else if (c == 'r') opt->split_factor = atof(optarg), opt0.split_factor = 1.; - else if (c == 'D') opt->drop_ratio = atof(optarg), opt0.drop_ratio = 1.; - else if (c == 'm') opt->max_matesw = atoi(optarg), opt0.max_matesw = 1; - else if (c == 's') opt->split_width = atoi(optarg), opt0.split_width = 1; - else if (c == 'G') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1; - else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1; - else if (c == 'o' || c == 'f') xreopen(optarg, "wb", stdout); - else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1; - else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1; - else if (c == 'C') aux.copy_comment = 1; - else if (c == 'K') fixed_chunk_size = atoi(optarg); - else if (c == 'X') opt->mask_level = atof(optarg); - else if (c == 'F') bwa_dbg = atoi(optarg); - else if (c == 'h') { + while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:")) >= 0) + { + if (c == 'k') + opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; + else if (c == '1') + no_mt_io = 1; + else if (c == 'x') + mode = 
optarg; + else if (c == 'w') + opt->w = atoi(optarg), opt0.w = 1; + else if (c == 'A') + opt->a = atoi(optarg), opt0.a = 1; + else if (c == 'B') + opt->b = atoi(optarg), opt0.b = 1; + else if (c == 'T') + opt->T = atoi(optarg), opt0.T = 1; + else if (c == 'U') + opt->pen_unpaired = atoi(optarg), opt0.pen_unpaired = 1; + else if (c == 't') + opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1 ? opt->n_threads : 1; + else if (c == 'P') + opt->flag |= MEM_F_NOPAIRING; + else if (c == 'a') + opt->flag |= MEM_F_ALL; + else if (c == 'p') + opt->flag |= MEM_F_PE | MEM_F_SMARTPE; + else if (c == 'M') + opt->flag |= MEM_F_NO_MULTI; + else if (c == 'S') + opt->flag |= MEM_F_NO_RESCUE; + else if (c == 'Y') + opt->flag |= MEM_F_SOFTCLIP; + else if (c == 'V') + opt->flag |= MEM_F_REF_HDR; + else if (c == '5') + opt->flag |= MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ; // always apply MEM_F_KEEP_SUPP_MAPQ with -5 + else if (c == 'q') + opt->flag |= MEM_F_KEEP_SUPP_MAPQ; + else if (c == 'u') + opt->flag |= MEM_F_XB; + else if (c == 'c') + opt->max_occ = atoi(optarg), opt0.max_occ = 1; + else if (c == 'd') + opt->zdrop = atoi(optarg), opt0.zdrop = 1; + else if (c == 'v') + bwa_verbose = atoi(optarg); + else if (c == 'j') + ignore_alt = 1; + else if (c == 'r') + opt->split_factor = atof(optarg), opt0.split_factor = 1.; + else if (c == 'D') + opt->drop_ratio = atof(optarg), opt0.drop_ratio = 1.; + else if (c == 'm') + opt->max_matesw = atoi(optarg), opt0.max_matesw = 1; + else if (c == 's') + opt->split_width = atoi(optarg), opt0.split_width = 1; + else if (c == 'G') + opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1; + else if (c == 'N') + opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1; + else if (c == 'o' || c == 'f') + xreopen(optarg, "wb", stdout); + else if (c == 'W') + opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1; + else if (c == 'y') + opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1; + else if (c == 'C') + 
aux.copy_comment = 1; + else if (c == 'K') + fixed_chunk_size = atoi(optarg); + else if (c == 'X') + opt->mask_level = atof(optarg); + else if (c == 'F') + bwa_dbg = atoi(optarg); + else if (c == 'h') + { opt0.max_XA_hits = opt0.max_XA_hits_alt = 1; opt->max_XA_hits = opt->max_XA_hits_alt = strtol(optarg, &p, 10); if (*p != 0 && ispunct(*p) && isdigit(p[1])) - opt->max_XA_hits_alt = strtol(p+1, &p, 10); + opt->max_XA_hits_alt = strtol(p + 1, &p, 10); } - else if (c == 'z') opt->XA_drop_ratio = atof(optarg); - else if (c == 'Q') { + else if (c == 'z') + opt->XA_drop_ratio = atof(optarg); + else if (c == 'Q') + { opt0.mapQ_coef_len = 1; opt->mapQ_coef_len = atoi(optarg); - opt->mapQ_coef_fac = opt->mapQ_coef_len > 0? log(opt->mapQ_coef_len) : 0; - } else if (c == 'O') { + opt->mapQ_coef_fac = opt->mapQ_coef_len > 0 ? log(opt->mapQ_coef_len) : 0; + } + else if (c == 'O') + { opt0.o_del = opt0.o_ins = 1; opt->o_del = opt->o_ins = strtol(optarg, &p, 10); if (*p != 0 && ispunct(*p) && isdigit(p[1])) - opt->o_ins = strtol(p+1, &p, 10); - } else if (c == 'E') { + opt->o_ins = strtol(p + 1, &p, 10); + } + else if (c == 'E') + { opt0.e_del = opt0.e_ins = 1; opt->e_del = opt->e_ins = strtol(optarg, &p, 10); if (*p != 0 && ispunct(*p) && isdigit(p[1])) - opt->e_ins = strtol(p+1, &p, 10); - } else if (c == 'L') { + opt->e_ins = strtol(p + 1, &p, 10); + } + else if (c == 'L') + { opt0.pen_clip5 = opt0.pen_clip3 = 1; opt->pen_clip5 = opt->pen_clip3 = strtol(optarg, &p, 10); if (*p != 0 && ispunct(*p) && isdigit(p[1])) - opt->pen_clip3 = strtol(p+1, &p, 10); - } else if (c == 'R') { - if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak - } else if (c == 'H') { - if (optarg[0] != '@') { + opt->pen_clip3 = strtol(p + 1, &p, 10); + } + else if (c == 'R') + { + if ((rg_line = bwa_set_rg(optarg)) == 0) + return 1; // FIXME: memory leak + } + else if (c == 'H') + { + if (optarg[0] != '@') + { FILE *fp; - if ((fp = fopen(optarg, "r")) != 0) { + if ((fp = 
fopen(optarg, "r")) != 0) + { char *buf; buf = calloc(1, 0x10000); - while (fgets(buf, 0xffff, fp)) { + while (fgets(buf, 0xffff, fp)) + { i = strlen(buf); - assert(buf[i-1] == '\n'); // a long line - buf[i-1] = 0; + assert(buf[i - 1] == '\n'); // a long line + buf[i - 1] = 0; hdr_line = bwa_insert_header(buf, hdr_line); } free(buf); fclose(fp); } - } else hdr_line = bwa_insert_header(optarg, hdr_line); - } else if (c == 'I') { // specify the insert size distribution + } + else + hdr_line = bwa_insert_header(optarg, hdr_line); + } + else if (c == 'I') + { // specify the insert size distribution aux.pes0 = pes; pes[1].failed = 0; pes[1].avg = strtod(optarg, &p); pes[1].std = pes[1].avg * .1; if (*p != 0 && ispunct(*p) && isdigit(p[1])) - pes[1].std = strtod(p+1, &p); + pes[1].std = strtod(p + 1, &p); pes[1].high = (int)(pes[1].avg + 4. * pes[1].std + .499); - pes[1].low = (int)(pes[1].avg - 4. * pes[1].std + .499); - if (pes[1].low < 1) pes[1].low = 1; + pes[1].low = (int)(pes[1].avg - 4. 
* pes[1].std + .499); + if (pes[1].low < 1) + pes[1].low = 1; if (*p != 0 && ispunct(*p) && isdigit(p[1])) - pes[1].high = (int)(strtod(p+1, &p) + .499); + pes[1].high = (int)(strtod(p + 1, &p) + .499); if (*p != 0 && ispunct(*p) && isdigit(p[1])) - pes[1].low = (int)(strtod(p+1, &p) + .499); + pes[1].low = (int)(strtod(p + 1, &p) + .499); if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] mean insert size: %.3f, stddev: %.3f, max: %d, min: %d\n", __func__, pes[1].avg, pes[1].std, pes[1].high, pes[1].low); } - else return 1; + else + return 1; } - if (rg_line) { + if (rg_line) + { hdr_line = bwa_insert_header(rg_line, hdr_line); free(rg_line); } - if (opt->n_threads < 1) opt->n_threads = 1; - if (optind + 1 >= argc || optind + 3 < argc) { + if (opt->n_threads < 1) + opt->n_threads = 1; + if (optind + 1 >= argc || optind + 3 < argc) + { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa mem [options] [in2.fq]\n\n"); fprintf(stderr, "Algorithm options:\n\n"); @@ -274,7 +438,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop); fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor); fprintf(stderr, " -y INT seed occurrence for the 3rd round seeding [%ld]\n", (long)opt->max_mem_intv); -// fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); + // fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ); fprintf(stderr, " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->drop_ratio); fprintf(stderr, " -W INT discard a chain if seeded bases shorter than INT [0]\n"); @@ -304,7 +468,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, "\n"); fprintf(stderr, " -v INT verbosity level: 1=error, 2=warning, 
3=message, 4+=debugging [%d]\n", bwa_verbose); fprintf(stderr, " -T INT minimum score to output [%d]\n", opt->T); - fprintf(stderr, " -h INT[,INT] if there are %.2f%% of the max score, output all in XA [%d,%d]\n", + fprintf(stderr, " -h INT[,INT] if there are %.2f%% of the max score, output all in XA [%d,%d]\n", opt->XA_drop_ratio * 100.0, opt->max_XA_hits, opt->max_XA_hits_alt); fprintf(stderr, " A second value may be given for alternate sequences.\n"); @@ -327,62 +491,103 @@ int main_mem(int argc, char *argv[]) return 1; } - if (mode) { - if (strcmp(mode, "intractg") == 0) { - if (!opt0.o_del) opt->o_del = 16; - if (!opt0.o_ins) opt->o_ins = 16; - if (!opt0.b) opt->b = 9; - if (!opt0.pen_clip5) opt->pen_clip5 = 5; - if (!opt0.pen_clip3) opt->pen_clip3 = 5; - } else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "ont2d") == 0) { - if (!opt0.o_del) opt->o_del = 1; - if (!opt0.e_del) opt->e_del = 1; - if (!opt0.o_ins) opt->o_ins = 1; - if (!opt0.e_ins) opt->e_ins = 1; - if (!opt0.b) opt->b = 1; - if (opt0.split_factor == 0.) 
opt->split_factor = 10.; - if (strcmp(mode, "ont2d") == 0) { - if (!opt0.min_chain_weight) opt->min_chain_weight = 20; - if (!opt0.min_seed_len) opt->min_seed_len = 14; - if (!opt0.pen_clip5) opt->pen_clip5 = 0; - if (!opt0.pen_clip3) opt->pen_clip3 = 0; - } else { - if (!opt0.min_chain_weight) opt->min_chain_weight = 40; - if (!opt0.min_seed_len) opt->min_seed_len = 17; - if (!opt0.pen_clip5) opt->pen_clip5 = 0; - if (!opt0.pen_clip3) opt->pen_clip3 = 0; + if (mode) + { + if (strcmp(mode, "intractg") == 0) + { + if (!opt0.o_del) + opt->o_del = 16; + if (!opt0.o_ins) + opt->o_ins = 16; + if (!opt0.b) + opt->b = 9; + if (!opt0.pen_clip5) + opt->pen_clip5 = 5; + if (!opt0.pen_clip3) + opt->pen_clip3 = 5; + } + else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "ont2d") == 0) + { + if (!opt0.o_del) + opt->o_del = 1; + if (!opt0.e_del) + opt->e_del = 1; + if (!opt0.o_ins) + opt->o_ins = 1; + if (!opt0.e_ins) + opt->e_ins = 1; + if (!opt0.b) + opt->b = 1; + if (opt0.split_factor == 0.) 
+ opt->split_factor = 10.; + if (strcmp(mode, "ont2d") == 0) + { + if (!opt0.min_chain_weight) + opt->min_chain_weight = 20; + if (!opt0.min_seed_len) + opt->min_seed_len = 14; + if (!opt0.pen_clip5) + opt->pen_clip5 = 0; + if (!opt0.pen_clip3) + opt->pen_clip3 = 0; } - } else { + else + { + if (!opt0.min_chain_weight) + opt->min_chain_weight = 40; + if (!opt0.min_seed_len) + opt->min_seed_len = 17; + if (!opt0.pen_clip5) + opt->pen_clip5 = 0; + if (!opt0.pen_clip3) + opt->pen_clip3 = 0; + } + } + else + { fprintf(stderr, "[E::%s] unknown read type '%s'\n", __func__, mode); return 1; // FIXME memory leak } - } else update_a(opt, &opt0); + } + else + update_a(opt, &opt0); bwa_fill_scmat(opt->a, opt->b, opt->mat); aux.idx = bwa_idx_load_from_shm(argv[optind]); - if (aux.idx == 0) { - if ((aux.idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak - } else if (bwa_verbose >= 3) + if (aux.idx == 0) + { + if ((aux.idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) + return 1; // FIXME: memory leak + } + else if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] load the bwa index from shared memory\n", __func__); if (ignore_alt) for (i = 0; i < aux.idx->bns->n_seqs; ++i) aux.idx->bns->anns[i].is_alt = 0; ko = kopen(argv[optind + 1], &fd); - if (ko == 0) { - if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]); + if (ko == 0) + { + if (bwa_verbose >= 1) + fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]); return 1; } fp = gzdopen(fd, "r"); aux.ks = kseq_init(fp); - if (optind + 2 < argc) { - if (opt->flag&MEM_F_PE) { + if (optind + 2 < argc) + { + if (opt->flag & MEM_F_PE) + { if (bwa_verbose >= 2) fprintf(stderr, "[W::%s] when '-p' is in use, the second query file is ignored.\n", __func__); - } else { + } + else + { ko2 = kopen(argv[optind + 2], &fd2); - if (ko2 == 0) { - if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, 
argv[optind + 2]); + if (ko2 == 0) + { + if (bwa_verbose >= 1) + fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 2]); return 1; } fp2 = gzdopen(fd2, "r"); @@ -391,17 +596,64 @@ int main_mem(int argc, char *argv[]) } } bwa_print_sam_hdr(aux.idx->bns, hdr_line); - aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads; - kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3); + aux.actual_chunk_size = fixed_chunk_size > 0 ? fixed_chunk_size : opt->chunk_size * opt->n_threads; +#ifdef SHOW_PERF + int64_t tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_before_process, tmp_diff); + start_time = get_mseconds(); +#endif + kt_pipeline(no_mt_io ? 1 : 2, process, &aux, 3); +#ifdef SHOW_PERF + tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_process, tmp_diff); + start_time = get_mseconds(); +#endif free(hdr_line); free(opt); bwa_idx_destroy(aux.idx); kseq_destroy(aux.ks); - err_gzclose(fp); kclose(ko); - if (aux.ks2) { + err_gzclose(fp); + kclose(ko); + if (aux.ks2) + { kseq_destroy(aux.ks2); - err_gzclose(fp2); kclose(ko2); + err_gzclose(fp2); + kclose(ko2); } + if (query_f != 0) + fclose(query_f); + if (target_f != 0) + fclose(target_f); + if (info_f != 0) + fclose(info_f); +#ifdef SHOW_PERF + tmp_diff = get_mseconds() - start_time; + __sync_fetch_and_add(&time_after_process, tmp_diff); +#endif + +#ifdef SHOW_PERF + fprintf(stderr, "\n"); + fprintf(stderr, "time_ksw_extend2: %f s\n", time_ksw_extend2 / 1000.0 / opt->n_threads); + fprintf(stderr, "time_ksw_global2: %f s\n", time_ksw_global2 / 1000.0 / opt->n_threads); + fprintf(stderr, "time_ksw_align2: %f s\n", time_ksw_align2 / 1000.0 / opt->n_threads); + fprintf(stderr, "time_bwt_smem1a: %f s\n", time_bwt_smem1a / 1000.0 / opt->n_threads); + // fprintf(stderr, "time_bwt_occ4: %f s\n", time_bwt_occ4 / 1000.0 / opt->n_threads); + fprintf(stderr, "time_bwt_sa: %f s\n", time_bwt_sa / 1000.0 / opt->n_threads); + 
fprintf(stderr, "time_mem_chain: %f s\n", time_mem_chain / 1000.0 / opt->n_threads); + fprintf(stderr, "time_worker1: %f s\n", time_worker1 / 1000.0); + fprintf(stderr, "time_worker2: %f s\n", time_worker2 / 1000.0); + fprintf(stderr, "time_process_step0: %f s\n", time_process_step0 / 1000.0); + fprintf(stderr, "time_process_step1: %f s\n", time_process_step1 / 1000.0); + fprintf(stderr, "time_process_step2: %f s\n", time_process_step2 / 1000.0); + fprintf(stderr, "time_before_process: %f s\n", time_before_process / 1000.0); + fprintf(stderr, "time_process: %f s\n", time_process / 1000.0); + fprintf(stderr, "time_after_process: %f s\n", time_after_process / 1000.0); + fprintf(stderr, "count_process_step2: %ld cnts\n", count_process_step2); + fprintf(stderr, "count_ksw_extend2: %ld cnts\n", count_ksw_extend2); + + fprintf(stderr, "\n"); +#endif + return 0; } @@ -416,18 +668,34 @@ int main_fastmap(int argc, char *argv[]) const bwtintv_v *a; bwaidx_t *idx; - while ((c = getopt(argc, argv, "w:l:pi:I:L:")) >= 0) { - switch (c) { - case 'p': print_seq = 1; break; - case 'w': min_iwidth = atoi(optarg); break; - case 'l': min_len = atoi(optarg); break; - case 'i': min_intv = atoi(optarg); break; - case 'I': max_intv = atol(optarg); break; - case 'L': max_len = atoi(optarg); break; - default: return 1; + while ((c = getopt(argc, argv, "w:l:pi:I:L:")) >= 0) + { + switch (c) + { + case 'p': + print_seq = 1; + break; + case 'w': + min_iwidth = atoi(optarg); + break; + case 'l': + min_len = atoi(optarg); + break; + case 'i': + min_intv = atoi(optarg); + break; + case 'I': + max_intv = atol(optarg); + break; + case 'L': + max_len = atoi(optarg); + break; + default: + return 1; } } - if (optind + 1 >= argc) { + if (optind + 1 >= argc) + { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa fastmap [options] \n\n"); fprintf(stderr, "Options: -l INT min SMEM length to output [%d]\n", min_len); @@ -441,34 +709,47 @@ int main_fastmap(int argc, char *argv[]) fp = xzopen(argv[optind + 
1], "r"); seq = kseq_init(fp); - if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1; + if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT | BWA_IDX_BNS)) == 0) + return 1; itr = smem_itr_init(idx->bwt); smem_config(itr, min_intv, max_len, max_intv); - while (kseq_read(seq) >= 0) { + while (kseq_read(seq) >= 0) + { err_printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l); - if (print_seq) { + if (print_seq) + { err_putchar('\t'); err_puts(seq->seq.s); - } else err_putchar('\n'); + } + else + err_putchar('\n'); for (i = 0; i < seq->seq.l; ++i) seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]]; - smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s); - while ((a = smem_next(itr)) != 0) { - for (i = 0; i < a->n; ++i) { + smem_set_query(itr, seq->seq.l, (uint8_t *)seq->seq.s); + while ((a = smem_next(itr)) != 0) + { + for (i = 0; i < a->n; ++i) + { bwtintv_t *p = &a->a[i]; - if ((uint32_t)p->info - (p->info>>32) < min_len) continue; - err_printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]); - if (p->x[2] <= min_iwidth) { - for (k = 0; k < p->x[2]; ++k) { + if ((uint32_t)p->info - (p->info >> 32) < min_len) + continue; + err_printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info >> 32), (uint32_t)p->info, (long)p->x[2]); + if (p->x[2] <= min_iwidth) + { + for (k = 0; k < p->x[2]; ++k) + { bwtint_t pos; int len, is_rev, ref_id; - len = (uint32_t)p->info - (p->info>>32); + len = (uint32_t)p->info - (p->info >> 32); pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev); - if (is_rev) pos -= len - 1; + if (is_rev) + pos -= len - 1; bns_cnt_ambi(idx->bns, pos, len, &ref_id); err_printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1); } - } else err_puts("\t*"); + } + else + err_puts("\t*"); err_putchar('\n'); } } diff --git a/ksw.c b/ksw.c index 1e584e9..2b20e47 100644 --- a/ksw.c +++ b/ksw.c @@ -22,7 +22,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
DEALINGS IN THE SOFTWARE. */ - +#include #include #include #include @@ -36,20 +36,21 @@ #include "ksw.h" #ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" +#include "malloc_wrap.h" #endif #ifdef __GNUC__ -#define LIKELY(x) __builtin_expect((x),1) -#define UNLIKELY(x) __builtin_expect((x),0) +#define LIKELY(x) __builtin_expect((x), 1) +#define UNLIKELY(x) __builtin_expect((x), 0) #else #define LIKELY(x) (x) #define UNLIKELY(x) (x) #endif -const kswr_t g_defr = { 0, -1, -1, -1, -1, -1, -1 }; +const kswr_t g_defr = {0, -1, -1, -1, -1, -1, -1}; -struct _kswq_t { +struct _kswq_t +{ int qlen, slen; uint8_t shift, mdiff, max, size; __m128i *qp, *H0, *H1, *E, *Hmax; @@ -71,44 +72,54 @@ kswq_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t kswq_t *q; int slen, a, tmp, p; - size = size > 1? 2 : 1; - p = 8 * (3 - size); // # values per __m128i - slen = (qlen + p - 1) / p; // segmented length - q = (kswq_t*)malloc(sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory - q->qp = (__m128i*)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory + size = size > 1 ? 
2 : 1; + p = 8 * (3 - size); // # values per __m128i + slen = (qlen + p - 1) / p; // segmented length + q = (kswq_t *)malloc(sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory + q->qp = (__m128i *)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory q->H0 = q->qp + slen * m; q->H1 = q->H0 + slen; - q->E = q->H1 + slen; + q->E = q->H1 + slen; q->Hmax = q->E + slen; - q->slen = slen; q->qlen = qlen; q->size = size; + q->slen = slen; + q->qlen = qlen; + q->size = size; // compute shift tmp = m * m; - for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score - if (mat[a] < (int8_t)q->shift) q->shift = mat[a]; - if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a]; + for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) + { // find the minimum and maximum score + if (mat[a] < (int8_t)q->shift) + q->shift = mat[a]; + if (mat[a] > (int8_t)q->mdiff) + q->mdiff = mat[a]; } q->max = q->mdiff; q->shift = 256 - q->shift; // NB: q->shift is uint8_t - q->mdiff += q->shift; // this is the difference between the min and max scores + q->mdiff += q->shift; // this is the difference between the min and max scores // An example: p=8, qlen=19, slen=3 and segmentation: // {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}} - if (size == 1) { - int8_t *t = (int8_t*)q->qp; - for (a = 0; a < m; ++a) { + if (size == 1) + { + int8_t *t = (int8_t *)q->qp; + for (a = 0; a < m; ++a) + { int i, k, nlen = slen * p; const int8_t *ma = mat + a * m; for (i = 0; i < slen; ++i) for (k = i; k < nlen; k += slen) // p iterations - *t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift; + *t++ = (k >= qlen ? 
0 : ma[query[k]]) + q->shift; } - } else { - int16_t *t = (int16_t*)q->qp; - for (a = 0; a < m; ++a) { + } + else + { + int16_t *t = (int16_t *)q->qp; + for (a = 0; a < m; ++a) + { int i, k, nlen = slen * p; const int8_t *ma = mat + a * m; for (i = 0; i < slen; ++i) for (k = i; k < nlen; k += slen) // p iterations - *t++ = (k >= qlen? 0 : ma[query[k]]); + *t++ = (k >= qlen ? 0 : ma[query[k]]); } } return q; @@ -127,12 +138,14 @@ kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del kswr_t r; #if defined __SSE2__ -#define __max_16(ret, xx) do { \ +#define __max_16(ret, xx) \ + do \ + { \ (xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 8)); \ (xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 4)); \ (xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 2)); \ (xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 1)); \ - (ret) = _mm_extract_epi16((xx), 0) & 0x00ff; \ + (ret) = _mm_extract_epi16((xx), 0) & 0x00ff; \ } while (0) // Given entries with arbitrary values, return whether they are all 0x00 @@ -149,29 +162,36 @@ kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del // initialization r = g_defr; - minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000; - endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000; - m_b = n_b = 0; b = 0; + minsc = (xtra & KSW_XSUBO) ? xtra & 0xffff : 0x10000; + endsc = (xtra & KSW_XSTOP) ? 
xtra & 0xffff : 0x10000; + m_b = n_b = 0; + b = 0; zero = _mm_set1_epi32(0); oe_del = _mm_set1_epi8(_o_del + _e_del); e_del = _mm_set1_epi8(_e_del); oe_ins = _mm_set1_epi8(_o_ins + _e_ins); e_ins = _mm_set1_epi8(_e_ins); shift = _mm_set1_epi8(q->shift); - H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax; + H0 = q->H0; + H1 = q->H1; + E = q->E; + Hmax = q->Hmax; slen = q->slen; - for (i = 0; i < slen; ++i) { + for (i = 0; i < slen; ++i) + { _mm_store_si128(E + i, zero); _mm_store_si128(H0 + i, zero); _mm_store_si128(Hmax + i, zero); } // the core loop - for (i = 0; i < tlen; ++i) { + for (i = 0; i < tlen; ++i) + { int j, k, imax; __m128i e, h, t, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector - h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example - h = _mm_slli_si128(h, 1); // h=H(i-1,-1); << instead of >> because x64 is little-endian - for (j = 0; LIKELY(j < slen); ++j) { + h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example + h = _mm_slli_si128(h, 1); // h=H(i-1,-1); << instead of >> because x64 is little-endian + for (j = 0; LIKELY(j < slen); ++j) + { /* SW cells are computed in the following order: * H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)} * E(i+1,j) = max{H(i,j)-q, E(i,j)-r} @@ -180,16 +200,16 @@ kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del // compute H'(i,j); note that at the beginning, h=H'(i-1,j-1) h = _mm_adds_epu8(h, _mm_load_si128(S + j)); h = _mm_subs_epu8(h, shift); // h=H'(i-1,j-1)+S(i,j) - e = _mm_load_si128(E + j); // e=E'(i,j) + e = _mm_load_si128(E + j); // e=E'(i,j) h = _mm_max_epu8(h, e); - h = _mm_max_epu8(h, f); // h=H'(i,j) + h = _mm_max_epu8(h, f); // h=H'(i,j) max = _mm_max_epu8(max, h); // set max _mm_store_si128(H1 + j, h); // save to H'(i,j) // now compute E'(i+1,j) - e = _mm_subs_epu8(e, e_del); // e=E'(i,j) - e_del + e = _mm_subs_epu8(e, e_del); // e=E'(i,j) - e_del t = _mm_subs_epu8(h, oe_del); 
// h=H'(i,j) - o_del - e_del - e = _mm_max_epu8(e, t); // e=E'(i+1,j) - _mm_store_si128(E + j, e); // save to E'(i+1,j) + e = _mm_max_epu8(e, t); // e=E'(i+1,j) + _mm_store_si128(E + j, e); // save to E'(i+1,j) // now compute F'(i,j+1) f = _mm_subs_epu8(f, e_ins); t = _mm_subs_epu8(h, oe_ins); // h=H'(i,j) - o_ins - e_ins @@ -198,53 +218,72 @@ kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del h = _mm_load_si128(H0 + j); // h=H'(i-1,j) } // NB: we do not need to set E(i,j) as we disallow adjecent insertion and then deletion - for (k = 0; LIKELY(k < 16); ++k) { // this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max + for (k = 0; LIKELY(k < 16); ++k) + { // this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max f = _mm_slli_si128(f, 1); - for (j = 0; LIKELY(j < slen); ++j) { + for (j = 0; LIKELY(j < slen); ++j) + { h = _mm_load_si128(H1 + j); h = _mm_max_epu8(h, f); // h=H'(i,j) _mm_store_si128(H1 + j, h); h = _mm_subs_epu8(h, oe_ins); f = _mm_subs_epu8(f, e_ins); - if (UNLIKELY(allzero_16(_mm_subs_epu8(f, h)))) goto end_loop16; + if (UNLIKELY(allzero_16(_mm_subs_epu8(f, h)))) + goto end_loop16; } } -end_loop16: - //int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n"); + end_loop16: + // int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n"); __max_16(imax, max); // imax is the maximum number in max - if (imax >= minsc) { // write the b array; this condition adds branching unfornately - if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // then append - if (n_b == m_b) { - m_b = m_b? m_b<<1 : 8; - b = (uint64_t*)realloc(b, 8 * m_b); + if (imax >= minsc) + { // write the b array; this condition adds branching unfornately + if (n_b == 0 || (int32_t)b[n_b - 1] + 1 != i) + { // then append + if (n_b == m_b) + { + m_b = m_b ? 
m_b << 1 : 8; + b = (uint64_t *)realloc(b, 8 * m_b); } - b[n_b++] = (uint64_t)imax<<32 | i; - } else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last + b[n_b++] = (uint64_t)imax << 32 | i; + } + else if ((int)(b[n_b - 1] >> 32) < imax) + b[n_b - 1] = (uint64_t)imax << 32 | i; // modify the last } - if (imax > gmax) { - gmax = imax; te = i; // te is the end position on the target + if (imax > gmax) + { + gmax = imax; + te = i; // te is the end position on the target for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector _mm_store_si128(Hmax + j, _mm_load_si128(H1 + j)); - if (gmax + q->shift >= 255 || gmax >= endsc) break; + if (gmax + q->shift >= 255 || gmax >= endsc) + break; } - S = H1; H1 = H0; H0 = S; // swap H0 and H1 + S = H1; + H1 = H0; + H0 = S; // swap H0 and H1 } - r.score = gmax + q->shift < 255? gmax : 255; + r.score = gmax + q->shift < 255 ? gmax : 255; r.te = te; - if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score + if (r.score != 255) + { // get a->qe, the end of query match; find the 2nd best score int max = -1, tmp, low, high, qlen = slen * 16; - uint8_t *t = (uint8_t*)Hmax; + uint8_t *t = (uint8_t *)Hmax; for (i = 0; i < qlen; ++i, ++t) - if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen; - else if ((int)*t == max && (tmp = i / 16 + i % 16 * slen) < r.qe) r.qe = tmp; - //printf("%d,%d\n", max, gmax); - if (b) { + if ((int)*t > max) + max = *t, r.qe = i / 16 + i % 16 * slen; + else if ((int)*t == max && (tmp = i / 16 + i % 16 * slen) < r.qe) + r.qe = tmp; + // printf("%d,%d\n", max, gmax); + if (b) + { i = (r.score + q->max - 1) / q->max; - low = te - i; high = te + i; - for (i = 0; i < n_b; ++i) { + low = te - i; + high = te + i; + for (i = 0; i < n_b; ++i) + { int e = (int32_t)b[i]; - if ((e < low || e > high) && (int)(b[i]>>32) > r.score2) - r.score2 = b[i]>>32, r.te2 = e; + if ((e < low || e > high) && (int)(b[i] >> 32) > r.score2) + r.score2 = b[i] >> 32, r.te2 = 
e; } } } @@ -260,11 +299,13 @@ kswr_t ksw_i16(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_de kswr_t r; #if defined __SSE2__ -#define __max_8(ret, xx) do { \ +#define __max_8(ret, xx) \ + do \ + { \ (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \ (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \ (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \ - (ret) = _mm_extract_epi16((xx), 0); \ + (ret) = _mm_extract_epi16((xx), 0); \ } while (0) // Given entries all either 0x0000 or 0xffff, return whether they are all 0x0000 @@ -281,28 +322,35 @@ kswr_t ksw_i16(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_de // initialization r = g_defr; - minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000; - endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000; - m_b = n_b = 0; b = 0; + minsc = (xtra & KSW_XSUBO) ? xtra & 0xffff : 0x10000; + endsc = (xtra & KSW_XSTOP) ? xtra & 0xffff : 0x10000; + m_b = n_b = 0; + b = 0; zero = _mm_set1_epi32(0); oe_del = _mm_set1_epi16(_o_del + _e_del); e_del = _mm_set1_epi16(_e_del); oe_ins = _mm_set1_epi16(_o_ins + _e_ins); e_ins = _mm_set1_epi16(_e_ins); - H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax; + H0 = q->H0; + H1 = q->H1; + E = q->E; + Hmax = q->Hmax; slen = q->slen; - for (i = 0; i < slen; ++i) { + for (i = 0; i < slen; ++i) + { _mm_store_si128(E + i, zero); _mm_store_si128(H0 + i, zero); _mm_store_si128(Hmax + i, zero); } // the core loop - for (i = 0; i < tlen; ++i) { + for (i = 0; i < tlen; ++i) + { int j, k, imax; __m128i e, t, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector - h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example + h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example h = _mm_slli_si128(h, 2); - for (j = 0; LIKELY(j < slen); ++j) { + for (j = 0; LIKELY(j < slen); ++j) + { h = _mm_adds_epi16(h, _mm_load_si128(S++)); e = _mm_load_si128(E + j); h = _mm_max_epi16(h, e); @@ -318,50 
+366,69 @@ kswr_t ksw_i16(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_de f = _mm_max_epi16(f, t); h = _mm_load_si128(H0 + j); } - for (k = 0; LIKELY(k < 16); ++k) { + for (k = 0; LIKELY(k < 16); ++k) + { f = _mm_slli_si128(f, 2); - for (j = 0; LIKELY(j < slen); ++j) { + for (j = 0; LIKELY(j < slen); ++j) + { h = _mm_load_si128(H1 + j); h = _mm_max_epi16(h, f); _mm_store_si128(H1 + j, h); h = _mm_subs_epu16(h, oe_ins); f = _mm_subs_epu16(f, e_ins); - if(UNLIKELY(allzero_0f_8(_mm_cmpgt_epi16(f, h)))) goto end_loop8; + if (UNLIKELY(allzero_0f_8(_mm_cmpgt_epi16(f, h)))) + goto end_loop8; } } -end_loop8: + end_loop8: __max_8(imax, max); - if (imax >= minsc) { - if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { - if (n_b == m_b) { - m_b = m_b? m_b<<1 : 8; - b = (uint64_t*)realloc(b, 8 * m_b); + if (imax >= minsc) + { + if (n_b == 0 || (int32_t)b[n_b - 1] + 1 != i) + { + if (n_b == m_b) + { + m_b = m_b ? m_b << 1 : 8; + b = (uint64_t *)realloc(b, 8 * m_b); } - b[n_b++] = (uint64_t)imax<<32 | i; - } else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last + b[n_b++] = (uint64_t)imax << 32 | i; + } + else if ((int)(b[n_b - 1] >> 32) < imax) + b[n_b - 1] = (uint64_t)imax << 32 | i; // modify the last } - if (imax > gmax) { - gmax = imax; te = i; + if (imax > gmax) + { + gmax = imax; + te = i; for (j = 0; LIKELY(j < slen); ++j) _mm_store_si128(Hmax + j, _mm_load_si128(H1 + j)); - if (gmax >= endsc) break; + if (gmax >= endsc) + break; } - S = H1; H1 = H0; H0 = S; + S = H1; + H1 = H0; + H0 = S; } - r.score = gmax; r.te = te; + r.score = gmax; + r.te = te; { int max = -1, tmp, low, high, qlen = slen * 8; - uint16_t *t = (uint16_t*)Hmax; + uint16_t *t = (uint16_t *)Hmax; for (i = 0, r.qe = -1; i < qlen; ++i, ++t) - if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen; - else if ((int)*t == max && (tmp = i / 8 + i % 8 * slen) < r.qe) r.qe = tmp; - if (b) { + if ((int)*t > max) + max = *t, r.qe = i / 8 + i % 8 * slen; + else if 
((int)*t == max && (tmp = i / 8 + i % 8 * slen) < r.qe) + r.qe = tmp; + if (b) + { i = (r.score + q->max - 1) / q->max; - low = te - i; high = te + i; - for (i = 0; i < n_b; ++i) { + low = te - i; + high = te + i; + for (i = 0; i < n_b; ++i) + { int e = (int32_t)b[i]; - if ((e < low || e > high) && (int)(b[i]>>32) > r.score2) - r.score2 = b[i]>>32, r.te2 = e; + if ((e < low || e > high) && (int)(b[i] >> 32) > r.score2) + r.score2 = b[i] >> 32, r.te2 = e; } } } @@ -372,7 +439,7 @@ end_loop8: static inline void revseq(int l, uint8_t *s) { int i, t; - for (i = 0; i < l>>1; ++i) + for (i = 0; i < l >> 1; ++i) t = s[i], s[i] = s[l - 1 - i], s[l - 1 - i] = t; } @@ -381,19 +448,24 @@ kswr_t ksw_align2(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, co int size; kswq_t *q; kswr_t r, rr; - kswr_t (*func)(kswq_t*, int, const uint8_t*, int, int, int, int, int); + kswr_t (*func)(kswq_t *, int, const uint8_t *, int, int, int, int, int); - q = (qry && *qry)? *qry : ksw_qinit((xtra&KSW_XBYTE)? 1 : 2, qlen, query, m, mat); - if (qry && *qry == 0) *qry = q; - func = q->size == 2? ksw_i16 : ksw_u8; + q = (qry && *qry) ? *qry : ksw_qinit((xtra & KSW_XBYTE) ? 1 : 2, qlen, query, m, mat); + if (qry && *qry == 0) + *qry = q; + func = q->size == 2 ? 
ksw_i16 : ksw_u8; size = q->size; r = func(q, tlen, target, o_del, e_del, o_ins, e_ins, xtra); - if (qry == 0) free(q); - if ((xtra&KSW_XSTART) == 0 || ((xtra&KSW_XSUBO) && r.score < (xtra&0xffff))) return r; - revseq(r.qe + 1, query); revseq(r.te + 1, target); // +1 because qe/te points to the exact end, not the position after the end + if (qry == 0) + free(q); + if ((xtra & KSW_XSTART) == 0 || ((xtra & KSW_XSUBO) && r.score < (xtra & 0xffff))) + return r; + revseq(r.qe + 1, query); + revseq(r.te + 1, target); // +1 because qe/te points to the exact end, not the position after the end q = ksw_qinit(size, r.qe + 1, query, m, mat); rr = func(q, tlen, target, o_del, e_del, o_ins, e_ins, KSW_XSTOP | r.score); - revseq(r.qe + 1, query); revseq(r.te + 1, target); + revseq(r.qe + 1, query); + revseq(r.te + 1, target); free(q); if (r.score == rr.score) r.tb = r.te - rr.te, r.qb = r.qe - rr.qe; @@ -409,55 +481,103 @@ kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, con *** SW extension *** ********************/ -typedef struct { +// Write the query and target sequences, plus h0 and the two lengths, to the query/target/info output files + +void write_query_target_sequence(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int h0) +{ + // 写到三个文件里,query.fa,target.fa,每行一个序列,info.txt,包含前缀得分h0,和长度信息qlen,tlen + extern FILE *query_f, *target_f, *info_f; + const char seq_map[5] = {'A', 'C', 'G', 'T', 'N'}; + int i; + // 处理query + for (i = 0; i < qlen; ++i) + fprintf(query_f, "%c", seq_map[query[i]]); + fprintf(query_f, "\n"); + // 处理target + for (i = 0; i < tlen; ++i) + fprintf(target_f, "%c", seq_map[target[i]]); + fprintf(target_f, "\n"); + // 处理其他信息 + fprintf(info_f, "%-8d%-8d%-8d\n", qlen, tlen, h0); +} + +typedef struct +{ int32_t h, e; } eh_t; +extern int ksw_extend2_avx2_u8(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del, + int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int
*_max_off); + int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off) { - eh_t *eh; // score array + // return ksw_extend2_avx2_u8(qlen, query, tlen, target, 0, m, mat, o_del, e_del, o_ins, e_ins, 1, 4, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off); + + eh_t *eh; // score array int8_t *qp; // query profile int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; assert(h0 > 0); + write_query_target_sequence(qlen, query, tlen, target, h0); + // // 调试用,打印mat信息 + // for (i=0; i<5; ++i) { + // for (j=0; j<5; ++j) { + // fprintf(stderr, "%-4d", mat[5*i + j]); + // } + // fprintf(stderr, "\n"); + // } + // fprintf(stderr, "\n"); // allocate memory qp = malloc(qlen * m); eh = calloc(qlen + 1, 8); // generate the query profile - for (k = i = 0; k < m; ++k) { + for (k = i = 0; k < m; ++k) + { const int8_t *p = &mat[k * m]; - for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]]; + for (j = 0; j < qlen; ++j) + qp[i++] = p[query[j]]; } // fill the first row - eh[0].h = h0; eh[1].h = h0 > oe_ins? h0 - oe_ins : 0; - for (j = 2; j <= qlen && eh[j-1].h > e_ins; ++j) - eh[j].h = eh[j-1].h - e_ins; + eh[0].h = h0; + eh[1].h = h0 > oe_ins ? h0 - oe_ins : 0; + for (j = 2; j <= qlen && eh[j - 1].h > e_ins; ++j) + eh[j].h = eh[j - 1].h - e_ins; // adjust $w if it is too large k = m * m; for (i = 0, max = 0; i < k; ++i) // get the max score - max = max > mat[i]? max : mat[i]; + max = max > mat[i] ? max : mat[i]; max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.); - max_ins = max_ins > 1? max_ins : 1; - w = w < max_ins? w : max_ins; + max_ins = max_ins > 1 ? max_ins : 1; + w = w < max_ins ? 
w : max_ins; max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.); - max_del = max_del > 1? max_del : 1; - w = w < max_del? w : max_del; // TODO: is this necessary? + max_del = max_del > 1 ? max_del : 1; + w = w < max_del ? w : max_del; // TODO: is this necessary? // DP loop - max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1; + max = h0, max_i = max_j = -1; + max_ie = -1, gscore = -1; max_off = 0; beg = 0, end = qlen; - for (i = 0; LIKELY(i < tlen); ++i) { + for (i = 0; LIKELY(i < tlen); ++i) + { int t, f = 0, h1, m = 0, mj = -1; int8_t *q = &qp[target[i] * qlen]; // apply the band and the constraint (if provided) - if (beg < i - w) beg = i - w; - if (end > i + w + 1) end = i + w + 1; - if (end > qlen) end = qlen; + if (beg < i - w) + beg = i - w; + if (end > i + w + 1) + end = i + w + 1; + if (end > qlen) + end = qlen; // compute the first column - if (beg == 0) { + if (beg == 0) + { h1 = h0 - (o_del + e_del * (i + 1)); - if (h1 < 0) h1 = 0; - } else h1 = 0; - for (j = beg; LIKELY(j < end); ++j) { + if (h1 < 0) + h1 = 0; + } + else + h1 = 0; + for (j = beg; LIKELY(j < end); ++j) + { // At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1) // Similar to SSE2-SW, cells are computed in the following order: // H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)} @@ -465,52 +585,71 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, // F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape eh_t *p = &eh[j]; int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j) - p->h = h1; // set H(i,j-1) for the next row - M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M" - h = M > e? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0 - h = h > f? h : f; - h1 = h; // save H(i,j) to h1 for the next column - mj = m > h? mj : j; // record the position where max score is achieved - m = m > h? 
m : h; // m is stored at eh[mj+1] + p->h = h1; // set H(i,j-1) for the next row + M = M ? M + q[j] : 0; // separating H and M to disallow a cigar like "100M3I3D20M" + h = M > e ? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0 + h = h > f ? h : f; + h1 = h; // save H(i,j) to h1 for the next column + mj = m > h ? mj : j; // record the position where max score is achieved + m = m > h ? m : h; // m is stored at eh[mj+1] t = M - oe_del; - t = t > 0? t : 0; + t = t > 0 ? t : 0; e -= e_del; - e = e > t? e : t; // computed E(i+1,j) - p->e = e; // save E(i+1,j) for the next row + e = e > t ? e : t; // computed E(i+1,j) + p->e = e; // save E(i+1,j) for the next row t = M - oe_ins; - t = t > 0? t : 0; + t = t > 0 ? t : 0; f -= e_ins; - f = f > t? f : t; // computed F(i,j+1) + f = f > t ? f : t; // computed F(i,j+1) } - eh[end].h = h1; eh[end].e = 0; - if (j == qlen) { - max_ie = gscore > h1? max_ie : i; - gscore = gscore > h1? gscore : h1; + eh[end].h = h1; + eh[end].e = 0; + if (j == qlen) + { + max_ie = gscore > h1 ? max_ie : i; + gscore = gscore > h1 ? gscore : h1; } - if (m == 0) break; - if (m > max) { + if (m == 0) + break; + if (m > max) + { max = m, max_i = i, max_j = mj; - max_off = max_off > abs(mj - i)? max_off : abs(mj - i); - } else if (zdrop > 0) { - if (i - max_i > mj - max_j) { - if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop) break; - } else { - if (max - m - ((mj - max_j) - (i - max_i)) * e_ins > zdrop) break; + max_off = max_off > abs(mj - i) ? 
max_off : abs(mj - i); + } + else if (zdrop > 0) + { + if (i - max_i > mj - max_j) + { + if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop) + break; + } + else + { + if (max - m - ((mj - max_j) - (i - max_i)) * e_ins > zdrop) + break; } } // update beg and end for the next round - for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j); + for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j) + ; beg = j; - for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j); - end = j + 2 < qlen? j + 2 : qlen; - //beg = 0; end = qlen; // uncomment this line for debugging + for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j) + ; + end = j + 2 < qlen ? j + 2 : qlen; + // beg = 0; end = qlen; // uncomment this line for debugging } - free(eh); free(qp); - if (_qle) *_qle = max_j + 1; - if (_tle) *_tle = max_i + 1; - if (_gtle) *_gtle = max_ie + 1; - if (_gscore) *_gscore = gscore; - if (_max_off) *_max_off = max_off; + free(eh); + free(qp); + if (_qle) + *_qle = max_j + 1; + if (_tle) + *_tle = max_i + 1; + if (_gtle) + *_gtle = max_ie + 1; + if (_gscore) + *_gscore = gscore; + if (_max_off) + *_max_off = max_off; return max; } @@ -527,13 +666,17 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, static inline uint32_t *push_cigar(int *n_cigar, int *m_cigar, uint32_t *cigar, int op, int len) { - if (*n_cigar == 0 || op != (cigar[(*n_cigar) - 1]&0xf)) { - if (*n_cigar == *m_cigar) { - *m_cigar = *m_cigar? (*m_cigar)<<1 : 4; + if (*n_cigar == 0 || op != (cigar[(*n_cigar) - 1] & 0xf)) + { + if (*n_cigar == *m_cigar) + { + *m_cigar = *m_cigar ? 
(*m_cigar) << 1 : 4; cigar = realloc(cigar, (*m_cigar) << 2); } - cigar[(*n_cigar)++] = len<<4 | op; - } else cigar[(*n_cigar)-1] += len<<4; + cigar[(*n_cigar)++] = len << 4 | op; + } + else + cigar[(*n_cigar) - 1] += len << 4; return cigar; } @@ -543,32 +686,40 @@ int ksw_global2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t *qp; // query profile int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, score, n_col; uint8_t *z; // backtrack matrix; in each cell: f<<4|e<<2|h; in principle, we can halve the memory, but backtrack will be a little more complex - if (n_cigar_) *n_cigar_ = 0; + if (n_cigar_) + *n_cigar_ = 0; // allocate memory - n_col = qlen < 2*w+1? qlen : 2*w+1; // maximum #columns of the backtrack matrix - z = n_cigar_ && cigar_? malloc((long)n_col * tlen) : 0; + n_col = qlen < 2 * w + 1 ? qlen : 2 * w + 1; // maximum #columns of the backtrack matrix + z = n_cigar_ && cigar_ ? malloc((long)n_col * tlen) : 0; qp = malloc(qlen * m); eh = calloc(qlen + 1, 8); // generate the query profile - for (k = i = 0; k < m; ++k) { + for (k = i = 0; k < m; ++k) + { const int8_t *p = &mat[k * m]; - for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]]; + for (j = 0; j < qlen; ++j) + qp[i++] = p[query[j]]; } // fill the first row - eh[0].h = 0; eh[0].e = MINUS_INF; + eh[0].h = 0; + eh[0].e = MINUS_INF; for (j = 1; j <= qlen && j <= w; ++j) eh[j].h = -(o_ins + e_ins * j), eh[j].e = MINUS_INF; - for (; j <= qlen; ++j) eh[j].h = eh[j].e = MINUS_INF; // everything is -inf outside the band + for (; j <= qlen; ++j) + eh[j].h = eh[j].e = MINUS_INF; // everything is -inf outside the band // DP loop - for (i = 0; LIKELY(i < tlen); ++i) { // target sequence is in the outer loop + for (i = 0; LIKELY(i < tlen); ++i) + { // target sequence is in the outer loop int32_t f = MINUS_INF, h1, beg, end, t; int8_t *q = &qp[target[i] * qlen]; - beg = i > w? i - w : 0; - end = i + w + 1 < qlen? 
i + w + 1 : qlen; // only loop through [beg,end) of the query sequence - h1 = beg == 0? -(o_del + e_del * (i + 1)) : MINUS_INF; - if (n_cigar_ && cigar_) { + beg = i > w ? i - w : 0; + end = i + w + 1 < qlen ? i + w + 1 : qlen; // only loop through [beg,end) of the query sequence + h1 = beg == 0 ? -(o_del + e_del * (i + 1)) : MINUS_INF; + if (n_cigar_ && cigar_) + { uint8_t *zi = &z[(long)i * n_col]; - for (j = beg; LIKELY(j < end); ++j) { + for (j = beg; LIKELY(j < end); ++j) + { // At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1) // Cells are computed in the following order: // M(i,j) = H(i-1,j-1) + S(i,j) @@ -584,60 +735,74 @@ int ksw_global2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, uint8_t d; // direction p->h = h1; m += q[j]; - d = m >= e? 0 : 1; - h = m >= e? m : e; - d = h >= f? d : 2; - h = h >= f? h : f; + d = m >= e ? 0 : 1; + h = m >= e ? m : e; + d = h >= f ? d : 2; + h = h >= f ? h : f; h1 = h; t = m - oe_del; e -= e_del; - d |= e > t? 1<<2 : 0; - e = e > t? e : t; + d |= e > t ? 1 << 2 : 0; + e = e > t ? e : t; p->e = e; t = m - oe_ins; f -= e_ins; - d |= f > t? 2<<4 : 0; // if we want to halve the memory, use one bit only, instead of two - f = f > t? f : t; + d |= f > t ? 2 << 4 : 0; // if we want to halve the memory, use one bit only, instead of two + f = f > t ? f : t; zi[j - beg] = d; // z[i,j] keeps h for the current cell and e/f for the next cell } - } else { - for (j = beg; LIKELY(j < end); ++j) { + } + else + { + for (j = beg; LIKELY(j < end); ++j) + { eh_t *p = &eh[j]; int32_t h, m = p->h, e = p->e; p->h = h1; m += q[j]; - h = m >= e? m : e; - h = h >= f? h : f; + h = m >= e ? m : e; + h = h >= f ? h : f; h1 = h; t = m - oe_del; e -= e_del; - e = e > t? e : t; + e = e > t ? e : t; p->e = e; t = m - oe_ins; f -= e_ins; - f = f > t? f : t; + f = f > t ? 
f : t; } } - eh[end].h = h1; eh[end].e = MINUS_INF; + eh[end].h = h1; + eh[end].e = MINUS_INF; } score = eh[qlen].h; - if (n_cigar_ && cigar_) { // backtrack + if (n_cigar_ && cigar_) + { // backtrack int n_cigar = 0, m_cigar = 0, which = 0; uint32_t *cigar = 0, tmp; - i = tlen - 1; k = (i + w + 1 < qlen? i + w + 1 : qlen) - 1; // (i,k) points to the last cell - while (i >= 0 && k >= 0) { - which = z[(long)i * n_col + (k - (i > w? i - w : 0))] >> (which<<1) & 3; - if (which == 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1), --i, --k; - else if (which == 1) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1), --i; - else cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1), --k; + i = tlen - 1; + k = (i + w + 1 < qlen ? i + w + 1 : qlen) - 1; // (i,k) points to the last cell + while (i >= 0 && k >= 0) + { + which = z[(long)i * n_col + (k - (i > w ? i - w : 0))] >> (which << 1) & 3; + if (which == 0) + cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1), --i, --k; + else if (which == 1) + cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1), --i; + else + cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1), --k; } - if (i >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, i + 1); - if (k >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1); - for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR - tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp; + if (i >= 0) + cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, i + 1); + if (k >= 0) + cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1); + for (i = 0; i < n_cigar >> 1; ++i) // reverse CIGAR + tmp = cigar[i], cigar[i] = cigar[n_cigar - 1 - i], cigar[n_cigar - 1 - i] = tmp; *n_cigar_ = n_cigar, *cigar_ = cigar; } - free(eh); free(qp); free(z); + free(eh); + free(qp); + free(z); return score; } @@ -659,23 +824,22 @@ int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, KSEQ_INIT(gzFile, err_gzread) unsigned char seq_nt4_table[256] = { - 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 -}; + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; int main(int argc, char *argv[]) { @@ -687,63 +851,96 @@ int main(int argc, char *argv[]) kseq_t *kst, *ksq; // parse command line - while ((c = getopt(argc, argv, "a:b:q:r:ft:1")) >= 0) { - switch (c) { - case 'a': sa = atoi(optarg); break; - case 'b': sb = atoi(optarg); break; - case 'q': gapo = atoi(optarg); break; - case 'r': gape = atoi(optarg); break; - case 't': minsc = atoi(optarg); break; - case 'f': 
forward_only = 1; break; - case '1': xtra |= KSW_XBYTE; break; + while ((c = getopt(argc, argv, "a:b:q:r:ft:1")) >= 0) + { + switch (c) + { + case 'a': + sa = atoi(optarg); + break; + case 'b': + sb = atoi(optarg); + break; + case 'q': + gapo = atoi(optarg); + break; + case 'r': + gape = atoi(optarg); + break; + case 't': + minsc = atoi(optarg); + break; + case 'f': + forward_only = 1; + break; + case '1': + xtra |= KSW_XBYTE; + break; } } - if (optind + 2 > argc) { + if (optind + 2 > argc) + { fprintf(stderr, "Usage: ksw [-1] [-f] [-a%d] [-b%d] [-q%d] [-r%d] [-t%d] \n", sa, sb, gapo, gape, minsc); return 1; } - if (minsc > 0xffff) minsc = 0xffff; + if (minsc > 0xffff) + minsc = 0xffff; xtra |= KSW_XSUBO | minsc; // initialize scoring matrix - for (i = k = 0; i < 4; ++i) { + for (i = k = 0; i < 4; ++i) + { for (j = 0; j < 4; ++j) - mat[k++] = i == j? sa : -sb; + mat[k++] = i == j ? sa : -sb; mat[k++] = 0; // ambiguous base } - for (j = 0; j < 5; ++j) mat[k++] = 0; + for (j = 0; j < 5; ++j) + mat[k++] = 0; // open file - fpt = xzopen(argv[optind], "r"); kst = kseq_init(fpt); - fpq = xzopen(argv[optind+1], "r"); ksq = kseq_init(fpq); + fpt = xzopen(argv[optind], "r"); + kst = kseq_init(fpt); + fpq = xzopen(argv[optind + 1], "r"); + ksq = kseq_init(fpq); // all-pair alignment - while (kseq_read(ksq) > 0) { + while (kseq_read(ksq) > 0) + { kswq_t *q[2] = {0, 0}; kswr_t r; - for (i = 0; i < (int)ksq->seq.l; ++i) ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]]; - if (!forward_only) { // reverse - if ((int)ksq->seq.m > max_rseq) { + for (i = 0; i < (int)ksq->seq.l; ++i) + ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]]; + if (!forward_only) + { // reverse + if ((int)ksq->seq.m > max_rseq) + { max_rseq = ksq->seq.m; - rseq = (uint8_t*)realloc(rseq, max_rseq); + rseq = (uint8_t *)realloc(rseq, max_rseq); } for (i = 0, j = ksq->seq.l - 1; i < (int)ksq->seq.l; ++i, --j) - rseq[j] = ksq->seq.s[i] == 4? 4 : 3 - ksq->seq.s[i]; + rseq[j] = ksq->seq.s[i] == 4 ? 
4 : 3 - ksq->seq.s[i]; } - gzrewind(fpt); kseq_rewind(kst); - while (kseq_read(kst) > 0) { - for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]]; - r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]); + gzrewind(fpt); + kseq_rewind(kst); + while (kseq_read(kst) > 0) + { + for (i = 0; i < (int)kst->seq.l; ++i) + kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]]; + r = ksw_align(ksq->seq.l, (uint8_t *)ksq->seq.s, kst->seq.l, (uint8_t *)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]); if (r.score >= minsc) - err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2); - if (rseq) { - r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]); + err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te + 1, ksq->name.s, r.qb, r.qe + 1, r.score, r.score2, r.te2); + if (rseq) + { + r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t *)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]); if (r.score >= minsc) - err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2); + err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te + 1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2); } } - free(q[0]); free(q[1]); + free(q[0]); + free(q[1]); } free(rseq); - kseq_destroy(kst); err_gzclose(fpt); - kseq_destroy(ksq); err_gzclose(fpq); + kseq_destroy(kst); + err_gzclose(fpt); + kseq_destroy(ksq); + err_gzclose(fpq); return 0; } #endif diff --git a/ksw_avx2.c b/ksw_avx2.c new file mode 100644 index 0000000..1ad1767 --- /dev/null +++ b/ksw_avx2.c @@ -0,0 +1,450 @@ +#include +#include +#include +#include +#include +#include +#include + +#ifdef __GNUC__ +#define LIKELY(x) __builtin_expect((x), 1) 
+#define UNLIKELY(x) __builtin_expect((x), 0) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +#undef MAX +#undef MIN +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) +#define SIMD_WIDTH 32 + +static const uint8_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = { + {0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + +// static const uint8_t reverse_mask[SIMD_WIDTH] = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; +static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}; + +// const int permute_mask = _MM_SHUFFLE(0, 1, 2, 3); +#define permute_mask _MM_SHUFFLE(0, 1, 2, 3) +// 初始化变量 +#define SIMD_INIT \ + int oe_del = o_del + e_del, oe_ins = o_ins + e_ins; \ + __m256i zero_vec; \ + __m256i max_vec; \ + __m256i oe_del_vec; \ + __m256i oe_ins_vec; \ + __m256i e_del_vec; \ + __m256i e_ins_vec; \ + __m256i h_vec_mask[SIMD_WIDTH]; \ + __m256i reverse_mask_vec; \ + zero_vec = _mm256_setzero_si256(); \ + oe_del_vec = _mm256_set1_epi8(oe_del); \ + oe_ins_vec = _mm256_set1_epi8(oe_ins); \ + e_del_vec = _mm256_set1_epi8(e_del); \ + e_ins_vec = 
_mm256_set1_epi8(e_ins); \ + __m256i match_sc_vec = _mm256_set1_epi8(a); \ + __m256i mis_sc_vec = _mm256_set1_epi8(b); \ + __m256i amb_sc_vec = _mm256_set1_epi8(1); \ + __m256i amb_vec = _mm256_set1_epi8(4); \ + reverse_mask_vec = _mm256_loadu_si256((__m256i *)(reverse_mask)); \ + for (i = 0; i < SIMD_WIDTH; ++i) \ + h_vec_mask[i] = _mm256_loadu_si256((__m256i *)(&h_vec_int_mask[i])); + +/* + * e 表示当前ref的碱基被删除 + * f 表示当前seq的碱基插入 + * m 表示当前碱基匹配(可以相等,也可以不相等) + * h 表示最大值 + */ +// load向量化数据 +#define SIMD_LOAD \ + __m256i m1 = _mm256_loadu_si256((__m256i *)(&mA1[j])); \ + __m256i e1 = _mm256_loadu_si256((__m256i *)(&eA1[j])); \ + __m256i m1j1 = _mm256_loadu_si256((__m256i *)(&mA1[j - 1])); \ + __m256i f1j1 = _mm256_loadu_si256((__m256i *)(&fA1[j - 1])); \ + __m256i h0j1 = _mm256_loadu_si256((__m256i *)(&hA0[j - 1])); \ + __m256i qs_vec = _mm256_loadu_si256((__m256i *)(&seq[j - 1])); \ + __m256i ts_vec = _mm256_loadu_si256((__m256i *)(&ref[i])); + +// 比对ref和seq的序列,计算罚分 +#define SIMD_CMP_SEQ \ + ts_vec = _mm256_permute4x64_epi64(ts_vec, permute_mask); \ + ts_vec = _mm256_shuffle_epi8(ts_vec, reverse_mask_vec); \ + __m256i match_mask_vec = _mm256_cmpeq_epi8(qs_vec, ts_vec); \ + __m256i mis_score_vec = _mm256_andnot_si256(match_mask_vec, mis_sc_vec); \ + __m256i match_score_vec = _mm256_and_si256(match_sc_vec, match_mask_vec); \ + __m256i q_amb_mask_vec = _mm256_cmpeq_epi8(qs_vec, amb_vec); \ + __m256i t_amb_mask_vec = _mm256_cmpeq_epi8(ts_vec, amb_vec); \ + __m256i amb_mask_vec = _mm256_or_si256(q_amb_mask_vec, t_amb_mask_vec); \ + __m256i amb_score_vec = _mm256_and_si256(amb_mask_vec, amb_sc_vec); \ + mis_score_vec = _mm256_andnot_si256(amb_mask_vec, mis_score_vec); \ + mis_score_vec = _mm256_or_si256(amb_score_vec, mis_score_vec); \ + match_score_vec = _mm256_andnot_si256(amb_mask_vec, match_score_vec); + +// 向量化计算h, e, f, m +#define SIMD_COMPUTE \ + __m256i en_vec0 = _mm256_max_epu8(m1, oe_del_vec); \ + en_vec0 = _mm256_subs_epu8(en_vec0, oe_del_vec); \ + __m256i
en_vec1 = _mm256_max_epu8(e1, e_del_vec); \
+    en_vec1 = _mm256_subs_epu8(en_vec1, e_del_vec); \
+    __m256i en_vec = _mm256_max_epu8(en_vec0, en_vec1); \
+    __m256i fn_vec0 = _mm256_max_epu8(m1j1, oe_ins_vec); \
+    fn_vec0 = _mm256_subs_epu8(fn_vec0, oe_ins_vec); \
+    __m256i fn_vec1 = _mm256_max_epu8(f1j1, e_ins_vec); \
+    fn_vec1 = _mm256_subs_epu8(fn_vec1, e_ins_vec); \
+    __m256i fn_vec = _mm256_max_epu8(fn_vec0, fn_vec1); \
+    __m256i mn_vec0 = _mm256_adds_epu8(h0j1, match_score_vec); \
+    mn_vec0 = _mm256_max_epu8(mn_vec0, mis_score_vec); \
+    mn_vec0 = _mm256_subs_epu8(mn_vec0, mis_score_vec); \
+    __m256i mn_mask = _mm256_cmpeq_epi8(h0j1, zero_vec); \
+    __m256i mn_vec = _mm256_andnot_si256(mn_mask, mn_vec0); \
+    __m256i hn_vec0 = _mm256_max_epu8(en_vec, fn_vec); \
+    __m256i hn_vec = _mm256_max_epu8(hn_vec0, mn_vec);
+
+// Store the vectorized results (E, F, M, H) at index j of the current buffers and fold H into the running max_vec
+#define SIMD_STORE \
+    max_vec = _mm256_max_epu8(max_vec, hn_vec); \
+    _mm256_storeu_si256((__m256i *)&eA2[j], en_vec); \
+    _mm256_storeu_si256((__m256i *)&fA2[j], fn_vec); \
+    _mm256_storeu_si256((__m256i *)&mA2[j], mn_vec); \
+    _mm256_storeu_si256((__m256i *)&hA2[j], hn_vec);
+
+// Remove the surplus part: AND every result vector with h_vec_mask[end - j] (mask table is defined elsewhere)
+#define SIMD_REMOVE_EXTRA \
+    en_vec = _mm256_and_si256(en_vec, h_vec_mask[end - j]); \
+    fn_vec = _mm256_and_si256(fn_vec, h_vec_mask[end - j]); \
+    mn_vec = _mm256_and_si256(mn_vec, h_vec_mask[end - j]); \
+    hn_vec = _mm256_and_si256(hn_vec, h_vec_mask[end - j]);
+
+// Find the maximum value (stored in m) and its position (stored in mj / mi) by rescanning hA2[beg..end]
+#define SIMD_FIND_MAX \
+    uint8_t *maxVal = (uint8_t *)&max_vec; \
+    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 1)); \
+    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 2)); \
+    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 3)); \
+    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 4)); \
+    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 5)); \
+    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 6)); \
+    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 7)); \
+    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
+    max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
+    m = maxVal[0]; \
+    if (m > 0) \
+    { \
+        for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) \
+        { \
+            __m256i h2_vec = _mm256_loadu_si256((__m256i *)(&hA2[j])); \
+            __m256i vcmp = _mm256_cmpeq_epi8(h2_vec, max_vec); \
+            uint32_t mask = _mm256_movemask_epi8(vcmp); \
+            if (mask > 0) \
+            { \
+                int pos = SIMD_WIDTH - 1 - __builtin_clz(mask); \
+                mj = j - 1 + pos; \
+                mi = i - 1 - pos; \
+            } \
+        } \
+    }
+
+// After each iteration, swap the array pointers (three-way rotation for H, pairwise swap for E, F and M)
+#define SWAP_DATA_POINTER \
+    uint8_t *tmp = hA0; \
+    hA0 = hA1; \
+    hA1 = hA2; \
+    hA2 = tmp; \
+    tmp = eA1; \
+    eA1 = eA2; \
+    eA2 = tmp; \
+    tmp = fA1; \
+    fA1 = fA2; \
+    fA2 = tmp; \
+    tmp = mA1; \
+    mA1 = mA2; \
+    mA2 = tmp;
+// AVX2 seed extension on 8-bit scores: fills the DP one anti-diagonal (D) at a time and returns the best score; every output pointer may be NULL.
+int ksw_extend2_avx2_u8(int qlen,              // query length: number of query bases to be aligned
+                        const uint8_t *query,  // base sequence of the read
+                        int tlen,              // target length: length of the reference segment
+                        const uint8_t *target, // reference sequence
+                        int is_left,           // whether this is a leftward extension (both sequences are then copied reversed)
+                        int m,                 // number of base types (5)
+                        const int8_t *mat,     // m*m matrix of query/target match scores
+                        int o_del,             // deletion gap-open penalty
+                        int e_del,             // deletion gap-extension penalty
+                        int o_ins,             // insertion gap-open penalty
+                        int e_ins,             // insertion gap-extension penalty
+                        int a,                 // score for a base match
+                        int b,                 // penalty for a base mismatch (positive number)
+                        int w,                 // early-pruning factor (w = 100): maximum distance between the matched position and beg
+                        int end_bonus,
+                        int zdrop,
+                        int h0,                // initial score of this seed (number of exactly matched query bases)
+                        int *_qle,             // out: query position of the base that reaches the global maximum score
+                        int *_tle,             // out: reference position of the base that reaches the global maximum score
+                        int *_gtle,            // out: target length when the whole query is aligned
+                        int *_gscore,          // out: end-to-end alignment score of the query
+                        int *_max_off)         // out: largest query/reference position difference seen when a maximum score was reached
+{
+    uint8_t *mA, *hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0 keeps H from two columns back; the others keep the previous H, E, F, M
+    uint8_t *seq, *ref;
+    uint8_t *mem, *qtmem, *vmem;
+    int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH;
+    int i, iStart, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
+    int Dloop = tlen + qlen; // loop exit bound
+    int span, beg1, end1;    // used for the boundary computation
+    int col_size = qlen + 2 + SIMD_WIDTH;
+    int val_mem_size = (col_size * 9 + 31) >> 5 << 5; // rounded up to a multiple of 32 bytes
+    int mem_size = seq_size + ref_size + val_mem_size;
+
+    SIMD_INIT; // initialize the data used by the SIMD code
+
+    assert(h0 > 0);
+
+    // allocate memory: one buffer holding seq, ref and the nine DP columns (NOTE(review): the malloc result is used without a NULL check)
+    mem = malloc(mem_size);
+    qtmem = &mem[0];
+    seq = (uint8_t *)&qtmem[0];
+    ref = (uint8_t *)&qtmem[seq_size];
+    if (is_left)
+    {
+        for (i = 0; i < qlen; ++i)
+            seq[i] = query[qlen - 1 - i];
+        for (i = 0; i < tlen; ++i)
+            ref[i + SIMD_WIDTH] = target[tlen - 1 - i];
+    }
+    else
+    {
+        for (i = 0; i < qlen; ++i)
+            seq[i] = query[i];
+        for (i = 0; i < tlen; ++i)
+            ref[i + SIMD_WIDTH] = target[i];
+    }
+
+    vmem = &ref[ref_size];
+    for (i = 0; i < val_mem_size; i += SIMD_WIDTH)
+    {
+        _mm256_storeu_si256((__m256i *)&vmem[i], zero_vec);
+    }
+
+    hA = &vmem[0];
+    mA = &vmem[col_size * 3];
+    eA = &vmem[col_size * 5];
+    fA = &vmem[col_size * 7];
+
+    hA0 = &hA[0];
+    hA1 = &hA[col_size];
+    hA2 = &hA1[col_size];
+    mA1 = &mA[0];
+    mA2 = &mA[col_size];
+    eA1 = &eA[0];
+    eA2 = &eA[col_size];
+    fA1 = &fA[0];
+    fA2 = &fA[col_size];
+
+    // adjust $w if it is too large
+    k = m * m;
+    // get the max score
+    for (i = 0, max = 0; i < k; ++i)
+        max = max > mat[i] ? max : mat[i];
+    max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.);
+    max_ins = max_ins > 1 ? max_ins : 1;
+    w = w < max_ins ? w : max_ins;
+    max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.);
+    max_del = max_del > 1 ? max_del : 1;
+    w = w < max_del ? w : max_del; // TODO: is this necessary?
+    if (tlen < qlen)
+        w = MIN(tlen - 1, w);
+
+    // DP loop
+    max = h0, max_i = max_j = -1;
+    max_ie = -1, gscore = -1;
+    ;
+    max_off = 0;
+    beg = 1;
+    end = qlen;
+    // init h0
+    hA0[0] = h0; // top-left corner
+
+    if (qlen == 0 || tlen == 0)
+        Dloop = 0; // guard against unexpected input
+    if (w >= qlen)
+    {
+        max_ie = 0;
+        gscore = 0;
+    }
+
+    int m_last = 0;
+    int iend;
+
+    for (D = 1; LIKELY(D < Dloop); ++D)
+    {
+        // Mind the boundary conditions: tlen may be greater than, equal to, or less than qlen
+        if (D > tlen)
+        {
+            span = MIN(Dloop - D, w);
+            beg1 = MAX(D - tlen + 1, ((D - w) / 2) + 1);
+        }
+        else
+        {
+            span = MIN(D - 1, w);
+            beg1 = MAX(1, ((D - w) / 2) + 1);
+        }
+        end1 = MIN(qlen, beg1 + span);
+
+        if (beg < beg1)
+            beg = beg1;
+        if (end > end1)
+            end = end1;
+        if (beg > end)
+            break; // nothing left to compute, so leave now; otherwise hA2 is never assigned and still holds the previous round's hA0 values, which causes a bug
+
+        iend = D - (beg - 1); // position in ref where this round starts, walking in reverse order
+        span = end - beg;
+        iStart = iend - span - 1; // zero-based ref index
+
+        // data recorded for each round (note: this `m` shadows the parameter `m` for the rest of the loop body)
+        int m = 0, mj = -1, mi = -1;
+        max_vec = zero_vec;
+
+        // the boundaries need special handling
+        // left boundary: handle f (insert)
+        if (iStart == 0)
+        {
+            hA1[end] = MAX(0, h0 - (o_ins + e_ins * end));
+        }
+        // top boundary
+        if (beg == 1)
+        {
+            hA1[0] = MAX(0, h0 - (o_del + e_del * iend));
+        }
+        else
+        {
+            hA1[beg - 1] = 0;
+            eA1[beg - 1] = 0;
+        }
+
+        for (j = beg, i = iend; j <= end + 1 - SIMD_WIDTH; j += SIMD_WIDTH, i -= SIMD_WIDTH)
+        {
+            // load the data
+            SIMD_LOAD;
+            // compare the sequences and compute the penalty
+            SIMD_CMP_SEQ;
+            // compute
+            SIMD_COMPUTE;
+            // store the results
+            SIMD_STORE;
+        }
+        // the remaining cells (fewer than one full vector)
+        if (j <= end)
+        {
+            // load the data
+            SIMD_LOAD;
+            // compare the sequences and compute the penalty
+            SIMD_CMP_SEQ;
+            // compute
+            SIMD_COMPUTE;
+            // discard the surplus part that was computed
+            SIMD_REMOVE_EXTRA;
+            // store the results
+            SIMD_STORE;
+        }
+
+        SIMD_FIND_MAX;
+
+        // set j to the value it must have after the loops above have exited
+        j = end + 1;
+
+        if (j == qlen + 1)
+        {
+            max_ie = gscore > hA2[qlen] ? max_ie : iStart;
+            gscore = gscore > hA2[qlen] ? gscore : hA2[qlen];
+        }
+        if (m == 0 && m_last == 0)
+            break; // take care here: this is where anti-diagonal traversal differs from column-wise traversal
+        if (m > max)
+        {
+            max = m, max_i = mi, max_j = mj;
+            max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
+        }
+        else if (zdrop > 0)
+        {
+            if (mi - max_i > mj - max_j)
+            {
+                if (max - m - ((mi - max_i) - (mj - max_j)) * e_del > zdrop)
+                    break;
+            }
+            else
+            {
+                if (max - m - ((mj - max_j) - (mi - max_i)) * e_ins > zdrop)
+                    break;
+            }
+        }
+
+        // adjust the bounds of the computation
+        for (j = beg; LIKELY(j <= end); ++j)
+        {
+            int has_val = hA1[j - 1] | hA2[j];
+            if (has_val)
+                break;
+        }
+        beg = j;
+        for (j = end + 1; LIKELY(j >= beg); --j)
+        {
+            int has_val = hA1[j - 1] | hA2[j];
+            if (has_val)
+                break;
+            else
+                hA0[j - 1] = 0;
+        }
+        end = j + 1 <= qlen ? j + 1 : qlen;
+
+        m_last = m;
+        // swap m, h, e, f
+        SWAP_DATA_POINTER;
+    }
+
+    free(mem);
+    if (_qle)
+        *_qle = max_j + 1;
+    if (_tle)
+        *_tle = max_i + 1;
+    if (_gtle)
+        *_gtle = max_ie + 1;
+    if (_gscore)
+        *_gscore = gscore;
+    if (_max_off)
+        *_max_off = max_off;
+    return max;
+}
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..fff66e8
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,23 @@
+ time ./bwa mem -t 1 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
+    /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
+    /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \
+    /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \
+    -o /dev/null
+
+# time ./bwa mem -t 64 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
+# /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
+# /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \
+# /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \
+# -o /dev/null
+
+    #/public/home/zzh/data/fastq/n1.fq \
+    #/public/home/zzh/data/fastq/n2.fq \
+
+
+    #/share_nas3/zyseq-release-v1.1.3/zyseq/wes/resource/reference/human_g1k_v37_decoy.fasta \
+    #/share_nas3/zyseq-release-v1.1.3/zyseq/data/n1.fq \
+    #/share_nas3/zyseq-release-v1.1.3/zyseq/data/n2.fq \
+
+    #-o reads_mapping.sam
+
+