合并更改(run.sh)

This commit is contained in:
zzh 2023-08-24 14:30:01 +08:00
commit aa31eac682
14 changed files with 3312 additions and 1398 deletions

3
.gitignore vendored
View File

@ -1,4 +1,7 @@
test_out/
*.[oa]
*.fa
*.txt
bwa
test
test64

44
.vscode/launch.json vendored 100644
View File

@ -0,0 +1,44 @@
{
// 使 IntelliSense
//
// 访: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "bwa-mem",
"preLaunchTask": "Build",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceRoot}/bwa",
"args": [
"mem",
"-t",
"64",
"-M",
"-R",
"'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'",
"/share_nas3/zyseq-release-v1.1.3/zyseq/wes/resource/reference/human_g1k_v37_decoy.fasta",
"/share_nas3/zyseq-release-v1.1.3/zyseq/data/n1.fq",
"/share_nas3/zyseq-release-v1.1.3/zyseq/data/n2.fq",
//"/public/home/zzh/data/reference/human_g1k_v37_decoy.fasta",
//"/public/home/zzh/data/fastq/n1.fq",
//"/public/home/zzh/data/fastq/n2.fq",
"-o",
"/dev/null"
],
"cwd": "${workspaceFolder}", //
},
{
"name": "index",
"preLaunchTask": "Build",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceRoot}/bwa",
"args": [
"index",
"reference/human_g1k_v37_decoy.fasta"
],
"cwd": "${workspaceFolder}", //
}
]
}

5
.vscode/settings.json vendored 100644
View File

@ -0,0 +1,5 @@
{
"files.associations": {
"random": "c"
}
}

17
.vscode/tasks.json vendored 100644
View File

@ -0,0 +1,17 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "Build",
"type": "shell",
"command": "make clean; make -j 16",
"problemMatcher": [],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}

View File

@ -1,18 +1,22 @@
CC= gcc
#CC= clang --analyze
CFLAGS= -g -Wall -Wno-unused-function -O2
#CFLAGS= -g -Wall -Wno-unused-function -mavx2 -I../jemalloc-5.3.0/include
CFLAGS= -g -Wall -Wno-unused-function -O2 -mavx2 -I../jemalloc-5.3.0/include
#CFLAGS= -g -Wall -Wno-unused-function -O2 -mavx2
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
SHOW_PERF=-DSHOW_PERF
AR= ar
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF)
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o ksw_avx2.o
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
bwape.o kopen.o pemerge.o maxk.o \
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
PROG= bwa
INCLUDES=
LIBS= -lm -lz -lpthread
LIBS= -lm -lz -lpthread -ldl
#LIBS= -lm -lz -lpthread
SUBDIRS= .
ifeq ($(shell uname -s),Linux)
@ -26,8 +30,12 @@ endif
all:$(PROG)
bwa:libbwa.a $(AOBJS) main.o
$(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
# 用jemalloc代替malloc
bwa:libbwa.a ../jemalloc-5.3.0/lib/libjemalloc.a $(AOBJS) main.o
$(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) ../jemalloc-5.3.0/lib/libjemalloc.a main.o -o $@ -L. -lbwa $(LIBS)
# 原始malloc
#bwa:libbwa.a $(AOBJS) main.o
# $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
bwamem-lite:libbwa.a example.o
$(CC) $(CFLAGS) $(LDFLAGS) example.o -o $@ -L. -lbwa $(LIBS)

461
bwa.c
View File

@ -36,7 +36,12 @@
#include "kvec.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#include "malloc_wrap.h"
#endif
#ifdef SHOW_PERF
extern int64_t get_mseconds();
extern int64_t time_ksw_global2;
#endif
int bwa_verbose = 3;
@ -53,14 +58,15 @@ KSEQ_DECLARE(gzFile)
static inline void trim_readno(kstring_t *s)
{
if (s->l > 2 && s->s[s->l-2] == '/' && isdigit(s->s[s->l-1]))
if (s->l > 2 && s->s[s->l - 2] == '/' && isdigit(s->s[s->l - 1]))
s->l -= 2, s->s[s->l] = 0;
}
static inline char *dupkstring(const kstring_t *str, int dupempty)
{
char *s = (str->l > 0 || dupempty)? malloc(str->l + 1) : NULL;
if (!s) return NULL;
char *s = (str->l > 0 || dupempty) ? malloc(str->l + 1) : NULL;
if (!s)
return NULL;
memcpy(s, str->s, str->l);
s[str->l] = '\0';
@ -78,32 +84,39 @@ static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
{
kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
kseq_t *ks = (kseq_t *)ks1_, *ks2 = (kseq_t *)ks2_;
int size = 0, m, n;
bseq1_t *seqs;
m = n = 0; seqs = 0;
while (kseq_read(ks) >= 0) {
if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads
m = n = 0;
seqs = 0;
while (kseq_read(ks) >= 0)
{
if (ks2 && kseq_read(ks2) < 0)
{ // the 2nd file has fewer reads
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
break;
}
if (n >= m) {
m = m? m<<1 : 256;
if (n >= m)
{
m = m ? m << 1 : 256;
seqs = realloc(seqs, m * sizeof(bseq1_t));
}
trim_readno(&ks->name);
kseq2bseq1(ks, &seqs[n]);
seqs[n].id = n;
size += seqs[n++].l_seq;
if (ks2) {
if (ks2)
{
trim_readno(&ks2->name);
kseq2bseq1(ks2, &seqs[n]);
seqs[n].id = n;
size += seqs[n++].l_seq;
}
if (size >= chunk_size && (n&1) == 0) break;
if (size >= chunk_size && (n & 1) == 0)
break;
}
if (size == 0) { // test if the 2nd file is finished
if (size == 0)
{ // test if the 2nd file is finished
if (ks2 && kseq_read(ks2) >= 0)
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
}
@ -114,17 +127,25 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2])
{
int i, has_last;
kvec_t(bseq1_t) a[2] = {{0,0,0}, {0,0,0}};
for (i = 1, has_last = 1; i < n; ++i) {
if (has_last) {
if (strcmp(seqs[i].name, seqs[i-1].name) == 0) {
kv_push(bseq1_t, a[1], seqs[i-1]);
kvec_t(bseq1_t) a[2] = {{0, 0, 0}, {0, 0, 0}};
for (i = 1, has_last = 1; i < n; ++i)
{
if (has_last)
{
if (strcmp(seqs[i].name, seqs[i - 1].name) == 0)
{
kv_push(bseq1_t, a[1], seqs[i - 1]);
kv_push(bseq1_t, a[1], seqs[i]);
has_last = 0;
} else kv_push(bseq1_t, a[0], seqs[i-1]);
} else has_last = 1;
}
if (has_last) kv_push(bseq1_t, a[0], seqs[i-1]);
else
kv_push(bseq1_t, a[0], seqs[i - 1]);
}
else
has_last = 1;
}
if (has_last)
kv_push(bseq1_t, a[0], seqs[i - 1]);
sep[0] = a[0].a, m[0] = a[0].n;
sep[1] = a[1].a, m[1] = a[1].n;
}
@ -136,12 +157,14 @@ void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2])
void bwa_fill_scmat(int a, int b, int8_t mat[25])
{
int i, j, k;
for (i = k = 0; i < 4; ++i) {
for (i = k = 0; i < 4; ++i)
{
for (j = 0; j < 4; ++j)
mat[k++] = i == j? a : -b;
mat[k++] = i == j ? a : -b;
mat[k++] = -1; // ambiguous base
}
for (j = 0; j < 5; ++j) mat[k++] = -1;
for (j = 0; j < 5; ++j)
mat[k++] = -1;
}
// Generate CIGAR when the alignment end points are known
@ -154,78 +177,119 @@ uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins,
kstring_t str;
const char *int2base;
if (n_cigar) *n_cigar = 0;
if (NM) *NM = -1;
if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) return 0; // reject if negative length or bridging the forward and reverse strand
if (n_cigar)
*n_cigar = 0;
if (NM)
*NM = -1;
if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac))
return 0; // reject if negative length or bridging the forward and reverse strand
rseq = bns_get_seq(l_pac, pac, rb, re, &rlen);
if (re - rb != rlen) goto ret_gen_cigar; // possible if out of range
if (rb >= l_pac) { // then reverse both query and rseq; this is to ensure indels to be placed at the leftmost position
for (i = 0; i < l_query>>1; ++i)
if (re - rb != rlen)
goto ret_gen_cigar; // possible if out of range
if (rb >= l_pac)
{ // then reverse both query and rseq; this is to ensure indels to be placed at the leftmost position
for (i = 0; i < l_query >> 1; ++i)
tmp = query[i], query[i] = query[l_query - 1 - i], query[l_query - 1 - i] = tmp;
for (i = 0; i < rlen>>1; ++i)
for (i = 0; i < rlen >> 1; ++i)
tmp = rseq[i], rseq[i] = rseq[rlen - 1 - i], rseq[rlen - 1 - i] = tmp;
}
if (l_query == re - rb && w_ == 0) { // no gap; no need to do DP
if (l_query == re - rb && w_ == 0)
{ // no gap; no need to do DP
// UPDATE: we come to this block now... FIXME: due to an issue in mem_reg2aln(), we never come to this block. This does not affect accuracy, but it hurts performance.
if (n_cigar) {
if (n_cigar)
{
cigar = malloc(4);
cigar[0] = l_query<<4 | 0;
cigar[0] = l_query << 4 | 0;
*n_cigar = 1;
}
for (i = 0, *score = 0; i < l_query; ++i)
*score += mat[rseq[i]*5 + query[i]];
} else {
*score += mat[rseq[i] * 5 + query[i]];
}
else
{
int w, max_gap, max_ins, max_del, min_w;
// set the band-width
max_ins = (int)((double)(((l_query+1)>>1) * mat[0] - o_ins) / e_ins + 1.);
max_del = (int)((double)(((l_query+1)>>1) * mat[0] - o_del) / e_del + 1.);
max_gap = max_ins > max_del? max_ins : max_del;
max_gap = max_gap > 1? max_gap : 1;
max_ins = (int)((double)(((l_query + 1) >> 1) * mat[0] - o_ins) / e_ins + 1.);
max_del = (int)((double)(((l_query + 1) >> 1) * mat[0] - o_del) / e_del + 1.);
max_gap = max_ins > max_del ? max_ins : max_del;
max_gap = max_gap > 1 ? max_gap : 1;
w = (max_gap + abs((int)rlen - l_query) + 1) >> 1;
w = w < w_? w : w_;
w = w < w_ ? w : w_;
min_w = abs((int)rlen - l_query) + 3;
w = w > min_w? w : min_w;
w = w > min_w ? w : min_w;
// NW alignment
if (bwa_verbose >= 4) {
if (bwa_verbose >= 4)
{
printf("* Global bandwidth: %d\n", w);
printf("* Global ref: "); for (i = 0; i < rlen; ++i) putchar("ACGTN"[(int)rseq[i]]); putchar('\n');
printf("* Global query: "); for (i = 0; i < l_query; ++i) putchar("ACGTN"[(int)query[i]]); putchar('\n');
printf("* Global ref: ");
for (i = 0; i < rlen; ++i)
putchar("ACGTN"[(int)rseq[i]]);
putchar('\n');
printf("* Global query: ");
for (i = 0; i < l_query; ++i)
putchar("ACGTN"[(int)query[i]]);
putchar('\n');
}
#ifdef SHOW_PERF
int64_t start_time = get_mseconds();
#endif
*score = ksw_global2(l_query, query, rlen, rseq, 5, mat, o_del, e_del, o_ins, e_ins, w, n_cigar, &cigar);
#ifdef SHOW_PERF
int64_t tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_ksw_global2, tmp_diff);
#endif
}
if (NM && n_cigar) {// compute NM and MD
if (NM && n_cigar)
{ // compute NM and MD
int k, x, y, u, n_mm = 0, n_gap = 0;
str.l = str.m = *n_cigar * 4; str.s = (char*)cigar; // append MD to CIGAR
int2base = rb < l_pac? "ACGTN" : "TGCAN";
for (k = 0, x = y = u = 0; k < *n_cigar; ++k) {
str.l = str.m = *n_cigar * 4;
str.s = (char *)cigar; // append MD to CIGAR
int2base = rb < l_pac ? "ACGTN" : "TGCAN";
for (k = 0, x = y = u = 0; k < *n_cigar; ++k)
{
int op, len;
cigar = (uint32_t*)str.s;
op = cigar[k]&0xf, len = cigar[k]>>4;
if (op == 0) { // match
for (i = 0; i < len; ++i) {
if (query[x + i] != rseq[y + i]) {
kputw(u, &str);
kputc(int2base[rseq[y+i]], &str);
++n_mm; u = 0;
} else ++u;
}
x += len; y += len;
} else if (op == 2) { // deletion
if (k > 0 && k < *n_cigar - 1) { // don't do the following if D is the first or the last CIGAR
kputw(u, &str); kputc('^', &str);
cigar = (uint32_t *)str.s;
op = cigar[k] & 0xf, len = cigar[k] >> 4;
if (op == 0)
{ // match
for (i = 0; i < len; ++i)
kputc(int2base[rseq[y+i]], &str);
u = 0; n_gap += len;
{
if (query[x + i] != rseq[y + i])
{
kputw(u, &str);
kputc(int2base[rseq[y + i]], &str);
++n_mm;
u = 0;
}
else
++u;
}
x += len;
y += len;
}
else if (op == 2)
{ // deletion
if (k > 0 && k < *n_cigar - 1)
{ // don't do the following if D is the first or the last CIGAR
kputw(u, &str);
kputc('^', &str);
for (i = 0; i < len; ++i)
kputc(int2base[rseq[y + i]], &str);
u = 0;
n_gap += len;
}
y += len;
} else if (op == 1) x += len, n_gap += len; // insertion
}
kputw(u, &str); kputc(0, &str);
else if (op == 1)
x += len, n_gap += len; // insertion
}
kputw(u, &str);
kputc(0, &str);
*NM = n_mm + n_gap;
cigar = (uint32_t*)str.s;
cigar = (uint32_t *)str.s;
}
if (rb >= l_pac) // reverse back query
for (i = 0; i < l_query>>1; ++i)
for (i = 0; i < l_query >> 1; ++i)
tmp = query[i], query[i] = query[l_query - 1 - i], query[l_query - 1 - i] = tmp;
ret_gen_cigar:
@ -251,16 +315,22 @@ char *bwa_idx_infer_prefix(const char *hint)
prefix = malloc(l_hint + 3 + 4 + 1);
strcpy(prefix, hint);
strcpy(prefix + l_hint, ".64.bwt");
if ((fp = fopen(prefix, "rb")) != 0) {
if ((fp = fopen(prefix, "rb")) != 0)
{
fclose(fp);
prefix[l_hint + 3] = 0;
return prefix;
} else {
}
else
{
strcpy(prefix + l_hint, ".bwt");
if ((fp = fopen(prefix, "rb")) == 0) {
if ((fp = fopen(prefix, "rb")) == 0)
{
free(prefix);
return 0;
} else {
}
else
{
fclose(fp);
prefix[l_hint] = 0;
return prefix;
@ -273,8 +343,10 @@ bwt_t *bwa_idx_load_bwt(const char *hint)
char *tmp, *prefix;
bwt_t *bwt;
prefix = bwa_idx_infer_prefix(hint);
if (prefix == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__);
if (prefix == 0)
{
if (bwa_verbose >= 1)
fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__);
return 0;
}
tmp = calloc(strlen(prefix) + 5, 1);
@ -282,7 +354,8 @@ bwt_t *bwa_idx_load_bwt(const char *hint)
bwt = bwt_restore_bwt(tmp);
strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA)
bwt_restore_sa(tmp, bwt);
free(tmp); free(prefix);
free(tmp);
free(prefix);
return bwt;
}
@ -291,22 +364,28 @@ bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which)
bwaidx_t *idx;
char *prefix;
prefix = bwa_idx_infer_prefix(hint);
if (prefix == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__);
if (prefix == 0)
{
if (bwa_verbose >= 1)
fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__);
return 0;
}
idx = calloc(1, sizeof(bwaidx_t));
if (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint);
if (which & BWA_IDX_BNS) {
if (which & BWA_IDX_BWT)
idx->bwt = bwa_idx_load_bwt(hint);
if (which & BWA_IDX_BNS)
{
int i, c;
idx->bns = bns_restore(prefix);
for (i = c = 0; i < idx->bns->n_seqs; ++i)
if (idx->bns->anns[i].is_alt) ++c;
if (idx->bns->anns[i].is_alt)
++c;
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] read %d ALT contigs\n", __func__, c);
if (which & BWA_IDX_PAC) {
idx->pac = calloc(idx->bns->l_pac/4+1, 1);
err_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
if (which & BWA_IDX_PAC)
{
idx->pac = calloc(idx->bns->l_pac / 4 + 1, 1);
err_fread_noeof(idx->pac, 1, idx->bns->l_pac / 4 + 1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
err_fclose(idx->bns->fp_pac);
idx->bns->fp_pac = 0;
}
@ -322,14 +401,24 @@ bwaidx_t *bwa_idx_load(const char *hint, int which)
void bwa_idx_destroy(bwaidx_t *idx)
{
if (idx == 0) return;
if (idx->mem == 0) {
if (idx->bwt) bwt_destroy(idx->bwt);
if (idx->bns) bns_destroy(idx->bns);
if (idx->pac) free(idx->pac);
} else {
free(idx->bwt); free(idx->bns->anns); free(idx->bns);
if (!idx->is_shm) free(idx->mem);
if (idx == 0)
return;
if (idx->mem == 0)
{
if (idx->bwt)
bwt_destroy(idx->bwt);
if (idx->bns)
bns_destroy(idx->bns);
if (idx->pac)
free(idx->pac);
}
else
{
free(idx->bwt);
free(idx->bns->anns);
free(idx->bns);
if (!idx->is_shm)
free(idx->mem);
}
free(idx);
}
@ -340,22 +429,42 @@ int bwa_mem2idx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx)
int i;
// generate idx->bwt
x = sizeof(bwt_t); idx->bwt = malloc(x); memcpy(idx->bwt, mem + k, x); k += x;
x = idx->bwt->bwt_size * 4; idx->bwt->bwt = (uint32_t*)(mem + k); k += x;
x = idx->bwt->n_sa * sizeof(bwtint_t); idx->bwt->sa = (bwtint_t*)(mem + k); k += x;
x = sizeof(bwt_t);
idx->bwt = malloc(x);
memcpy(idx->bwt, mem + k, x);
k += x;
x = idx->bwt->bwt_size * 4;
idx->bwt->bwt = (uint32_t *)(mem + k);
k += x;
x = idx->bwt->n_sa * sizeof(bwtint_t);
idx->bwt->sa = (bwtint_t *)(mem + k);
k += x;
// generate idx->bns and idx->pac
x = sizeof(bntseq_t); idx->bns = malloc(x); memcpy(idx->bns, mem + k, x); k += x;
x = idx->bns->n_holes * sizeof(bntamb1_t); idx->bns->ambs = (bntamb1_t*)(mem + k); k += x;
x = idx->bns->n_seqs * sizeof(bntann1_t); idx->bns->anns = malloc(x); memcpy(idx->bns->anns, mem + k, x); k += x;
for (i = 0; i < idx->bns->n_seqs; ++i) {
idx->bns->anns[i].name = (char*)(mem + k); k += strlen(idx->bns->anns[i].name) + 1;
idx->bns->anns[i].anno = (char*)(mem + k); k += strlen(idx->bns->anns[i].anno) + 1;
x = sizeof(bntseq_t);
idx->bns = malloc(x);
memcpy(idx->bns, mem + k, x);
k += x;
x = idx->bns->n_holes * sizeof(bntamb1_t);
idx->bns->ambs = (bntamb1_t *)(mem + k);
k += x;
x = idx->bns->n_seqs * sizeof(bntann1_t);
idx->bns->anns = malloc(x);
memcpy(idx->bns->anns, mem + k, x);
k += x;
for (i = 0; i < idx->bns->n_seqs; ++i)
{
idx->bns->anns[i].name = (char *)(mem + k);
k += strlen(idx->bns->anns[i].name) + 1;
idx->bns->anns[i].anno = (char *)(mem + k);
k += strlen(idx->bns->anns[i].anno) + 1;
}
idx->pac = (uint8_t*)(mem + k); k += idx->bns->l_pac/4+1;
idx->pac = (uint8_t *)(mem + k);
k += idx->bns->l_pac / 4 + 1;
assert(k == l_mem);
idx->l_mem = k; idx->mem = mem;
idx->l_mem = k;
idx->mem = mem;
return 0;
}
@ -367,35 +476,56 @@ int bwa_idx2mem(bwaidx_t *idx)
// copy idx->bwt
x = idx->bwt->bwt_size * 4;
mem = realloc(idx->bwt->bwt, sizeof(bwt_t) + x); idx->bwt->bwt = 0;
mem = realloc(idx->bwt->bwt, sizeof(bwt_t) + x);
idx->bwt->bwt = 0;
memmove(mem + sizeof(bwt_t), mem, x);
memcpy(mem, idx->bwt, sizeof(bwt_t)); k = sizeof(bwt_t) + x;
x = idx->bwt->n_sa * sizeof(bwtint_t); mem = realloc(mem, k + x); memcpy(mem + k, idx->bwt->sa, x); k += x;
memcpy(mem, idx->bwt, sizeof(bwt_t));
k = sizeof(bwt_t) + x;
x = idx->bwt->n_sa * sizeof(bwtint_t);
mem = realloc(mem, k + x);
memcpy(mem + k, idx->bwt->sa, x);
k += x;
free(idx->bwt->sa);
free(idx->bwt); idx->bwt = 0;
free(idx->bwt);
idx->bwt = 0;
// copy idx->bns
tmp = idx->bns->n_seqs * sizeof(bntann1_t) + idx->bns->n_holes * sizeof(bntamb1_t);
for (i = 0; i < idx->bns->n_seqs; ++i) // compute the size of heap-allocated memory
tmp += strlen(idx->bns->anns[i].name) + strlen(idx->bns->anns[i].anno) + 2;
mem = realloc(mem, k + sizeof(bntseq_t) + tmp);
x = sizeof(bntseq_t); memcpy(mem + k, idx->bns, x); k += x;
x = idx->bns->n_holes * sizeof(bntamb1_t); memcpy(mem + k, idx->bns->ambs, x); k += x;
x = sizeof(bntseq_t);
memcpy(mem + k, idx->bns, x);
k += x;
x = idx->bns->n_holes * sizeof(bntamb1_t);
memcpy(mem + k, idx->bns->ambs, x);
k += x;
free(idx->bns->ambs);
x = idx->bns->n_seqs * sizeof(bntann1_t); memcpy(mem + k, idx->bns->anns, x); k += x;
for (i = 0; i < idx->bns->n_seqs; ++i) {
x = strlen(idx->bns->anns[i].name) + 1; memcpy(mem + k, idx->bns->anns[i].name, x); k += x;
x = strlen(idx->bns->anns[i].anno) + 1; memcpy(mem + k, idx->bns->anns[i].anno, x); k += x;
free(idx->bns->anns[i].name); free(idx->bns->anns[i].anno);
x = idx->bns->n_seqs * sizeof(bntann1_t);
memcpy(mem + k, idx->bns->anns, x);
k += x;
for (i = 0; i < idx->bns->n_seqs; ++i)
{
x = strlen(idx->bns->anns[i].name) + 1;
memcpy(mem + k, idx->bns->anns[i].name, x);
k += x;
x = strlen(idx->bns->anns[i].anno) + 1;
memcpy(mem + k, idx->bns->anns[i].anno, x);
k += x;
free(idx->bns->anns[i].name);
free(idx->bns->anns[i].anno);
}
free(idx->bns->anns);
// copy idx->pac
x = idx->bns->l_pac/4+1;
x = idx->bns->l_pac / 4 + 1;
mem = realloc(mem, k + x);
memcpy(mem + k, idx->pac, x); k += x;
free(idx->bns); idx->bns = 0;
free(idx->pac); idx->pac = 0;
memcpy(mem + k, idx->pac, x);
k += x;
free(idx->bns);
idx->bns = 0;
free(idx->pac);
idx->pac = 0;
return bwa_mem2idx(k, mem, idx);
}
@ -409,45 +539,65 @@ void bwa_print_sam_hdr(const bntseq_t *bns, const char *hdr_line)
int i, n_HD = 0, n_SQ = 0;
extern char *bwa_pg;
if (hdr_line) {
if (hdr_line)
{
// check for HD line
const char *p = hdr_line;
if ((p = strstr(p, "@HD")) != 0) {
if ((p = strstr(p, "@HD")) != 0)
{
++n_HD;
}
// check for SQ lines
p = hdr_line;
while ((p = strstr(p, "@SQ\t")) != 0) {
if (p == hdr_line || *(p-1) == '\n') ++n_SQ;
while ((p = strstr(p, "@SQ\t")) != 0)
{
if (p == hdr_line || *(p - 1) == '\n')
++n_SQ;
p += 4;
}
}
if (n_SQ == 0) {
for (i = 0; i < bns->n_seqs; ++i) {
if (n_SQ == 0)
{
for (i = 0; i < bns->n_seqs; ++i)
{
err_printf("@SQ\tSN:%s\tLN:%d", bns->anns[i].name, bns->anns[i].len);
if (bns->anns[i].is_alt) err_printf("\tAH:*\n");
else err_fputc('\n', stdout);
if (bns->anns[i].is_alt)
err_printf("\tAH:*\n");
else
err_fputc('\n', stdout);
}
} else if (n_SQ != bns->n_seqs && bwa_verbose >= 2)
}
else if (n_SQ != bns->n_seqs && bwa_verbose >= 2)
fprintf(stderr, "[W::%s] %d @SQ lines provided with -H; %d sequences in the index. Continue anyway.\n", __func__, n_SQ, bns->n_seqs);
if (n_HD == 0) {
if (n_HD == 0)
{
err_printf("@HD\tVN:1.5\tSO:unsorted\tGO:query\n");
}
if (hdr_line) err_printf("%s\n", hdr_line);
if (bwa_pg) err_printf("%s\n", bwa_pg);
if (hdr_line)
err_printf("%s\n", hdr_line);
if (bwa_pg)
err_printf("%s\n", bwa_pg);
}
static char *bwa_escape(char *s)
{
char *p, *q;
for (p = q = s; *p; ++p) {
if (*p == '\\') {
for (p = q = s; *p; ++p)
{
if (*p == '\\')
{
++p;
if (*p == 't') *q++ = '\t';
else if (*p == 'n') *q++ = '\n';
else if (*p == 'r') *q++ = '\r';
else if (*p == '\\') *q++ = '\\';
} else *q++ = *p;
if (*p == 't')
*q++ = '\t';
else if (*p == 'n')
*q++ = '\n';
else if (*p == 'r')
*q++ = '\r';
else if (*p == '\\')
*q++ = '\\';
}
else
*q++ = *p;
}
*q = '\0';
return s;
@ -457,24 +607,33 @@ char *bwa_set_rg(const char *s)
{
char *p, *q, *r, *rg_line = 0;
memset(bwa_rg_id, 0, 256);
if (strstr(s, "@RG") != s) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] the read group line is not started with @RG\n", __func__);
if (strstr(s, "@RG") != s)
{
if (bwa_verbose >= 1)
fprintf(stderr, "[E::%s] the read group line is not started with @RG\n", __func__);
goto err_set_rg;
}
if (strstr(s, "\t") != NULL) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] the read group line contained literal <tab> characters -- replace with escaped tabs: \\t\n", __func__);
if (strstr(s, "\t") != NULL)
{
if (bwa_verbose >= 1)
fprintf(stderr, "[E::%s] the read group line contained literal <tab> characters -- replace with escaped tabs: \\t\n", __func__);
goto err_set_rg;
}
rg_line = strdup(s);
bwa_escape(rg_line);
if ((p = strstr(rg_line, "\tID:")) == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] no ID within the read group line\n", __func__);
if ((p = strstr(rg_line, "\tID:")) == 0)
{
if (bwa_verbose >= 1)
fprintf(stderr, "[E::%s] no ID within the read group line\n", __func__);
goto err_set_rg;
}
p += 4;
for (q = p; *q && *q != '\t' && *q != '\n'; ++q);
if (q - p + 1 > 256) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] @RG:ID is longer than 255 characters\n", __func__);
for (q = p; *q && *q != '\t' && *q != '\n'; ++q)
;
if (q - p + 1 > 256)
{
if (bwa_verbose >= 1)
fprintf(stderr, "[E::%s] @RG:ID is longer than 255 characters\n", __func__);
goto err_set_rg;
}
for (q = p, r = bwa_rg_id; *q && *q != '\t' && *q != '\n'; ++q)
@ -489,13 +648,17 @@ err_set_rg:
char *bwa_insert_header(const char *s, char *hdr)
{
int len = 0;
if (s == 0 || s[0] != '@') return hdr;
if (hdr) {
if (s == 0 || s[0] != '@')
return hdr;
if (hdr)
{
len = strlen(hdr);
hdr = realloc(hdr, len + strlen(s) + 2);
hdr[len++] = '\n';
strcpy(hdr + len, s);
} else hdr = strdup(s);
}
else
hdr = strdup(s);
bwa_escape(hdr + len);
return hdr;
}

1307
bwamem.c

File diff suppressed because it is too large Load Diff

View File

@ -35,9 +35,13 @@
#include "ksw.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#include "malloc_wrap.h"
#endif
#ifdef SHOW_PERF
extern int64_t get_mseconds();
extern int64_t time_ksw_align2;
#endif
#define MIN_RATIO 0.8
#define MIN_DIR_CNT 10
@ -50,23 +54,26 @@ static inline int mem_infer_dir(int64_t l_pac, int64_t b1, int64_t b2, int64_t *
{
int64_t p2;
int r1 = (b1 >= l_pac), r2 = (b2 >= l_pac);
p2 = r1 == r2? b2 : (l_pac<<1) - 1 - b2; // p2 is the coordinate of read 2 on the read 1 strand
*dist = p2 > b1? p2 - b1 : b1 - p2;
return (r1 == r2? 0 : 1) ^ (p2 > b1? 0 : 3);
p2 = r1 == r2 ? b2 : (l_pac << 1) - 1 - b2; // p2 is the coordinate of read 2 on the read 1 strand
*dist = p2 > b1 ? p2 - b1 : b1 - p2;
return (r1 == r2 ? 0 : 1) ^ (p2 > b1 ? 0 : 3);
}
static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r)
{
int j;
for (j = 1; j < r->n; ++j) { // choose unique alignment
int b_max = r->a[j].qb > r->a[0].qb? r->a[j].qb : r->a[0].qb;
int e_min = r->a[j].qe < r->a[0].qe? r->a[j].qe : r->a[0].qe;
if (e_min > b_max) { // have overlap
int min_l = r->a[j].qe - r->a[j].qb < r->a[0].qe - r->a[0].qb? r->a[j].qe - r->a[j].qb : r->a[0].qe - r->a[0].qb;
if (e_min - b_max >= min_l * opt->mask_level) break; // significant overlap
for (j = 1; j < r->n; ++j)
{ // choose unique alignment
int b_max = r->a[j].qb > r->a[0].qb ? r->a[j].qb : r->a[0].qb;
int e_min = r->a[j].qe < r->a[0].qe ? r->a[j].qe : r->a[0].qe;
if (e_min > b_max)
{ // have overlap
int min_l = r->a[j].qe - r->a[j].qb < r->a[0].qe - r->a[0].qb ? r->a[j].qe - r->a[j].qb : r->a[0].qe - r->a[0].qb;
if (e_min - b_max >= min_l * opt->mask_level)
break; // significant overlap
}
}
return j < r->n? r->a[j].score : opt->min_seed_len * opt->a;
return j < r->n ? r->a[j].score : opt->min_seed_len * opt->a;
}
void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4])
@ -75,36 +82,48 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *
uint64_v isize[4];
memset(pes, 0, 4 * sizeof(mem_pestat_t));
memset(isize, 0, sizeof(kvec_t(int)) * 4);
for (i = 0; i < n>>1; ++i) {
for (i = 0; i < n >> 1; ++i)
{
int dir;
int64_t is;
mem_alnreg_v *r[2];
r[0] = (mem_alnreg_v*)&regs[i<<1|0];
r[1] = (mem_alnreg_v*)&regs[i<<1|1];
if (r[0]->n == 0 || r[1]->n == 0) continue;
if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score) continue;
if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score) continue;
if (r[0]->a[0].rid != r[1]->a[0].rid) continue; // not on the same chr
r[0] = (mem_alnreg_v *)&regs[i << 1 | 0];
r[1] = (mem_alnreg_v *)&regs[i << 1 | 1];
if (r[0]->n == 0 || r[1]->n == 0)
continue;
if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score)
continue;
if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score)
continue;
if (r[0]->a[0].rid != r[1]->a[0].rid)
continue; // not on the same chr
dir = mem_infer_dir(l_pac, r[0]->a[0].rb, r[1]->a[0].rb, &is);
if (is && is <= opt->max_ins) kv_push(uint64_t, isize[dir], is);
if (is && is <= opt->max_ins)
kv_push(uint64_t, isize[dir], is);
}
if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n);
for (d = 0; d < 4; ++d) { // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two.
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n);
for (d = 0; d < 4; ++d)
{ // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two.
mem_pestat_t *r = &pes[d];
uint64_v *q = &isize[d];
int p25, p50, p75, x;
if (q->n < MIN_DIR_CNT) {
fprintf(stderr, "[M::%s] skip orientation %c%c as there are not enough pairs\n", __func__, "FR"[d>>1&1], "FR"[d&1]);
if (q->n < MIN_DIR_CNT)
{
fprintf(stderr, "[M::%s] skip orientation %c%c as there are not enough pairs\n", __func__, "FR"[d >> 1 & 1], "FR"[d & 1]);
r->failed = 1;
free(q->a);
continue;
} else fprintf(stderr, "[M::%s] analyzing insert size distribution for orientation %c%c...\n", __func__, "FR"[d>>1&1], "FR"[d&1]);
}
else
fprintf(stderr, "[M::%s] analyzing insert size distribution for orientation %c%c...\n", __func__, "FR"[d >> 1 & 1], "FR"[d & 1]);
ks_introsort_64(q->n, q->a);
p25 = q->a[(int)(.25 * q->n + .499)];
p50 = q->a[(int)(.50 * q->n + .499)];
p75 = q->a[(int)(.75 * q->n + .499)];
r->low = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499);
if (r->low < 1) r->low = 1;
if (r->low < 1)
r->low = 1;
r->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499);
fprintf(stderr, "[M::%s] (25, 50, 75) percentile: (%d, %d, %d)\n", __func__, p25, p50, p75);
fprintf(stderr, "[M::%s] low and high boundaries for computing mean and std.dev: (%d, %d)\n", __func__, r->low, r->high);
@ -119,18 +138,22 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *
fprintf(stderr, "[M::%s] mean and std.dev: (%.2f, %.2f)\n", __func__, r->avg, r->std);
r->low = (int)(p25 - MAPPING_BOUND * (p75 - p25) + .499);
r->high = (int)(p75 + MAPPING_BOUND * (p75 - p25) + .499);
if (r->low > r->avg - MAX_STDDEV * r->std) r->low = (int)(r->avg - MAX_STDDEV * r->std + .499);
if (r->high < r->avg + MAX_STDDEV * r->std) r->high = (int)(r->avg + MAX_STDDEV * r->std + .499);
if (r->low < 1) r->low = 1;
if (r->low > r->avg - MAX_STDDEV * r->std)
r->low = (int)(r->avg - MAX_STDDEV * r->std + .499);
if (r->high < r->avg + MAX_STDDEV * r->std)
r->high = (int)(r->avg + MAX_STDDEV * r->std + .499);
if (r->low < 1)
r->low = 1;
fprintf(stderr, "[M::%s] low and high boundaries for proper pairs: (%d, %d)\n", __func__, r->low, r->high);
free(q->a);
}
for (d = 0, max = 0; d < 4; ++d)
max = max > isize[d].n? max : isize[d].n;
max = max > isize[d].n ? max : isize[d].n;
for (d = 0; d < 4; ++d)
if (pes[d].failed == 0 && isize[d].n < max * MIN_DIR_RATIO) {
if (pes[d].failed == 0 && isize[d].n < max * MIN_DIR_RATIO)
{
pes[d].failed = 1;
fprintf(stderr, "[M::%s] skip orientation %c%c\n", __func__, "FR"[d>>1&1], "FR"[d&1]);
fprintf(stderr, "[M::%s] skip orientation %c%c\n", __func__, "FR"[d >> 1 & 1], "FR"[d & 1]);
}
}
@ -140,66 +163,93 @@ int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
int64_t l_pac = bns->l_pac;
int i, r, skip[4], n = 0, rid;
for (r = 0; r < 4; ++r)
skip[r] = pes[r].failed? 1 : 0;
for (i = 0; i < ma->n; ++i) { // check which orinentation has been found
skip[r] = pes[r].failed ? 1 : 0;
for (i = 0; i < ma->n; ++i)
{ // check which orinentation has been found
int64_t dist;
r = mem_infer_dir(l_pac, a->rb, ma->a[i].rb, &dist);
if (dist >= pes[r].low && dist <= pes[r].high)
skip[r] = 1;
}
if (skip[0] + skip[1] + skip[2] + skip[3] == 4) return 0; // consistent pair exist; no need to perform SW
for (r = 0; r < 4; ++r) {
if (skip[0] + skip[1] + skip[2] + skip[3] == 4)
return 0; // consistent pair exist; no need to perform SW
for (r = 0; r < 4; ++r)
{
int is_rev, is_larger;
uint8_t *seq, *rev = 0, *ref = 0;
int64_t rb, re;
if (skip[r]) continue;
is_rev = (r>>1 != (r&1)); // whether to reverse complement the mate
is_larger = !(r>>1); // whether the mate has larger coordinate
if (is_rev) {
if (skip[r])
continue;
is_rev = (r >> 1 != (r & 1)); // whether to reverse complement the mate
is_larger = !(r >> 1); // whether the mate has larger coordinate
if (is_rev)
{
rev = malloc(l_ms); // this is the reverse complement of $ms
for (i = 0; i < l_ms; ++i) rev[l_ms - 1 - i] = ms[i] < 4? 3 - ms[i] : 4;
for (i = 0; i < l_ms; ++i)
rev[l_ms - 1 - i] = ms[i] < 4 ? 3 - ms[i] : 4;
seq = rev;
} else seq = (uint8_t*)ms;
if (!is_rev) {
rb = is_larger? a->rb + pes[r].low : a->rb - pes[r].high;
re = (is_larger? a->rb + pes[r].high: a->rb - pes[r].low) + l_ms; // if on the same strand, end position should be larger to make room for the seq length
} else {
rb = (is_larger? a->rb + pes[r].low : a->rb - pes[r].high) - l_ms; // similarly on opposite strands
re = is_larger? a->rb + pes[r].high: a->rb - pes[r].low;
}
if (rb < 0) rb = 0;
if (re > l_pac<<1) re = l_pac<<1;
if (rb < re) ref = bns_fetch_seq(bns, pac, &rb, (rb+re)>>1, &re, &rid);
if (a->rid == rid && re - rb >= opt->min_seed_len) { // no funny things happening
else
seq = (uint8_t *)ms;
if (!is_rev)
{
rb = is_larger ? a->rb + pes[r].low : a->rb - pes[r].high;
re = (is_larger ? a->rb + pes[r].high : a->rb - pes[r].low) + l_ms; // if on the same strand, end position should be larger to make room for the seq length
}
else
{
rb = (is_larger ? a->rb + pes[r].low : a->rb - pes[r].high) - l_ms; // similarly on opposite strands
re = is_larger ? a->rb + pes[r].high : a->rb - pes[r].low;
}
if (rb < 0)
rb = 0;
if (re > l_pac << 1)
re = l_pac << 1;
if (rb < re)
ref = bns_fetch_seq(bns, pac, &rb, (rb + re) >> 1, &re, &rid);
if (a->rid == rid && re - rb >= opt->min_seed_len)
{ // no funny things happening
kswr_t aln;
mem_alnreg_t b;
int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a);
int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250 ? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a);
#ifdef SHOW_PERF
int64_t start_time = get_mseconds();
#endif
aln = ksw_align2(l_ms, seq, re - rb, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0);
#ifdef SHOW_PERF
int64_t tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_ksw_align2, tmp_diff);
#endif
memset(&b, 0, sizeof(mem_alnreg_t));
if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0
if (aln.score >= opt->min_seed_len && aln.qb >= 0)
{ // something goes wrong if aln.qb < 0
b.rid = a->rid;
b.is_alt = a->is_alt;
b.qb = is_rev? l_ms - (aln.qe + 1) : aln.qb;
b.qe = is_rev? l_ms - aln.qb : aln.qe + 1;
b.rb = is_rev? (l_pac<<1) - (rb + aln.te + 1) : rb + aln.tb;
b.re = is_rev? (l_pac<<1) - (rb + aln.tb) : rb + aln.te + 1;
b.qb = is_rev ? l_ms - (aln.qe + 1) : aln.qb;
b.qe = is_rev ? l_ms - aln.qb : aln.qe + 1;
b.rb = is_rev ? (l_pac << 1) - (rb + aln.te + 1) : rb + aln.tb;
b.re = is_rev ? (l_pac << 1) - (rb + aln.tb) : rb + aln.te + 1;
b.score = aln.score;
b.csub = aln.score2;
b.secondary = -1;
b.seedcov = (b.re - b.rb < b.qe - b.qb? b.re - b.rb : b.qe - b.qb) >> 1;
// printf("*** %d, [%lld,%lld], %d:%d, (%lld,%lld), (%lld,%lld) == (%lld,%lld)\n", aln.score, rb, re, is_rev, is_larger, a->rb, a->re, ma->a[0].rb, ma->a[0].re, b.rb, b.re);
b.seedcov = (b.re - b.rb < b.qe - b.qb ? b.re - b.rb : b.qe - b.qb) >> 1;
// printf("*** %d, [%lld,%lld], %d:%d, (%lld,%lld), (%lld,%lld) == (%lld,%lld)\n", aln.score, rb, re, is_rev, is_larger, a->rb, a->re, ma->a[0].rb, ma->a[0].re, b.rb, b.re);
kv_push(mem_alnreg_t, *ma, b); // make room for a new element
// move b s.t. ma is sorted
for (i = 0; i < ma->n - 1; ++i) // find the insertion point
if (ma->a[i].score < b.score) break;
if (ma->a[i].score < b.score)
break;
tmp = i;
for (i = ma->n - 1; i > tmp; --i) ma->a[i] = ma->a[i-1];
for (i = ma->n - 1; i > tmp; --i)
ma->a[i] = ma->a[i - 1];
ma->a[i] = b;
}
++n;
}
if (n) ma->n = mem_sort_dedup_patch(opt, 0, 0, 0, ma->n, ma->a);
if (rev) free(rev);
if (n)
ma->n = mem_sort_dedup_patch(opt, 0, 0, 0, ma->n, ma->a);
if (rev)
free(rev);
free(ref);
}
return n;
@ -210,61 +260,79 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons
pair64_v v, u;
int r, i, k, y[4], ret; // y[] keeps the last hit
int64_t l_pac = bns->l_pac;
kv_init(v); kv_init(u);
for (r = 0; r < 2; ++r) { // loop through read number
for (i = 0; i < n_pri[r]; ++i) {
kv_init(v);
kv_init(u);
for (r = 0; r < 2; ++r)
{ // loop through read number
for (i = 0; i < n_pri[r]; ++i)
{
pair64_t key;
mem_alnreg_t *e = &a[r].a[i];
key.x = e->rb < l_pac? e->rb : (l_pac<<1) - 1 - e->rb; // forward position
key.x = (uint64_t)e->rid<<32 | (key.x - bns->anns[e->rid].offset);
key.y = (uint64_t)e->score << 32 | i << 2 | (e->rb >= l_pac)<<1 | r;
key.x = e->rb < l_pac ? e->rb : (l_pac << 1) - 1 - e->rb; // forward position
key.x = (uint64_t)e->rid << 32 | (key.x - bns->anns[e->rid].offset);
key.y = (uint64_t)e->score << 32 | i << 2 | (e->rb >= l_pac) << 1 | r;
kv_push(pair64_t, v, key);
}
}
ks_introsort_128(v.n, v.a);
y[0] = y[1] = y[2] = y[3] = -1;
//for (i = 0; i < v.n; ++i) printf("[%d]\t%d\t%c%ld\n", i, (int)(v.a[i].y&1)+1, "+-"[v.a[i].y>>1&1], (long)v.a[i].x);
for (i = 0; i < v.n; ++i) {
for (r = 0; r < 2; ++r) { // loop through direction
int dir = r<<1 | (v.a[i].y>>1&1), which;
if (pes[dir].failed) continue; // invalid orientation
which = r<<1 | ((v.a[i].y&1)^1);
if (y[which] < 0) continue; // no previous hits
for (k = y[which]; k >= 0; --k) { // TODO: this is a O(n^2) solution in the worst case; remember to check if this loop takes a lot of time (I doubt)
// for (i = 0; i < v.n; ++i) printf("[%d]\t%d\t%c%ld\n", i, (int)(v.a[i].y&1)+1, "+-"[v.a[i].y>>1&1], (long)v.a[i].x);
for (i = 0; i < v.n; ++i)
{
for (r = 0; r < 2; ++r)
{ // loop through direction
int dir = r << 1 | (v.a[i].y >> 1 & 1), which;
if (pes[dir].failed)
continue; // invalid orientation
which = r << 1 | ((v.a[i].y & 1) ^ 1);
if (y[which] < 0)
continue; // no previous hits
for (k = y[which]; k >= 0; --k)
{ // TODO: this is a O(n^2) solution in the worst case; remember to check if this loop takes a lot of time (I doubt)
int64_t dist;
int q;
double ns;
pair64_t *p;
if ((v.a[k].y&3) != which) continue;
if ((v.a[k].y & 3) != which)
continue;
dist = (int64_t)v.a[i].x - v.a[k].x;
//printf("%d: %lld\n", k, dist);
if (dist > pes[dir].high) break;
if (dist < pes[dir].low) continue;
// printf("%d: %lld\n", k, dist);
if (dist > pes[dir].high)
break;
if (dist < pes[dir].low)
continue;
ns = (dist - pes[dir].avg) / pes[dir].std;
q = (int)((v.a[i].y>>32) + (v.a[k].y>>32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) * opt->a + .499); // .721 = 1/log(4)
if (q < 0) q = 0;
q = (int)((v.a[i].y >> 32) + (v.a[k].y >> 32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) * opt->a + .499); // .721 = 1/log(4)
if (q < 0)
q = 0;
p = kv_pushp(pair64_t, u);
p->y = (uint64_t)k<<32 | i;
p->x = (uint64_t)q<<32 | (hash_64(p->y ^ id<<8) & 0xffffffffU);
//printf("[%lld,%lld]\t%d\tdist=%ld\n", v.a[k].x, v.a[i].x, q, (long)dist);
p->y = (uint64_t)k << 32 | i;
p->x = (uint64_t)q << 32 | (hash_64(p->y ^ id << 8) & 0xffffffffU);
// printf("[%lld,%lld]\t%d\tdist=%ld\n", v.a[k].x, v.a[i].x, q, (long)dist);
}
}
y[v.a[i].y&3] = i;
y[v.a[i].y & 3] = i;
}
if (u.n) { // found at least one proper pair
if (u.n)
{ // found at least one proper pair
int tmp = opt->a + opt->b;
tmp = tmp > opt->o_del + opt->e_del? tmp : opt->o_del + opt->e_del;
tmp = tmp > opt->o_ins + opt->e_ins? tmp : opt->o_ins + opt->e_ins;
tmp = tmp > opt->o_del + opt->e_del ? tmp : opt->o_del + opt->e_del;
tmp = tmp > opt->o_ins + opt->e_ins ? tmp : opt->o_ins + opt->e_ins;
ks_introsort_128(u.n, u.a);
i = u.a[u.n-1].y >> 32; k = u.a[u.n-1].y << 32 >> 32;
z[v.a[i].y&1] = v.a[i].y<<32>>34; // index of the best pair
z[v.a[k].y&1] = v.a[k].y<<32>>34;
ret = u.a[u.n-1].x >> 32;
*sub = u.n > 1? u.a[u.n-2].x>>32 : 0;
i = u.a[u.n - 1].y >> 32;
k = u.a[u.n - 1].y << 32 >> 32;
z[v.a[i].y & 1] = v.a[i].y << 32 >> 34; // index of the best pair
z[v.a[k].y & 1] = v.a[k].y << 32 >> 34;
ret = u.a[u.n - 1].x >> 32;
*sub = u.n > 1 ? u.a[u.n - 2].x >> 32 : 0;
for (i = (long)u.n - 2, *n_sub = 0; i >= 0; --i)
if (*sub - (int)(u.a[i].x>>32) <= tmp) ++*n_sub;
} else ret = 0, *sub = 0, *n_sub = 0;
free(u.a); free(v.a);
if (*sub - (int)(u.a[i].x >> 32) <= tmp)
++*n_sub;
}
else
ret = 0, *sub = 0, *n_sub = 0;
free(u.a);
free(v.a);
return ret;
}
@ -284,72 +352,94 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
kstring_t str;
mem_aln_t h[2], g[2], aa[2][2];
str.l = str.m = 0; str.s = 0;
str.l = str.m = 0;
str.s = 0;
memset(h, 0, sizeof(mem_aln_t) * 2);
memset(g, 0, sizeof(mem_aln_t) * 2);
n_aa[0] = n_aa[1] = 0;
if (!(opt->flag & MEM_F_NO_RESCUE)) { // then perform SW for the best alignment
if (!(opt->flag & MEM_F_NO_RESCUE))
{ // then perform SW for the best alignment
mem_alnreg_v b[2];
kv_init(b[0]); kv_init(b[1]);
kv_init(b[0]);
kv_init(b[1]);
for (i = 0; i < 2; ++i)
for (j = 0; j < a[i].n; ++j)
if (a[i].a[j].score >= a[i].a[0].score - opt->pen_unpaired)
kv_push(mem_alnreg_t, b[i], a[i].a[j]);
for (i = 0; i < 2; ++i)
for (j = 0; j < b[i].n && j < opt->max_matesw; ++j)
n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
free(b[0].a); free(b[1].a);
n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t *)s[!i].seq, &a[!i]);
free(b[0].a);
free(b[1].a);
}
n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0);
n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id<<1|1);
if (opt->flag & MEM_F_PRIMARY5) {
n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id << 1 | 0);
n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id << 1 | 1);
if (opt->flag & MEM_F_PRIMARY5)
{
mem_reorder_primary5(opt->T, &a[0]);
mem_reorder_primary5(opt->T, &a[1]);
}
if (opt->flag&MEM_F_NOPAIRING) goto no_pairing;
if (opt->flag & MEM_F_NOPAIRING)
goto no_pairing;
// pairing single-end hits
if (n_pri[0] && n_pri[1] && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z, n_pri)) > 0) {
if (n_pri[0] && n_pri[1] && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z, n_pri)) > 0)
{
int is_multi[2], q_pe, score_un, q_se[2];
char **XA[2];
// check if an end has multiple hits even after mate-SW
for (i = 0; i < 2; ++i) {
for (i = 0; i < 2; ++i)
{
for (j = 1; j < n_pri[i]; ++j)
if (a[i].a[j].secondary < 0 && a[i].a[j].score >= opt->T) break;
is_multi[i] = j < n_pri[i]? 1 : 0;
if (a[i].a[j].secondary < 0 && a[i].a[j].score >= opt->T)
break;
is_multi[i] = j < n_pri[i] ? 1 : 0;
}
if (is_multi[0] || is_multi[1]) goto no_pairing; // TODO: in rare cases, the true hit may be long but with low score
if (is_multi[0] || is_multi[1])
goto no_pairing; // TODO: in rare cases, the true hit may be long but with low score
// compute mapQ for the best SE hit
score_un = a[0].a[0].score + a[1].a[0].score - opt->pen_unpaired;
//q_pe = o && subo < o? (int)(MEM_MAPQ_COEF * (1. - (double)subo / o) * log(a[0].a[z[0]].seedcov + a[1].a[z[1]].seedcov) + .499) : 0;
subo = subo > score_un? subo : score_un;
// q_pe = o && subo < o? (int)(MEM_MAPQ_COEF * (1. - (double)subo / o) * log(a[0].a[z[0]].seedcov + a[1].a[z[1]].seedcov) + .499) : 0;
subo = subo > score_un ? subo : score_un;
q_pe = raw_mapq(o - subo, opt->a);
if (n_sub > 0) q_pe -= (int)(4.343 * log(n_sub+1) + .499);
if (q_pe < 0) q_pe = 0;
if (q_pe > 60) q_pe = 60;
if (n_sub > 0)
q_pe -= (int)(4.343 * log(n_sub + 1) + .499);
if (q_pe < 0)
q_pe = 0;
if (q_pe > 60)
q_pe = 60;
q_pe = (int)(q_pe * (1. - .5 * (a[0].a[0].frac_rep + a[1].a[0].frac_rep)) + .499);
// the following assumes no split hits
if (o > score_un) { // paired alignment is preferred
if (o > score_un)
{ // paired alignment is preferred
mem_alnreg_t *c[2];
c[0] = &a[0].a[z[0]]; c[1] = &a[1].a[z[1]];
for (i = 0; i < 2; ++i) {
c[0] = &a[0].a[z[0]];
c[1] = &a[1].a[z[1]];
for (i = 0; i < 2; ++i)
{
if (c[i]->secondary >= 0)
c[i]->sub = a[i].a[c[i]->secondary].score, c[i]->secondary = -2;
q_se[i] = mem_approx_mapq_se(opt, c[i]);
}
q_se[0] = q_se[0] > q_pe? q_se[0] : q_pe < q_se[0] + 40? q_pe : q_se[0] + 40;
q_se[1] = q_se[1] > q_pe? q_se[1] : q_pe < q_se[1] + 40? q_pe : q_se[1] + 40;
q_se[0] = q_se[0] > q_pe ? q_se[0] : q_pe < q_se[0] + 40 ? q_pe
: q_se[0] + 40;
q_se[1] = q_se[1] > q_pe ? q_se[1] : q_pe < q_se[1] + 40 ? q_pe
: q_se[1] + 40;
extra_flag |= 2;
// cap at the tandem repeat score
q_se[0] = q_se[0] < raw_mapq(c[0]->score - c[0]->csub, opt->a)? q_se[0] : raw_mapq(c[0]->score - c[0]->csub, opt->a);
q_se[1] = q_se[1] < raw_mapq(c[1]->score - c[1]->csub, opt->a)? q_se[1] : raw_mapq(c[1]->score - c[1]->csub, opt->a);
} else { // the unpaired alignment is preferred
q_se[0] = q_se[0] < raw_mapq(c[0]->score - c[0]->csub, opt->a) ? q_se[0] : raw_mapq(c[0]->score - c[0]->csub, opt->a);
q_se[1] = q_se[1] < raw_mapq(c[1]->score - c[1]->csub, opt->a) ? q_se[1] : raw_mapq(c[1]->score - c[1]->csub, opt->a);
}
else
{ // the unpaired alignment is preferred
z[0] = z[1] = 0;
q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]);
q_se[1] = mem_approx_mapq_se(opt, &a[1].a[0]);
}
for (i = 0; i < 2; ++i) {
for (i = 0; i < 2; ++i)
{
int k = a[i].a[z[i]].secondary_all;
if (k >= 0 && k < n_pri[i]) { // switch secondary and primary if both of them are non-ALT
if (k >= 0 && k < n_pri[i])
{ // switch secondary and primary if both of them are non-ALT
assert(a[i].a[k].secondary_all < 0);
for (j = 0; j < a[i].n; ++j)
if (a[i].a[j].secondary_all == k || j == k)
@ -357,63 +447,86 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
a[i].a[z[i]].secondary_all = -1;
}
}
if (!(opt->flag & MEM_F_ALL)) {
if (!(opt->flag & MEM_F_ALL))
{
for (i = 0; i < 2; ++i)
XA[i] = mem_gen_alt(opt, bns, pac, &a[i], s[i].l_seq, s[i].seq);
} else XA[0] = XA[1] = 0;
}
else
XA[0] = XA[1] = 0;
// write SAM
for (i = 0; i < 2; ++i) {
for (i = 0; i < 2; ++i)
{
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[z[i]]);
h[i].mapq = q_se[i];
h[i].flag |= 0x40<<i | extra_flag;
h[i].XA = XA[i]? XA[i][z[i]] : 0;
h[i].flag |= 0x40 << i | extra_flag;
h[i].XA = XA[i] ? XA[i][z[i]] : 0;
aa[i][n_aa[i]++] = h[i];
if (n_pri[i] < a[i].n) { // the read has ALT hits
if (n_pri[i] < a[i].n)
{ // the read has ALT hits
mem_alnreg_t *p = &a[i].a[n_pri[i]];
if (p->score < opt->T || p->secondary >= 0 || !p->is_alt) continue;
if (p->score < opt->T || p->secondary >= 0 || !p->is_alt)
continue;
g[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, p);
g[i].flag |= 0x800 | 0x40<<i | extra_flag;
g[i].XA = XA[i]? XA[i][n_pri[i]] : 0;
g[i].flag |= 0x800 | 0x40 << i | extra_flag;
g[i].XA = XA[i] ? XA[i][n_pri[i]] : 0;
aa[i][n_aa[i]++] = g[i];
}
}
for (i = 0; i < n_aa[0]; ++i)
mem_aln2sam(opt, bns, &str, &s[0], n_aa[0], aa[0], i, &h[1]); // write read1 hits
s[0].sam = strdup(str.s); str.l = 0;
s[0].sam = strdup(str.s);
str.l = 0;
for (i = 0; i < n_aa[1]; ++i)
mem_aln2sam(opt, bns, &str, &s[1], n_aa[1], aa[1], i, &h[0]); // write read2 hits
s[1].sam = str.s;
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
if (strcmp(s[0].name, s[1].name) != 0)
err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
// free
for (i = 0; i < 2; ++i) {
free(h[i].cigar); free(g[i].cigar);
if (XA[i] == 0) continue;
for (j = 0; j < a[i].n; ++j) free(XA[i][j]);
for (i = 0; i < 2; ++i)
{
free(h[i].cigar);
free(g[i].cigar);
if (XA[i] == 0)
continue;
for (j = 0; j < a[i].n; ++j)
free(XA[i][j]);
free(XA[i]);
}
} else goto no_pairing;
}
else
goto no_pairing;
return n;
no_pairing:
for (i = 0; i < 2; ++i) {
for (i = 0; i < 2; ++i)
{
int which = -1;
if (a[i].n) {
if (a[i].a[0].score >= opt->T) which = 0;
if (a[i].n)
{
if (a[i].a[0].score >= opt->T)
which = 0;
else if (n_pri[i] < a[i].n && a[i].a[n_pri[i]].score >= opt->T)
which = n_pri[i];
}
if (which >= 0) h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[which]);
else h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, 0);
if (which >= 0)
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[which]);
else
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, 0);
}
if (!(opt->flag & MEM_F_NOPAIRING) && h[0].rid == h[1].rid && h[0].rid >= 0) { // if the top hits from the two ends constitute a proper pair, flag it.
if (!(opt->flag & MEM_F_NOPAIRING) && h[0].rid == h[1].rid && h[0].rid >= 0)
{ // if the top hits from the two ends constitute a proper pair, flag it.
int64_t dist;
int d;
d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist);
if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) extra_flag |= 2;
if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high)
extra_flag |= 2;
}
mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]);
mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]);
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
free(h[0].cigar); free(h[1].cigar);
mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41 | extra_flag, &h[1]);
mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81 | extra_flag, &h[0]);
if (strcmp(s[0].name, s[1].name) != 0)
err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
free(h[0].cigar);
free(h[1].cigar);
return n;
}

347
bwt.c
View File

@ -36,16 +36,24 @@
#include "kvec.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#include "malloc_wrap.h"
#endif
#ifdef SHOW_PERF
extern int64_t get_mseconds();
extern int64_t time_bwt_sa,
time_bwt_occ4,
time_bwt_smem1a;
#endif
void bwt_gen_cnt_table(bwt_t *bwt)
{
int i, j;
for (i = 0; i != 256; ++i) {
for (i = 0; i != 256; ++i)
{
uint32_t x = 0;
for (j = 0; j != 4; ++j)
x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3);
x |= (((i & 3) == j) + ((i >> 2 & 3) == j) + ((i >> 4 & 3) == j) + (i >> 6 == j)) << (j << 3);
bwt->cnt_table[i] = x;
}
}
@ -55,7 +63,7 @@ static inline bwtint_t bwt_invPsi(const bwt_t *bwt, bwtint_t k) // compute inver
bwtint_t x = k - (k > bwt->primary);
x = bwt_B0(bwt, x);
x = bwt->L2[x] + bwt_occ(bwt, k, x);
return k == bwt->primary? 0 : x;
return k == bwt->primary ? 0 : x;
}
// bwt->bwt and bwt->occ must be precalculated
@ -68,37 +76,50 @@ void bwt_cal_sa(bwt_t *bwt, int intv)
xassert(intv_round == intv, "SA sample interval is not a power of 2.");
xassert(bwt->bwt, "bwt_t::bwt is not initialized.");
if (bwt->sa) free(bwt->sa);
if (bwt->sa)
free(bwt->sa);
bwt->sa_intv = intv;
bwt->n_sa = (bwt->seq_len + intv) / intv;
bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
bwt->sa = (bwtint_t *)calloc(bwt->n_sa, sizeof(bwtint_t));
// calculate SA value
isa = 0; sa = bwt->seq_len;
for (i = 0; i < bwt->seq_len; ++i) {
if (isa % intv == 0) bwt->sa[isa/intv] = sa;
isa = 0;
sa = bwt->seq_len;
for (i = 0; i < bwt->seq_len; ++i)
{
if (isa % intv == 0)
bwt->sa[isa / intv] = sa;
--sa;
isa = bwt_invPsi(bwt, isa);
}
if (isa % intv == 0) bwt->sa[isa/intv] = sa;
if (isa % intv == 0)
bwt->sa[isa / intv] = sa;
bwt->sa[0] = (bwtint_t)-1; // before this line, bwt->sa[0] = bwt->seq_len
}
bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k)
{
#ifdef SHOW_PERF
int64_t start_time = get_mseconds();
#endif
bwtint_t sa = 0, mask = bwt->sa_intv - 1;
while (k & mask) {
while (k & mask)
{
++sa;
k = bwt_invPsi(bwt, k);
}
/* without setting bwt->sa[0] = -1, the following line should be
changed to (sa + bwt->sa[k/bwt->sa_intv]) % (bwt->seq_len + 1) */
return sa + bwt->sa[k/bwt->sa_intv];
#ifdef SHOW_PERF
int64_t tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_bwt_sa, tmp_diff);
#endif
return sa + bwt->sa[k / bwt->sa_intv];
}
static inline int __occ_aux(uint64_t y, int c)
{
// reduce nucleotide counting to bits counting
y = ((c&2)? y : ~y) >> 1 & ((c&1)? y : ~y) & 0x5555555555555555ull;
y = ((c & 2) ? y : ~y) >> 1 & ((c & 1) ? y : ~y) & 0x5555555555555555ull;
// count the number of 1s in y
y = (y & 0x3333333333333333ull) + (y >> 2 & 0x3333333333333333ull);
return ((y + (y >> 4)) & 0xf0f0f0f0f0f0f0full) * 0x101010101010101ull >> 56;
@ -109,21 +130,25 @@ bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c)
bwtint_t n;
uint32_t *p, *end;
if (k == bwt->seq_len) return bwt->L2[c+1] - bwt->L2[c];
if (k == (bwtint_t)(-1)) return 0;
if (k == bwt->seq_len)
return bwt->L2[c + 1] - bwt->L2[c];
if (k == (bwtint_t)(-1))
return 0;
k -= (k >= bwt->primary); // because $ is not in bwt
// retrieve Occ at k/OCC_INTERVAL
n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
n = ((bwtint_t *)(p = bwt_occ_intv(bwt, k)))[c];
p += sizeof(bwtint_t); // jump to the start of the first BWT cell
// calculate Occ up to the last k/32
end = p + (((k>>5) - ((k&~OCC_INTV_MASK)>>5))<<1);
for (; p < end; p += 2) n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
end = p + (((k >> 5) - ((k & ~OCC_INTV_MASK) >> 5)) << 1);
for (; p < end; p += 2)
n += __occ_aux((uint64_t)p[0] << 32 | p[1], c);
// calculate Occ
n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
if (c == 0) n -= ~k&31; // corrected for the masked bits
n += __occ_aux(((uint64_t)p[0] << 32 | p[1]) & ~((1ull << ((~k & 31) << 1)) - 1), c);
if (c == 0)
n -= ~k & 31; // corrected for the masked bits
return n;
}
@ -132,45 +157,52 @@ bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c)
void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol)
{
bwtint_t _k, _l;
_k = (k >= bwt->primary)? k-1 : k;
_l = (l >= bwt->primary)? l-1 : l;
if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
_k = (k >= bwt->primary) ? k - 1 : k;
_l = (l >= bwt->primary) ? l - 1 : l;
if (_l / OCC_INTERVAL != _k / OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1))
{
*ok = bwt_occ(bwt, k, c);
*ol = bwt_occ(bwt, l, c);
} else {
}
else
{
bwtint_t m, n, i, j;
uint32_t *p;
if (k >= bwt->primary) --k;
if (l >= bwt->primary) --l;
n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
if (k >= bwt->primary)
--k;
if (l >= bwt->primary)
--l;
n = ((bwtint_t *)(p = bwt_occ_intv(bwt, k)))[c];
p += sizeof(bwtint_t);
// calculate *ok
j = k >> 5 << 5;
for (i = k/OCC_INTERVAL*OCC_INTERVAL; i < j; i += 32, p += 2)
n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
for (i = k / OCC_INTERVAL * OCC_INTERVAL; i < j; i += 32, p += 2)
n += __occ_aux((uint64_t)p[0] << 32 | p[1], c);
m = n;
n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
if (c == 0) n -= ~k&31; // corrected for the masked bits
n += __occ_aux(((uint64_t)p[0] << 32 | p[1]) & ~((1ull << ((~k & 31) << 1)) - 1), c);
if (c == 0)
n -= ~k & 31; // corrected for the masked bits
*ok = n;
// calculate *ol
j = l >> 5 << 5;
for (; i < j; i += 32, p += 2)
m += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
m += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~l&31)<<1)) - 1), c);
if (c == 0) m -= ~l&31; // corrected for the masked bits
m += __occ_aux((uint64_t)p[0] << 32 | p[1], c);
m += __occ_aux(((uint64_t)p[0] << 32 | p[1]) & ~((1ull << ((~l & 31) << 1)) - 1), c);
if (c == 0)
m -= ~l & 31; // corrected for the masked bits
*ol = m;
}
}
#define __occ_aux4(bwt, b) \
((bwt)->cnt_table[(b)&0xff] + (bwt)->cnt_table[(b)>>8&0xff] \
+ (bwt)->cnt_table[(b)>>16&0xff] + (bwt)->cnt_table[(b)>>24])
((bwt)->cnt_table[(b)&0xff] + (bwt)->cnt_table[(b) >> 8 & 0xff] + (bwt)->cnt_table[(b) >> 16 & 0xff] + (bwt)->cnt_table[(b) >> 24])
void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4])
{
bwtint_t x;
uint32_t *p, tmp, *end;
if (k == (bwtint_t)(-1)) {
if (k == (bwtint_t)(-1))
{
memset(cnt, 0, 4 * sizeof(bwtint_t));
return;
}
@ -178,23 +210,33 @@ void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4])
p = bwt_occ_intv(bwt, k);
memcpy(cnt, p, 4 * sizeof(bwtint_t));
p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t))
end = p + ((k>>4) - ((k&~OCC_INTV_MASK)>>4)); // this is the end point of the following loop
for (x = 0; p < end; ++p) x += __occ_aux4(bwt, *p);
tmp = *p & ~((1U<<((~k&15)<<1)) - 1);
x += __occ_aux4(bwt, tmp) - (~k&15);
cnt[0] += x&0xff; cnt[1] += x>>8&0xff; cnt[2] += x>>16&0xff; cnt[3] += x>>24;
end = p + ((k >> 4) - ((k & ~OCC_INTV_MASK) >> 4)); // this is the end point of the following loop
for (x = 0; p < end; ++p)
x += __occ_aux4(bwt, *p);
tmp = *p & ~((1U << ((~k & 15) << 1)) - 1);
x += __occ_aux4(bwt, tmp) - (~k & 15);
cnt[0] += x & 0xff;
cnt[1] += x >> 8 & 0xff;
cnt[2] += x >> 16 & 0xff;
cnt[3] += x >> 24;
}
// an analogy to bwt_occ4() but more efficient, requiring k <= l
void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4])
{
#ifdef SHOW_PERF
// int64_t start_time = get_mseconds();
#endif
bwtint_t _k, _l;
_k = k - (k >= bwt->primary);
_l = l - (l >= bwt->primary);
if (_l>>OCC_INTV_SHIFT != _k>>OCC_INTV_SHIFT || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
if (_l >> OCC_INTV_SHIFT != _k >> OCC_INTV_SHIFT || k == (bwtint_t)(-1) || l == (bwtint_t)(-1))
{
bwt_occ4(bwt, k, cntk);
bwt_occ4(bwt, l, cntl);
} else {
}
else
{
bwtint_t x, y;
uint32_t *p, tmp, *endk, *endl;
k -= (k >= bwt->primary); // because $ is not in bwt
@ -203,38 +245,57 @@ void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtin
memcpy(cntk, p, 4 * sizeof(bwtint_t));
p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t))
// prepare cntk[]
endk = p + ((k>>4) - ((k&~OCC_INTV_MASK)>>4));
endl = p + ((l>>4) - ((l&~OCC_INTV_MASK)>>4));
for (x = 0; p < endk; ++p) x += __occ_aux4(bwt, *p);
endk = p + ((k >> 4) - ((k & ~OCC_INTV_MASK) >> 4));
endl = p + ((l >> 4) - ((l & ~OCC_INTV_MASK) >> 4));
for (x = 0; p < endk; ++p)
x += __occ_aux4(bwt, *p);
y = x;
tmp = *p & ~((1U<<((~k&15)<<1)) - 1);
x += __occ_aux4(bwt, tmp) - (~k&15);
tmp = *p & ~((1U << ((~k & 15) << 1)) - 1);
x += __occ_aux4(bwt, tmp) - (~k & 15);
// calculate cntl[] and finalize cntk[]
for (; p < endl; ++p) y += __occ_aux4(bwt, *p);
tmp = *p & ~((1U<<((~l&15)<<1)) - 1);
y += __occ_aux4(bwt, tmp) - (~l&15);
for (; p < endl; ++p)
y += __occ_aux4(bwt, *p);
tmp = *p & ~((1U << ((~l & 15) << 1)) - 1);
y += __occ_aux4(bwt, tmp) - (~l & 15);
memcpy(cntl, cntk, 4 * sizeof(bwtint_t));
cntk[0] += x&0xff; cntk[1] += x>>8&0xff; cntk[2] += x>>16&0xff; cntk[3] += x>>24;
cntl[0] += y&0xff; cntl[1] += y>>8&0xff; cntl[2] += y>>16&0xff; cntl[3] += y>>24;
cntk[0] += x & 0xff;
cntk[1] += x >> 8 & 0xff;
cntk[2] += x >> 16 & 0xff;
cntk[3] += x >> 24;
cntl[0] += y & 0xff;
cntl[1] += y >> 8 & 0xff;
cntl[2] += y >> 16 & 0xff;
cntl[3] += y >> 24;
}
#ifdef SHOW_PERF
// int64_t tmp_diff = get_mseconds() - start_time;
// __sync_fetch_and_add(&time_bwt_occ4, tmp_diff);
#endif
}
int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end)
{
bwtint_t k, l, ok, ol;
int i;
k = 0; l = bwt->seq_len;
for (i = len - 1; i >= 0; --i) {
k = 0;
l = bwt->seq_len;
for (i = len - 1; i >= 0; --i)
{
ubyte_t c = str[i];
if (c > 3) return 0; // no match
if (c > 3)
return 0; // no match
bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
k = bwt->L2[c] + ok + 1;
l = bwt->L2[c] + ol;
if (k > l) break; // no match
if (k > l)
break; // no match
}
if (k > l) return 0; // no match
if (sa_begin) *sa_begin = k;
if (sa_end) *sa_end = l;
if (k > l)
return 0; // no match
if (sa_begin)
*sa_begin = k;
if (sa_end)
*sa_end = l;
return l - k + 1;
}
@ -242,16 +303,21 @@ int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t
{
int i;
bwtint_t k, l, ok, ol;
k = *k0; l = *l0;
for (i = len - 1; i >= 0; --i) {
k = *k0;
l = *l0;
for (i = len - 1; i >= 0; --i)
{
ubyte_t c = str[i];
if (c > 3) return 0; // there is an N here. no match
if (c > 3)
return 0; // there is an N here. no match
bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
k = bwt->L2[c] + ok + 1;
l = bwt->L2[c] + ol;
if (k > l) return 0; // no match
if (k > l)
return 0; // no match
}
*k0 = k; *l0 = l;
*k0 = k;
*l0 = l;
return l - k + 1;
}
@ -264,7 +330,8 @@ void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_b
bwtint_t tk[4], tl[4];
int i;
bwt_2occ4(bwt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], tk, tl);
for (i = 0; i != 4; ++i) {
for (i = 0; i != 4; ++i)
{
ok[i].x[!is_back] = bwt->L2[i] + 1 + tk[i];
ok[i].x[2] = tl[i] - tk[i];
}
@ -276,9 +343,11 @@ void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_b
static void bwt_reverse_intvs(bwtintv_v *p)
{
if (p->n > 1) {
if (p->n > 1)
{
int j;
for (j = 0; j < p->n>>1; ++j) {
for (j = 0; j < p->n >> 1; ++j)
{
bwtintv_t tmp = p->a[p->n - 1 - j];
p->a[p->n - 1 - j] = p->a[j];
p->a[j] = tmp;
@ -288,65 +357,102 @@ static void bwt_reverse_intvs(bwtintv_v *p)
// NOTE: $max_intv is not currently used in BWA-MEM
int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2])
{
#ifdef SHOW_PERF
int64_t start_time = get_mseconds();
#endif
int i, j, c, ret;
bwtintv_t ik, ok[4];
bwtintv_v a[2], *prev, *curr, *swap;
mem->n = 0;
if (q[x] > 3) return x + 1;
if (min_intv < 1) min_intv = 1; // the interval size should be at least 1
kv_init(a[0]); kv_init(a[1]);
prev = tmpvec && tmpvec[0]? tmpvec[0] : &a[0]; // use the temporary vector if provided
curr = tmpvec && tmpvec[1]? tmpvec[1] : &a[1];
if (q[x] > 3)
return x + 1;
if (min_intv < 1)
min_intv = 1; // the interval size should be at least 1
kv_init(a[0]);
kv_init(a[1]);
prev = tmpvec && tmpvec[0] ? tmpvec[0] : &a[0]; // use the temporary vector if provided
curr = tmpvec && tmpvec[1] ? tmpvec[1] : &a[1];
bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
ik.info = x + 1;
for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
if (ik.x[2] < max_intv) { // an interval small enough
for (i = x + 1, curr->n = 0; i < len; ++i)
{ // forward search
if (ik.x[2] < max_intv)
{ // an interval small enough
kv_push(bwtintv_t, *curr, ik);
break;
} else if (q[i] < 4) { // an A/C/G/T base
}
else if (q[i] < 4)
{ // an A/C/G/T base
c = 3 - q[i]; // complement of q[i]
bwt_extend(bwt, &ik, ok, 0);
if (ok[c].x[2] != ik.x[2]) { // change of the interval size
if (ok[c].x[2] != ik.x[2])
{ // change of the interval size
kv_push(bwtintv_t, *curr, ik);
if (ok[c].x[2] < min_intv) break; // the interval size is too small to be extended further
if (ok[c].x[2] < min_intv)
break; // the interval size is too small to be extended further
}
ik = ok[c]; ik.info = i + 1;
} else { // an ambiguous base
ik = ok[c];
ik.info = i + 1;
}
else
{ // an ambiguous base
kv_push(bwtintv_t, *curr, ik);
break; // always terminate extension at an ambiguous base; in this case, i<len always stands
}
}
if (i == len) kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
if (i == len)
kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
bwt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first
ret = curr->a[0].info; // this will be the returned value
swap = curr; curr = prev; prev = swap;
swap = curr;
curr = prev;
prev = swap;
for (i = x - 1; i >= -1; --i) { // backward search for MEMs
c = i < 0? -1 : q[i] < 4? q[i] : -1; // c==-1 if i<0 or q[i] is an ambiguous base
for (j = 0, curr->n = 0; j < prev->n; ++j) {
for (i = x - 1; i >= -1; --i)
{ // backward search for MEMs
c = i < 0 ? -1 : q[i] < 4 ? q[i]
: -1; // c==-1 if i<0 or q[i] is an ambiguous base
for (j = 0, curr->n = 0; j < prev->n; ++j)
{
bwtintv_t *p = &prev->a[j];
if (c >= 0 && ik.x[2] >= max_intv) bwt_extend(bwt, p, ok, 1);
if (c < 0 || ik.x[2] < max_intv || ok[c].x[2] < min_intv) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough
if (curr->n == 0) { // test curr->n>0 to make sure there are no longer matches
if (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches
ik = *p; ik.info |= (uint64_t)(i + 1)<<32;
if (c >= 0 && ik.x[2] >= max_intv)
bwt_extend(bwt, p, ok, 1);
if (c < 0 || ik.x[2] < max_intv || ok[c].x[2] < min_intv)
{ // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough
if (curr->n == 0)
{ // test curr->n>0 to make sure there are no longer matches
if (mem->n == 0 || i + 1 < mem->a[mem->n - 1].info >> 32)
{ // skip contained matches
ik = *p;
ik.info |= (uint64_t)(i + 1) << 32;
kv_push(bwtintv_t, *mem, ik);
}
} // otherwise the match is contained in another longer match
} else if (curr->n == 0 || ok[c].x[2] != curr->a[curr->n-1].x[2]) {
}
else if (curr->n == 0 || ok[c].x[2] != curr->a[curr->n - 1].x[2])
{
ok[c].info = p->info;
kv_push(bwtintv_t, *curr, ok[c]);
}
}
if (curr->n == 0) break;
swap = curr; curr = prev; prev = swap;
if (curr->n == 0)
break;
swap = curr;
curr = prev;
prev = swap;
}
bwt_reverse_intvs(mem); // s.t. sorted by the start coordinate
if (tmpvec == 0 || tmpvec[0] == 0) free(a[0].a);
if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);
if (tmpvec == 0 || tmpvec[0] == 0)
free(a[0].a);
if (tmpvec == 0 || tmpvec[1] == 0)
free(a[1].a);
#ifdef SHOW_PERF
int64_t tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_bwt_smem1a, tmp_diff);
#endif
return ret;
}
@ -361,19 +467,25 @@ int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int m
bwtintv_t ik, ok[4];
memset(mem, 0, sizeof(bwtintv_t));
if (q[x] > 3) return x + 1;
if (q[x] > 3)
return x + 1;
bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
for (i = x + 1; i < len; ++i) { // forward search
if (q[i] < 4) { // an A/C/G/T base
for (i = x + 1; i < len; ++i)
{ // forward search
if (q[i] < 4)
{ // an A/C/G/T base
c = 3 - q[i]; // complement of q[i]
bwt_extend(bwt, &ik, ok, 0);
if (ok[c].x[2] < max_intv && i - x >= min_len) {
if (ok[c].x[2] < max_intv && i - x >= min_len)
{
*mem = ok[c];
mem->info = (uint64_t)x<<32 | (i + 1);
mem->info = (uint64_t)x << 32 | (i + 1);
return i + 1;
}
ik = ok[c];
} else return i + 1;
}
else
return i + 1;
}
return len;
}
@ -387,7 +499,7 @@ void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
FILE *fp;
fp = xopen(fn, "wb");
err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
err_fwrite(bwt->L2 + 1, sizeof(bwtint_t), 4, fp);
err_fwrite(bwt->bwt, 4, bwt->bwt_size, fp);
err_fflush(fp);
err_fclose(fp);
@ -398,7 +510,7 @@ void bwt_dump_sa(const char *fn, const bwt_t *bwt)
FILE *fp;
fp = xopen(fn, "wb");
err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
err_fwrite(bwt->L2 + 1, sizeof(bwtint_t), 4, fp);
err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp);
err_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp);
@ -410,10 +522,13 @@ static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
{ // Mac/Darwin has a bug when reading data longer than 2GB. This function fixes this issue by reading data in small chunks
const int bufsize = 0x1000000; // 16M block
bwtint_t offset = 0;
while (size) {
int x = bufsize < size? bufsize : size;
if ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0) break;
size -= x; offset += x;
while (size)
{
int x = bufsize < size ? bufsize : size;
if ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0)
break;
size -= x;
offset += x;
}
return offset;
}
@ -433,7 +548,7 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt)
xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same.");
bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv;
bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
bwt->sa = (bwtint_t *)calloc(bwt->n_sa, sizeof(bwtint_t));
bwt->sa[0] = -1;
fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1);
@ -445,15 +560,15 @@ bwt_t *bwt_restore_bwt(const char *fn)
bwt_t *bwt;
FILE *fp;
bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
bwt = (bwt_t *)calloc(1, sizeof(bwt_t));
fp = xopen(fn, "rb");
err_fseek(fp, 0, SEEK_END);
bwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2;
bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
bwt->bwt = (uint32_t *)calloc(bwt->bwt_size, 4);
err_fseek(fp, 0, SEEK_SET);
err_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp);
err_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp);
fread_fix(fp, bwt->bwt_size<<2, bwt->bwt);
err_fread_noeof(bwt->L2 + 1, sizeof(bwtint_t), 4, fp);
fread_fix(fp, bwt->bwt_size << 2, bwt->bwt);
bwt->seq_len = bwt->L2[4];
err_fclose(fp);
bwt_gen_cnt_table(bwt);
@ -463,7 +578,9 @@ bwt_t *bwt_restore_bwt(const char *fn)
void bwt_destroy(bwt_t *bwt)
{
if (bwt == 0) return;
free(bwt->sa); free(bwt->bwt);
if (bwt == 0)
return;
free(bwt->sa);
free(bwt->bwt);
free(bwt);
}

View File

@ -42,10 +42,9 @@
#endif
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#include "malloc_wrap.h"
#endif
int is_bwt(ubyte_t *T, int n);
int64_t bwa_seq_len(const char *fn_pac)
@ -69,46 +68,55 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
FILE *fp;
// initialization
bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
bwt = (bwt_t *)calloc(1, sizeof(bwt_t));
bwt->seq_len = bwa_seq_len(fn_pac);
bwt->bwt_size = (bwt->seq_len + 15) >> 4;
fp = xopen(fn_pac, "rb");
// prepare sequence
pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1);
buf2 = (ubyte_t*)calloc(pac_size, 1);
pac_size = (bwt->seq_len >> 2) + ((bwt->seq_len & 3) == 0 ? 0 : 1);
buf2 = (ubyte_t *)calloc(pac_size, 1);
err_fread_noeof(buf2, 1, pac_size, fp);
err_fclose(fp);
memset(bwt->L2, 0, 5 * 4);
buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1);
for (i = 0; i < bwt->seq_len; ++i) {
buf[i] = buf2[i>>2] >> ((3 - (i&3)) << 1) & 3;
++bwt->L2[1+buf[i]];
buf = (ubyte_t *)calloc(bwt->seq_len + 1, 1);
for (i = 0; i < bwt->seq_len; ++i)
{
buf[i] = buf2[i >> 2] >> ((3 - (i & 3)) << 1) & 3;
++bwt->L2[1 + buf[i]];
}
for (i = 2; i <= 4; ++i) bwt->L2[i] += bwt->L2[i-1];
for (i = 2; i <= 4; ++i)
bwt->L2[i] += bwt->L2[i - 1];
free(buf2);
// Burrows-Wheeler Transform
if (use_is) {
if (use_is)
{
bwt->primary = is_bwt(buf, bwt->seq_len);
} else {
}
else
{
rope_t *r;
int64_t x;
rpitr_t itr;
const uint8_t *blk;
r = rope_init(ROPE_DEF_MAX_NODES, ROPE_DEF_BLOCK_LEN);
for (i = bwt->seq_len - 1, x = 0; i >= 0; --i) {
for (i = bwt->seq_len - 1, x = 0; i >= 0; --i)
{
int c = buf[i] + 1;
x = rope_insert_run(r, x, c, 1, 0) + 1;
while (--c >= 0) x += r->c[c];
while (--c >= 0)
x += r->c[c];
}
bwt->primary = x;
rope_itr_first(r, &itr);
x = 0;
while ((blk = rope_itr_next_block(&itr)) != 0) {
while ((blk = rope_itr_next_block(&itr)) != 0)
{
const uint8_t *q = blk + 2, *end = blk + 2 + *rle_nptr(blk);
while (q < end) {
while (q < end)
{
int c = 0;
int64_t l;
rle_dec1(q, c, l);
@ -118,9 +126,9 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
}
rope_destroy(r);
}
bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
bwt->bwt = (uint32_t *)calloc(bwt->bwt_size, 4);
for (i = 0; i < bwt->seq_len; ++i)
bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1);
bwt->bwt[i >> 4] |= buf[i] << ((15 - (i & 15)) << 1);
free(buf);
return bwt;
}
@ -129,23 +137,29 @@ int bwa_pac2bwt(int argc, char *argv[]) // the "pac2bwt" command; IMPORTANT: bwt
{
bwt_t *bwt;
int c, use_is = 1;
while ((c = getopt(argc, argv, "d")) >= 0) {
switch (c) {
case 'd': use_is = 0; break;
default: return 1;
while ((c = getopt(argc, argv, "d")) >= 0)
{
switch (c)
{
case 'd':
use_is = 0;
break;
default:
return 1;
}
}
if (optind + 2 > argc) {
if (optind + 2 > argc)
{
fprintf(stderr, "Usage: bwa pac2bwt [-d] <in.pac> <out.bwt>\n");
return 1;
}
bwt = bwt_pac2bwt(argv[optind], use_is);
bwt_dump_bwt(argv[optind+1], bwt);
bwt_dump_bwt(argv[optind + 1], bwt);
bwt_destroy(bwt);
return 0;
}
#define bwt_B00(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3)
#define bwt_B00(b, k) ((b)->bwt[(k) >> 4] >> ((~(k)&0xf) << 1) & 3)
void bwt_bwtupdate_core(bwt_t *bwt)
{
@ -154,27 +168,32 @@ void bwt_bwtupdate_core(bwt_t *bwt)
n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
bwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size
buf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt
buf = (uint32_t *)calloc(bwt->bwt_size, 4); // will be the new bwt
c[0] = c[1] = c[2] = c[3] = 0;
for (i = k = 0; i < bwt->seq_len; ++i) {
if (i % OCC_INTERVAL == 0) {
for (i = k = 0; i < bwt->seq_len; ++i)
{
if (i % OCC_INTERVAL == 0)
{
memcpy(buf + k, c, sizeof(bwtint_t) * 4);
k += sizeof(bwtint_t); // in fact: sizeof(bwtint_t)=4*(sizeof(bwtint_t)/4)
}
if (i % 16 == 0) buf[k++] = bwt->bwt[i/16]; // 16 == sizeof(uint32_t)/2
if (i % 16 == 0)
buf[k++] = bwt->bwt[i / 16]; // 16 == sizeof(uint32_t)/2
++c[bwt_B00(bwt, i)];
}
// the last element
memcpy(buf + k, c, sizeof(bwtint_t) * 4);
xassert(k + sizeof(bwtint_t) == bwt->bwt_size, "inconsistent bwt_size");
// update bwt
free(bwt->bwt); bwt->bwt = buf;
free(bwt->bwt);
bwt->bwt = buf;
}
int bwa_bwtupdate(int argc, char *argv[]) // the "bwtupdate" command
{
bwt_t *bwt;
if (argc != 2) {
if (argc != 2)
{
fprintf(stderr, "Usage: bwa bwtupdate <the.bwt>\n");
return 1;
}
@ -189,19 +208,25 @@ int bwa_bwt2sa(int argc, char *argv[]) // the "bwt2sa" command
{
bwt_t *bwt;
int c, sa_intv = 32;
while ((c = getopt(argc, argv, "i:")) >= 0) {
switch (c) {
case 'i': sa_intv = atoi(optarg); break;
default: return 1;
while ((c = getopt(argc, argv, "i:")) >= 0)
{
switch (c)
{
case 'i':
sa_intv = atoi(optarg);
break;
default:
return 1;
}
}
if (optind + 2 > argc) {
if (optind + 2 > argc)
{
fprintf(stderr, "Usage: bwa bwt2sa [-i %d] <in.bwt> <out.sa>\n", sa_intv);
return 1;
}
bwt = bwt_restore_bwt(argv[optind]);
bwt_cal_sa(bwt, sa_intv);
bwt_dump_sa(argv[optind+1], bwt);
bwt_dump_sa(argv[optind + 1], bwt);
bwt_destroy(bwt);
return 0;
}
@ -210,27 +235,42 @@ int bwa_index(int argc, char *argv[]) // the "index" command
{
int c, algo_type = BWTALGO_AUTO, is_64 = 0, block_size = 10000000;
char *prefix = 0, *str;
while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) {
switch (c) {
while ((c = getopt(argc, argv, "6a:p:b:")) >= 0)
{
switch (c)
{
case 'a': // if -a is not set, algo_type will be determined later
if (strcmp(optarg, "rb2") == 0) algo_type = BWTALGO_RB2;
else if (strcmp(optarg, "bwtsw") == 0) algo_type = BWTALGO_BWTSW;
else if (strcmp(optarg, "is") == 0) algo_type = BWTALGO_IS;
else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
if (strcmp(optarg, "rb2") == 0)
algo_type = BWTALGO_RB2;
else if (strcmp(optarg, "bwtsw") == 0)
algo_type = BWTALGO_BWTSW;
else if (strcmp(optarg, "is") == 0)
algo_type = BWTALGO_IS;
else
err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
break;
case 'p':
prefix = strdup(optarg);
break;
case '6':
is_64 = 1;
break;
case 'p': prefix = strdup(optarg); break;
case '6': is_64 = 1; break;
case 'b':
block_size = strtol(optarg, &str, 10);
if (*str == 'G' || *str == 'g') block_size *= 1024 * 1024 * 1024;
else if (*str == 'M' || *str == 'm') block_size *= 1024 * 1024;
else if (*str == 'K' || *str == 'k') block_size *= 1024;
if (*str == 'G' || *str == 'g')
block_size *= 1024 * 1024 * 1024;
else if (*str == 'M' || *str == 'm')
block_size *= 1024 * 1024;
else if (*str == 'K' || *str == 'k')
block_size *= 1024;
break;
default: return 1;
default:
return 1;
}
}
if (optind + 1 > argc) {
if (optind + 1 > argc)
{
fprintf(stderr, "\n");
fprintf(stderr, "Usage: bwa index [options] <in.fasta>\n\n");
fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw, is or rb2 [auto]\n");
@ -242,10 +282,12 @@ int bwa_index(int argc, char *argv[]) // the "index" command
fprintf(stderr, " `-a div' do not work not for long genomes.\n\n");
return 1;
}
if (prefix == 0) {
if (prefix == 0)
{
prefix = malloc(strlen(argv[optind]) + 4);
strcpy(prefix, argv[optind]);
if (is_64) strcat(prefix, ".64");
if (is_64)
strcat(prefix, ".64");
}
bwa_idx_build(argv[optind], prefix, algo_type, block_size);
free(prefix);
@ -260,64 +302,84 @@ int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_s
clock_t t;
int64_t l_pac;
str = (char*)calloc(strlen(prefix) + 10, 1);
str2 = (char*)calloc(strlen(prefix) + 10, 1);
str3 = (char*)calloc(strlen(prefix) + 10, 1);
str = (char *)calloc(strlen(prefix) + 10, 1);
str2 = (char *)calloc(strlen(prefix) + 10, 1);
str3 = (char *)calloc(strlen(prefix) + 10, 1);
{ // nucleotide indexing
gzFile fp = xzopen(fa, "r");
t = clock();
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack FASTA... ");
if (bwa_verbose >= 3)
fprintf(stderr, "[bwa_index] Pack FASTA... ");
l_pac = bns_fasta2bntseq(fp, prefix, 0);
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
if (bwa_verbose >= 3)
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
err_gzclose(fp);
}
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
if (algo_type == 0)
algo_type = l_pac > 50000000 ? 2 : 3; // set the algorithm for generating BWT
{
strcpy(str, prefix); strcat(str, ".pac");
strcpy(str2, prefix); strcat(str2, ".bwt");
strcpy(str, prefix);
strcat(str, ".pac");
strcpy(str2, prefix);
strcat(str2, ".bwt");
t = clock();
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
if (algo_type == 2) bwt_bwtgen2(str, str2, block_size);
else if (algo_type == 1 || algo_type == 3) {
if (bwa_verbose >= 3)
fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
if (algo_type == 2)
bwt_bwtgen2(str, str2, block_size);
else if (algo_type == 1 || algo_type == 3)
{
bwt_t *bwt;
bwt = bwt_pac2bwt(str, algo_type == 3);
bwt_dump_bwt(str2, bwt);
bwt_destroy(bwt);
}
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
if (bwa_verbose >= 3)
fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
}
{
bwt_t *bwt;
strcpy(str, prefix); strcat(str, ".bwt");
strcpy(str, prefix);
strcat(str, ".bwt");
t = clock();
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Update BWT... ");
if (bwa_verbose >= 3)
fprintf(stderr, "[bwa_index] Update BWT... ");
bwt = bwt_restore_bwt(str);
bwt_bwtupdate_core(bwt);
bwt_dump_bwt(str, bwt);
bwt_destroy(bwt);
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
if (bwa_verbose >= 3)
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
}
{
gzFile fp = xzopen(fa, "r");
t = clock();
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
if (bwa_verbose >= 3)
fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
l_pac = bns_fasta2bntseq(fp, prefix, 1);
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
if (bwa_verbose >= 3)
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
err_gzclose(fp);
}
{
bwt_t *bwt;
strcpy(str, prefix); strcat(str, ".bwt");
strcpy(str3, prefix); strcat(str3, ".sa");
strcpy(str, prefix);
strcat(str, ".bwt");
strcpy(str3, prefix);
strcat(str3, ".sa");
t = clock();
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
if (bwa_verbose >= 3)
fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
bwt = bwt_restore_bwt(str);
bwt_cal_sa(bwt, 32);
bwt_dump_sa(str3, bwt);
bwt_destroy(bwt);
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
if (bwa_verbose >= 3)
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
}
free(str3); free(str2); free(str);
free(str3);
free(str2);
free(str);
return 0;
}

615
fastmap.c
View File

@ -40,13 +40,48 @@
#include "kseq.h"
KSEQ_DECLARE(gzFile)
FILE *query_f = 0, *target_f = 0, *info_f = 0;
// 记录运行时间的变量
#ifdef SHOW_PERF
// 用来调试,计算感兴趣部分的运行时间
#include "sys/time.h"
// 获取当前毫秒数
int64_t get_mseconds()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return (int64_t)1000 * (tv.tv_sec + ((1e-6) * tv.tv_usec));
}
int64_t time_ksw_extend2 = 0,
time_ksw_global2 = 0,
time_ksw_align2 = 0,
time_bwt_smem1a = 0,
time_bwt_occ4 = 0,
time_bwt_sa = 0,
time_mem_chain = 0,
time_worker1 = 0,
time_worker2 = 0,
time_process_step0 = 0,
time_process_step1 = 0,
time_process_step2 = 0,
time_before_process = 0,
time_process = 0,
time_after_process = 0,
count_process_step2 = 0,
count_ksw_extend2 = 0;
#endif
//////////////////////////////////
extern unsigned char nst_nt4_table[256];
void *kopen(const char *fn, int *_fd);
int kclose(void *a);
void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps);
void kt_pipeline(int n_threads, void *(*func)(void *, int, void *), void *shared_data, int n_steps);
typedef struct {
typedef struct
{
kseq_t *ks, *ks2;
mem_opt_t *opt;
mem_pestat_t *pes0;
@ -55,7 +90,8 @@ typedef struct {
bwaidx_t *idx;
} ktp_aux_t;
typedef struct {
typedef struct
{
ktp_aux_t *aux;
int n_seqs;
bseq1_t *seqs;
@ -63,60 +99,106 @@ typedef struct {
static void *process(void *shared, int step, void *_data)
{
ktp_aux_t *aux = (ktp_aux_t*)shared;
ktp_data_t *data = (ktp_data_t*)_data;
ktp_aux_t *aux = (ktp_aux_t *)shared;
ktp_data_t *data = (ktp_data_t *)_data;
int i;
if (step == 0) {
if (step == 0)
{
#ifdef SHOW_PERF
int64_t start_time = get_mseconds();
#endif
ktp_data_t *ret;
int64_t size = 0;
ret = calloc(1, sizeof(ktp_data_t));
ret->seqs = bseq_read(aux->actual_chunk_size, &ret->n_seqs, aux->ks, aux->ks2);
if (ret->seqs == 0) {
if (ret->seqs == 0)
{
free(ret);
#ifdef SHOW_PERF
int64_t tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_process_step0, tmp_diff);
#endif
return 0;
}
if (!aux->copy_comment)
for (i = 0; i < ret->n_seqs; ++i) {
for (i = 0; i < ret->n_seqs; ++i)
{
free(ret->seqs[i].comment);
ret->seqs[i].comment = 0;
}
for (i = 0; i < ret->n_seqs; ++i) size += ret->seqs[i].l_seq;
for (i = 0; i < ret->n_seqs; ++i)
size += ret->seqs[i].l_seq;
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, ret->n_seqs, (long)size);
#ifdef SHOW_PERF
int64_t tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_process_step0, tmp_diff);
#endif
return ret;
} else if (step == 1) {
}
else if (step == 1)
{
#ifdef SHOW_PERF
int64_t start_time = get_mseconds();
#endif
const mem_opt_t *opt = aux->opt;
const bwaidx_t *idx = aux->idx;
if (opt->flag & MEM_F_SMARTPE) {
if (opt->flag & MEM_F_SMARTPE)
{
bseq1_t *sep[2];
int n_sep[2];
mem_opt_t tmp_opt = *opt;
bseq_classify(data->n_seqs, data->seqs, n_sep, sep);
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] %d single-end sequences; %d paired-end sequences\n", __func__, n_sep[0], n_sep[1]);
if (n_sep[0]) {
if (n_sep[0])
{
tmp_opt.flag &= ~MEM_F_PE;
mem_process_seqs(&tmp_opt, idx->bwt, idx->bns, idx->pac, aux->n_processed, n_sep[0], sep[0], 0);
for (i = 0; i < n_sep[0]; ++i)
data->seqs[sep[0][i].id].sam = sep[0][i].sam;
}
if (n_sep[1]) {
if (n_sep[1])
{
tmp_opt.flag |= MEM_F_PE;
mem_process_seqs(&tmp_opt, idx->bwt, idx->bns, idx->pac, aux->n_processed + n_sep[0], n_sep[1], sep[1], aux->pes0);
for (i = 0; i < n_sep[1]; ++i)
data->seqs[sep[1][i].id].sam = sep[1][i].sam;
}
free(sep[0]); free(sep[1]);
} else mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, aux->n_processed, data->n_seqs, data->seqs, aux->pes0);
aux->n_processed += data->n_seqs;
return data;
} else if (step == 2) {
for (i = 0; i < data->n_seqs; ++i) {
if (data->seqs[i].sam) err_fputs(data->seqs[i].sam, stdout);
free(data->seqs[i].name); free(data->seqs[i].comment);
free(data->seqs[i].seq); free(data->seqs[i].qual); free(data->seqs[i].sam);
free(sep[0]);
free(sep[1]);
}
free(data->seqs); free(data);
else
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, aux->n_processed, data->n_seqs, data->seqs, aux->pes0);
aux->n_processed += data->n_seqs;
#ifdef SHOW_PERF
int64_t tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_process_step1, tmp_diff);
__sync_fetch_and_add(&count_process_step2, 1);
#endif
return data;
}
else if (step == 2)
{
#ifdef SHOW_PERF
int64_t start_time = get_mseconds();
#endif
for (i = 0; i < data->n_seqs; ++i)
{
if (data->seqs[i].sam)
err_fputs(data->seqs[i].sam, stdout);
free(data->seqs[i].name);
free(data->seqs[i].comment);
free(data->seqs[i].seq);
free(data->seqs[i].qual);
free(data->seqs[i].sam);
}
free(data->seqs);
free(data);
#ifdef SHOW_PERF
int64_t tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_process_step2, tmp_diff);
#endif
return 0;
}
return 0;
@ -124,22 +206,36 @@ static void *process(void *shared, int step, void *_data)
static void update_a(mem_opt_t *opt, const mem_opt_t *opt0)
{
if (opt0->a) { // matching score is changed
if (!opt0->b) opt->b *= opt->a;
if (!opt0->T) opt->T *= opt->a;
if (!opt0->o_del) opt->o_del *= opt->a;
if (!opt0->e_del) opt->e_del *= opt->a;
if (!opt0->o_ins) opt->o_ins *= opt->a;
if (!opt0->e_ins) opt->e_ins *= opt->a;
if (!opt0->zdrop) opt->zdrop *= opt->a;
if (!opt0->pen_clip5) opt->pen_clip5 *= opt->a;
if (!opt0->pen_clip3) opt->pen_clip3 *= opt->a;
if (!opt0->pen_unpaired) opt->pen_unpaired *= opt->a;
if (opt0->a)
{ // matching score is changed
if (!opt0->b)
opt->b *= opt->a;
if (!opt0->T)
opt->T *= opt->a;
if (!opt0->o_del)
opt->o_del *= opt->a;
if (!opt0->e_del)
opt->e_del *= opt->a;
if (!opt0->o_ins)
opt->o_ins *= opt->a;
if (!opt0->e_ins)
opt->e_ins *= opt->a;
if (!opt0->zdrop)
opt->zdrop *= opt->a;
if (!opt0->pen_clip5)
opt->pen_clip5 *= opt->a;
if (!opt0->pen_clip3)
opt->pen_clip3 *= opt->a;
if (!opt0->pen_unpaired)
opt->pen_unpaired *= opt->a;
}
}
int main_mem(int argc, char *argv[])
{
#ifdef SHOW_PERF
int64_t start_time = get_mseconds();
#endif
mem_opt_t *opt, opt0;
int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0;
int fixed_chunk_size = -1;
@ -152,119 +248,187 @@ int main_mem(int argc, char *argv[])
memset(&aux, 0, sizeof(ktp_aux_t));
memset(pes, 0, 4 * sizeof(mem_pestat_t));
for (i = 0; i < 4; ++i) pes[i].failed = 1;
for (i = 0; i < 4; ++i)
pes[i].failed = 1;
query_f = fopen("query.fa", "w");
target_f = fopen("target.fa", "w");
info_f = fopen("info.txt", "w");
aux.opt = opt = mem_opt_init();
memset(&opt0, 0, sizeof(mem_opt_t));
while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
else if (c == '1') no_mt_io = 1;
else if (c == 'x') mode = optarg;
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
else if (c == 'A') opt->a = atoi(optarg), opt0.a = 1;
else if (c == 'B') opt->b = atoi(optarg), opt0.b = 1;
else if (c == 'T') opt->T = atoi(optarg), opt0.T = 1;
else if (c == 'U') opt->pen_unpaired = atoi(optarg), opt0.pen_unpaired = 1;
else if (c == 't') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? opt->n_threads : 1;
else if (c == 'P') opt->flag |= MEM_F_NOPAIRING;
else if (c == 'a') opt->flag |= MEM_F_ALL;
else if (c == 'p') opt->flag |= MEM_F_PE | MEM_F_SMARTPE;
else if (c == 'M') opt->flag |= MEM_F_NO_MULTI;
else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE;
else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP;
else if (c == 'V') opt->flag |= MEM_F_REF_HDR;
else if (c == '5') opt->flag |= MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ; // always apply MEM_F_KEEP_SUPP_MAPQ with -5
else if (c == 'q') opt->flag |= MEM_F_KEEP_SUPP_MAPQ;
else if (c == 'u') opt->flag |= MEM_F_XB;
else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1;
else if (c == 'v') bwa_verbose = atoi(optarg);
else if (c == 'j') ignore_alt = 1;
else if (c == 'r') opt->split_factor = atof(optarg), opt0.split_factor = 1.;
else if (c == 'D') opt->drop_ratio = atof(optarg), opt0.drop_ratio = 1.;
else if (c == 'm') opt->max_matesw = atoi(optarg), opt0.max_matesw = 1;
else if (c == 's') opt->split_width = atoi(optarg), opt0.split_width = 1;
else if (c == 'G') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1;
else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1;
else if (c == 'o' || c == 'f') xreopen(optarg, "wb", stdout);
else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1;
else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1;
else if (c == 'C') aux.copy_comment = 1;
else if (c == 'K') fixed_chunk_size = atoi(optarg);
else if (c == 'X') opt->mask_level = atof(optarg);
else if (c == 'F') bwa_dbg = atoi(optarg);
else if (c == 'h') {
while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:")) >= 0)
{
if (c == 'k')
opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
else if (c == '1')
no_mt_io = 1;
else if (c == 'x')
mode = optarg;
else if (c == 'w')
opt->w = atoi(optarg), opt0.w = 1;
else if (c == 'A')
opt->a = atoi(optarg), opt0.a = 1;
else if (c == 'B')
opt->b = atoi(optarg), opt0.b = 1;
else if (c == 'T')
opt->T = atoi(optarg), opt0.T = 1;
else if (c == 'U')
opt->pen_unpaired = atoi(optarg), opt0.pen_unpaired = 1;
else if (c == 't')
opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1 ? opt->n_threads : 1;
else if (c == 'P')
opt->flag |= MEM_F_NOPAIRING;
else if (c == 'a')
opt->flag |= MEM_F_ALL;
else if (c == 'p')
opt->flag |= MEM_F_PE | MEM_F_SMARTPE;
else if (c == 'M')
opt->flag |= MEM_F_NO_MULTI;
else if (c == 'S')
opt->flag |= MEM_F_NO_RESCUE;
else if (c == 'Y')
opt->flag |= MEM_F_SOFTCLIP;
else if (c == 'V')
opt->flag |= MEM_F_REF_HDR;
else if (c == '5')
opt->flag |= MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ; // always apply MEM_F_KEEP_SUPP_MAPQ with -5
else if (c == 'q')
opt->flag |= MEM_F_KEEP_SUPP_MAPQ;
else if (c == 'u')
opt->flag |= MEM_F_XB;
else if (c == 'c')
opt->max_occ = atoi(optarg), opt0.max_occ = 1;
else if (c == 'd')
opt->zdrop = atoi(optarg), opt0.zdrop = 1;
else if (c == 'v')
bwa_verbose = atoi(optarg);
else if (c == 'j')
ignore_alt = 1;
else if (c == 'r')
opt->split_factor = atof(optarg), opt0.split_factor = 1.;
else if (c == 'D')
opt->drop_ratio = atof(optarg), opt0.drop_ratio = 1.;
else if (c == 'm')
opt->max_matesw = atoi(optarg), opt0.max_matesw = 1;
else if (c == 's')
opt->split_width = atoi(optarg), opt0.split_width = 1;
else if (c == 'G')
opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1;
else if (c == 'N')
opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1;
else if (c == 'o' || c == 'f')
xreopen(optarg, "wb", stdout);
else if (c == 'W')
opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1;
else if (c == 'y')
opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1;
else if (c == 'C')
aux.copy_comment = 1;
else if (c == 'K')
fixed_chunk_size = atoi(optarg);
else if (c == 'X')
opt->mask_level = atof(optarg);
else if (c == 'F')
bwa_dbg = atoi(optarg);
else if (c == 'h')
{
opt0.max_XA_hits = opt0.max_XA_hits_alt = 1;
opt->max_XA_hits = opt->max_XA_hits_alt = strtol(optarg, &p, 10);
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
opt->max_XA_hits_alt = strtol(p+1, &p, 10);
opt->max_XA_hits_alt = strtol(p + 1, &p, 10);
}
else if (c == 'z') opt->XA_drop_ratio = atof(optarg);
else if (c == 'Q') {
else if (c == 'z')
opt->XA_drop_ratio = atof(optarg);
else if (c == 'Q')
{
opt0.mapQ_coef_len = 1;
opt->mapQ_coef_len = atoi(optarg);
opt->mapQ_coef_fac = opt->mapQ_coef_len > 0? log(opt->mapQ_coef_len) : 0;
} else if (c == 'O') {
opt->mapQ_coef_fac = opt->mapQ_coef_len > 0 ? log(opt->mapQ_coef_len) : 0;
}
else if (c == 'O')
{
opt0.o_del = opt0.o_ins = 1;
opt->o_del = opt->o_ins = strtol(optarg, &p, 10);
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
opt->o_ins = strtol(p+1, &p, 10);
} else if (c == 'E') {
opt->o_ins = strtol(p + 1, &p, 10);
}
else if (c == 'E')
{
opt0.e_del = opt0.e_ins = 1;
opt->e_del = opt->e_ins = strtol(optarg, &p, 10);
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
opt->e_ins = strtol(p+1, &p, 10);
} else if (c == 'L') {
opt->e_ins = strtol(p + 1, &p, 10);
}
else if (c == 'L')
{
opt0.pen_clip5 = opt0.pen_clip3 = 1;
opt->pen_clip5 = opt->pen_clip3 = strtol(optarg, &p, 10);
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
opt->pen_clip3 = strtol(p+1, &p, 10);
} else if (c == 'R') {
if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak
} else if (c == 'H') {
if (optarg[0] != '@') {
opt->pen_clip3 = strtol(p + 1, &p, 10);
}
else if (c == 'R')
{
if ((rg_line = bwa_set_rg(optarg)) == 0)
return 1; // FIXME: memory leak
}
else if (c == 'H')
{
if (optarg[0] != '@')
{
FILE *fp;
if ((fp = fopen(optarg, "r")) != 0) {
if ((fp = fopen(optarg, "r")) != 0)
{
char *buf;
buf = calloc(1, 0x10000);
while (fgets(buf, 0xffff, fp)) {
while (fgets(buf, 0xffff, fp))
{
i = strlen(buf);
assert(buf[i-1] == '\n'); // a long line
buf[i-1] = 0;
assert(buf[i - 1] == '\n'); // a long line
buf[i - 1] = 0;
hdr_line = bwa_insert_header(buf, hdr_line);
}
free(buf);
fclose(fp);
}
} else hdr_line = bwa_insert_header(optarg, hdr_line);
} else if (c == 'I') { // specify the insert size distribution
}
else
hdr_line = bwa_insert_header(optarg, hdr_line);
}
else if (c == 'I')
{ // specify the insert size distribution
aux.pes0 = pes;
pes[1].failed = 0;
pes[1].avg = strtod(optarg, &p);
pes[1].std = pes[1].avg * .1;
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
pes[1].std = strtod(p+1, &p);
pes[1].std = strtod(p + 1, &p);
pes[1].high = (int)(pes[1].avg + 4. * pes[1].std + .499);
pes[1].low = (int)(pes[1].avg - 4. * pes[1].std + .499);
if (pes[1].low < 1) pes[1].low = 1;
if (pes[1].low < 1)
pes[1].low = 1;
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
pes[1].high = (int)(strtod(p+1, &p) + .499);
pes[1].high = (int)(strtod(p + 1, &p) + .499);
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
pes[1].low = (int)(strtod(p+1, &p) + .499);
pes[1].low = (int)(strtod(p + 1, &p) + .499);
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] mean insert size: %.3f, stddev: %.3f, max: %d, min: %d\n",
__func__, pes[1].avg, pes[1].std, pes[1].high, pes[1].low);
}
else return 1;
else
return 1;
}
if (rg_line) {
if (rg_line)
{
hdr_line = bwa_insert_header(rg_line, hdr_line);
free(rg_line);
}
if (opt->n_threads < 1) opt->n_threads = 1;
if (optind + 1 >= argc || optind + 3 < argc) {
if (opt->n_threads < 1)
opt->n_threads = 1;
if (optind + 1 >= argc || optind + 3 < argc)
{
fprintf(stderr, "\n");
fprintf(stderr, "Usage: bwa mem [options] <idxbase> <in1.fq> [in2.fq]\n\n");
fprintf(stderr, "Algorithm options:\n\n");
@ -274,7 +438,7 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop);
fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor);
fprintf(stderr, " -y INT seed occurrence for the 3rd round seeding [%ld]\n", (long)opt->max_mem_intv);
// fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
// fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
fprintf(stderr, " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->drop_ratio);
fprintf(stderr, " -W INT discard a chain if seeded bases shorter than INT [0]\n");
@ -327,62 +491,103 @@ int main_mem(int argc, char *argv[])
return 1;
}
if (mode) {
if (strcmp(mode, "intractg") == 0) {
if (!opt0.o_del) opt->o_del = 16;
if (!opt0.o_ins) opt->o_ins = 16;
if (!opt0.b) opt->b = 9;
if (!opt0.pen_clip5) opt->pen_clip5 = 5;
if (!opt0.pen_clip3) opt->pen_clip3 = 5;
} else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "ont2d") == 0) {
if (!opt0.o_del) opt->o_del = 1;
if (!opt0.e_del) opt->e_del = 1;
if (!opt0.o_ins) opt->o_ins = 1;
if (!opt0.e_ins) opt->e_ins = 1;
if (!opt0.b) opt->b = 1;
if (opt0.split_factor == 0.) opt->split_factor = 10.;
if (strcmp(mode, "ont2d") == 0) {
if (!opt0.min_chain_weight) opt->min_chain_weight = 20;
if (!opt0.min_seed_len) opt->min_seed_len = 14;
if (!opt0.pen_clip5) opt->pen_clip5 = 0;
if (!opt0.pen_clip3) opt->pen_clip3 = 0;
} else {
if (!opt0.min_chain_weight) opt->min_chain_weight = 40;
if (!opt0.min_seed_len) opt->min_seed_len = 17;
if (!opt0.pen_clip5) opt->pen_clip5 = 0;
if (!opt0.pen_clip3) opt->pen_clip3 = 0;
if (mode)
{
if (strcmp(mode, "intractg") == 0)
{
if (!opt0.o_del)
opt->o_del = 16;
if (!opt0.o_ins)
opt->o_ins = 16;
if (!opt0.b)
opt->b = 9;
if (!opt0.pen_clip5)
opt->pen_clip5 = 5;
if (!opt0.pen_clip3)
opt->pen_clip3 = 5;
}
} else {
else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "ont2d") == 0)
{
if (!opt0.o_del)
opt->o_del = 1;
if (!opt0.e_del)
opt->e_del = 1;
if (!opt0.o_ins)
opt->o_ins = 1;
if (!opt0.e_ins)
opt->e_ins = 1;
if (!opt0.b)
opt->b = 1;
if (opt0.split_factor == 0.)
opt->split_factor = 10.;
if (strcmp(mode, "ont2d") == 0)
{
if (!opt0.min_chain_weight)
opt->min_chain_weight = 20;
if (!opt0.min_seed_len)
opt->min_seed_len = 14;
if (!opt0.pen_clip5)
opt->pen_clip5 = 0;
if (!opt0.pen_clip3)
opt->pen_clip3 = 0;
}
else
{
if (!opt0.min_chain_weight)
opt->min_chain_weight = 40;
if (!opt0.min_seed_len)
opt->min_seed_len = 17;
if (!opt0.pen_clip5)
opt->pen_clip5 = 0;
if (!opt0.pen_clip3)
opt->pen_clip3 = 0;
}
}
else
{
fprintf(stderr, "[E::%s] unknown read type '%s'\n", __func__, mode);
return 1; // FIXME memory leak
}
} else update_a(opt, &opt0);
}
else
update_a(opt, &opt0);
bwa_fill_scmat(opt->a, opt->b, opt->mat);
aux.idx = bwa_idx_load_from_shm(argv[optind]);
if (aux.idx == 0) {
if ((aux.idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
} else if (bwa_verbose >= 3)
if (aux.idx == 0)
{
if ((aux.idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0)
return 1; // FIXME: memory leak
}
else if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] load the bwa index from shared memory\n", __func__);
if (ignore_alt)
for (i = 0; i < aux.idx->bns->n_seqs; ++i)
aux.idx->bns->anns[i].is_alt = 0;
ko = kopen(argv[optind + 1], &fd);
if (ko == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]);
if (ko == 0)
{
if (bwa_verbose >= 1)
fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]);
return 1;
}
fp = gzdopen(fd, "r");
aux.ks = kseq_init(fp);
if (optind + 2 < argc) {
if (opt->flag&MEM_F_PE) {
if (optind + 2 < argc)
{
if (opt->flag & MEM_F_PE)
{
if (bwa_verbose >= 2)
fprintf(stderr, "[W::%s] when '-p' is in use, the second query file is ignored.\n", __func__);
} else {
}
else
{
ko2 = kopen(argv[optind + 2], &fd2);
if (ko2 == 0) {
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 2]);
if (ko2 == 0)
{
if (bwa_verbose >= 1)
fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 2]);
return 1;
}
fp2 = gzdopen(fd2, "r");
@ -391,17 +596,64 @@ int main_mem(int argc, char *argv[])
}
}
bwa_print_sam_hdr(aux.idx->bns, hdr_line);
aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3);
aux.actual_chunk_size = fixed_chunk_size > 0 ? fixed_chunk_size : opt->chunk_size * opt->n_threads;
#ifdef SHOW_PERF
int64_t tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_before_process, tmp_diff);
start_time = get_mseconds();
#endif
kt_pipeline(no_mt_io ? 1 : 2, process, &aux, 3);
#ifdef SHOW_PERF
tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_process, tmp_diff);
start_time = get_mseconds();
#endif
free(hdr_line);
free(opt);
bwa_idx_destroy(aux.idx);
kseq_destroy(aux.ks);
err_gzclose(fp); kclose(ko);
if (aux.ks2) {
err_gzclose(fp);
kclose(ko);
if (aux.ks2)
{
kseq_destroy(aux.ks2);
err_gzclose(fp2); kclose(ko2);
err_gzclose(fp2);
kclose(ko2);
}
if (query_f != 0)
fclose(query_f);
if (target_f != 0)
fclose(target_f);
if (info_f != 0)
fclose(info_f);
#ifdef SHOW_PERF
tmp_diff = get_mseconds() - start_time;
__sync_fetch_and_add(&time_after_process, tmp_diff);
#endif
#ifdef SHOW_PERF
fprintf(stderr, "\n");
fprintf(stderr, "time_ksw_extend2: %f s\n", time_ksw_extend2 / 1000.0 / opt->n_threads);
fprintf(stderr, "time_ksw_global2: %f s\n", time_ksw_global2 / 1000.0 / opt->n_threads);
fprintf(stderr, "time_ksw_align2: %f s\n", time_ksw_align2 / 1000.0 / opt->n_threads);
fprintf(stderr, "time_bwt_smem1a: %f s\n", time_bwt_smem1a / 1000.0 / opt->n_threads);
// fprintf(stderr, "time_bwt_occ4: %f s\n", time_bwt_occ4 / 1000.0 / opt->n_threads);
fprintf(stderr, "time_bwt_sa: %f s\n", time_bwt_sa / 1000.0 / opt->n_threads);
fprintf(stderr, "time_mem_chain: %f s\n", time_mem_chain / 1000.0 / opt->n_threads);
fprintf(stderr, "time_worker1: %f s\n", time_worker1 / 1000.0);
fprintf(stderr, "time_worker2: %f s\n", time_worker2 / 1000.0);
fprintf(stderr, "time_process_step0: %f s\n", time_process_step0 / 1000.0);
fprintf(stderr, "time_process_step1: %f s\n", time_process_step1 / 1000.0);
fprintf(stderr, "time_process_step2: %f s\n", time_process_step2 / 1000.0);
fprintf(stderr, "time_before_process: %f s\n", time_before_process / 1000.0);
fprintf(stderr, "time_process: %f s\n", time_process / 1000.0);
fprintf(stderr, "time_after_process: %f s\n", time_after_process / 1000.0);
fprintf(stderr, "count_process_step2: %ld cnts\n", count_process_step2);
fprintf(stderr, "count_ksw_extend2: %ld cnts\n", count_ksw_extend2);
fprintf(stderr, "\n");
#endif
return 0;
}
@ -416,18 +668,34 @@ int main_fastmap(int argc, char *argv[])
const bwtintv_v *a;
bwaidx_t *idx;
while ((c = getopt(argc, argv, "w:l:pi:I:L:")) >= 0) {
switch (c) {
case 'p': print_seq = 1; break;
case 'w': min_iwidth = atoi(optarg); break;
case 'l': min_len = atoi(optarg); break;
case 'i': min_intv = atoi(optarg); break;
case 'I': max_intv = atol(optarg); break;
case 'L': max_len = atoi(optarg); break;
default: return 1;
while ((c = getopt(argc, argv, "w:l:pi:I:L:")) >= 0)
{
switch (c)
{
case 'p':
print_seq = 1;
break;
case 'w':
min_iwidth = atoi(optarg);
break;
case 'l':
min_len = atoi(optarg);
break;
case 'i':
min_intv = atoi(optarg);
break;
case 'I':
max_intv = atol(optarg);
break;
case 'L':
max_len = atoi(optarg);
break;
default:
return 1;
}
}
if (optind + 1 >= argc) {
if (optind + 1 >= argc)
{
fprintf(stderr, "\n");
fprintf(stderr, "Usage: bwa fastmap [options] <idxbase> <in.fq>\n\n");
fprintf(stderr, "Options: -l INT min SMEM length to output [%d]\n", min_len);
@ -441,34 +709,47 @@ int main_fastmap(int argc, char *argv[])
fp = xzopen(argv[optind + 1], "r");
seq = kseq_init(fp);
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1;
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT | BWA_IDX_BNS)) == 0)
return 1;
itr = smem_itr_init(idx->bwt);
smem_config(itr, min_intv, max_len, max_intv);
while (kseq_read(seq) >= 0) {
while (kseq_read(seq) >= 0)
{
err_printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
if (print_seq) {
if (print_seq)
{
err_putchar('\t');
err_puts(seq->seq.s);
} else err_putchar('\n');
}
else
err_putchar('\n');
for (i = 0; i < seq->seq.l; ++i)
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s);
while ((a = smem_next(itr)) != 0) {
for (i = 0; i < a->n; ++i) {
smem_set_query(itr, seq->seq.l, (uint8_t *)seq->seq.s);
while ((a = smem_next(itr)) != 0)
{
for (i = 0; i < a->n; ++i)
{
bwtintv_t *p = &a->a[i];
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
err_printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
if (p->x[2] <= min_iwidth) {
for (k = 0; k < p->x[2]; ++k) {
if ((uint32_t)p->info - (p->info >> 32) < min_len)
continue;
err_printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info >> 32), (uint32_t)p->info, (long)p->x[2]);
if (p->x[2] <= min_iwidth)
{
for (k = 0; k < p->x[2]; ++k)
{
bwtint_t pos;
int len, is_rev, ref_id;
len = (uint32_t)p->info - (p->info>>32);
len = (uint32_t)p->info - (p->info >> 32);
pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev);
if (is_rev) pos -= len - 1;
if (is_rev)
pos -= len - 1;
bns_cnt_ambi(idx->bns, pos, len, &ref_id);
err_printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1);
}
} else err_puts("\t*");
}
else
err_puts("\t*");
err_putchar('\n');
}
}

653
ksw.c
View File

@ -22,7 +22,7 @@
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
@ -36,20 +36,21 @@
#include "ksw.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#include "malloc_wrap.h"
#endif
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x),1)
#define UNLIKELY(x) __builtin_expect((x),0)
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif
const kswr_t g_defr = { 0, -1, -1, -1, -1, -1, -1 };
const kswr_t g_defr = {0, -1, -1, -1, -1, -1, -1};
struct _kswq_t {
struct _kswq_t
{
int qlen, slen;
uint8_t shift, mdiff, max, size;
__m128i *qp, *H0, *H1, *E, *Hmax;
@ -71,44 +72,54 @@ kswq_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t
kswq_t *q;
int slen, a, tmp, p;
size = size > 1? 2 : 1;
size = size > 1 ? 2 : 1;
p = 8 * (3 - size); // # values per __m128i
slen = (qlen + p - 1) / p; // segmented length
q = (kswq_t*)malloc(sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory
q->qp = (__m128i*)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory
q = (kswq_t *)malloc(sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory
q->qp = (__m128i *)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory
q->H0 = q->qp + slen * m;
q->H1 = q->H0 + slen;
q->E = q->H1 + slen;
q->Hmax = q->E + slen;
q->slen = slen; q->qlen = qlen; q->size = size;
q->slen = slen;
q->qlen = qlen;
q->size = size;
// compute shift
tmp = m * m;
for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score
if (mat[a] < (int8_t)q->shift) q->shift = mat[a];
if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a];
for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a)
{ // find the minimum and maximum score
if (mat[a] < (int8_t)q->shift)
q->shift = mat[a];
if (mat[a] > (int8_t)q->mdiff)
q->mdiff = mat[a];
}
q->max = q->mdiff;
q->shift = 256 - q->shift; // NB: q->shift is uint8_t
q->mdiff += q->shift; // this is the difference between the min and max scores
// An example: p=8, qlen=19, slen=3 and segmentation:
// {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}}
if (size == 1) {
int8_t *t = (int8_t*)q->qp;
for (a = 0; a < m; ++a) {
if (size == 1)
{
int8_t *t = (int8_t *)q->qp;
for (a = 0; a < m; ++a)
{
int i, k, nlen = slen * p;
const int8_t *ma = mat + a * m;
for (i = 0; i < slen; ++i)
for (k = i; k < nlen; k += slen) // p iterations
*t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift;
*t++ = (k >= qlen ? 0 : ma[query[k]]) + q->shift;
}
} else {
int16_t *t = (int16_t*)q->qp;
for (a = 0; a < m; ++a) {
}
else
{
int16_t *t = (int16_t *)q->qp;
for (a = 0; a < m; ++a)
{
int i, k, nlen = slen * p;
const int8_t *ma = mat + a * m;
for (i = 0; i < slen; ++i)
for (k = i; k < nlen; k += slen) // p iterations
*t++ = (k >= qlen? 0 : ma[query[k]]);
*t++ = (k >= qlen ? 0 : ma[query[k]]);
}
}
return q;
@ -127,7 +138,9 @@ kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del
kswr_t r;
#if defined __SSE2__
#define __max_16(ret, xx) do { \
#define __max_16(ret, xx) \
do \
{ \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 8)); \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 4)); \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 2)); \
@ -149,29 +162,36 @@ kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del
// initialization
r = g_defr;
minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000;
endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000;
m_b = n_b = 0; b = 0;
minsc = (xtra & KSW_XSUBO) ? xtra & 0xffff : 0x10000;
endsc = (xtra & KSW_XSTOP) ? xtra & 0xffff : 0x10000;
m_b = n_b = 0;
b = 0;
zero = _mm_set1_epi32(0);
oe_del = _mm_set1_epi8(_o_del + _e_del);
e_del = _mm_set1_epi8(_e_del);
oe_ins = _mm_set1_epi8(_o_ins + _e_ins);
e_ins = _mm_set1_epi8(_e_ins);
shift = _mm_set1_epi8(q->shift);
H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
H0 = q->H0;
H1 = q->H1;
E = q->E;
Hmax = q->Hmax;
slen = q->slen;
for (i = 0; i < slen; ++i) {
for (i = 0; i < slen; ++i)
{
_mm_store_si128(E + i, zero);
_mm_store_si128(H0 + i, zero);
_mm_store_si128(Hmax + i, zero);
}
// the core loop
for (i = 0; i < tlen; ++i) {
for (i = 0; i < tlen; ++i)
{
int j, k, imax;
__m128i e, h, t, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
h = _mm_slli_si128(h, 1); // h=H(i-1,-1); << instead of >> because x64 is little-endian
for (j = 0; LIKELY(j < slen); ++j) {
for (j = 0; LIKELY(j < slen); ++j)
{
/* SW cells are computed in the following order:
* H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
* E(i+1,j) = max{H(i,j)-q, E(i,j)-r}
@ -198,53 +218,72 @@ kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del
h = _mm_load_si128(H0 + j); // h=H'(i-1,j)
}
// NB: we do not need to set E(i,j) as we disallow adjecent insertion and then deletion
for (k = 0; LIKELY(k < 16); ++k) { // this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max
for (k = 0; LIKELY(k < 16); ++k)
{ // this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max
f = _mm_slli_si128(f, 1);
for (j = 0; LIKELY(j < slen); ++j) {
for (j = 0; LIKELY(j < slen); ++j)
{
h = _mm_load_si128(H1 + j);
h = _mm_max_epu8(h, f); // h=H'(i,j)
_mm_store_si128(H1 + j, h);
h = _mm_subs_epu8(h, oe_ins);
f = _mm_subs_epu8(f, e_ins);
if (UNLIKELY(allzero_16(_mm_subs_epu8(f, h)))) goto end_loop16;
if (UNLIKELY(allzero_16(_mm_subs_epu8(f, h))))
goto end_loop16;
}
}
end_loop16:
//int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n");
end_loop16:
// int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n");
__max_16(imax, max); // imax is the maximum number in max
if (imax >= minsc) { // write the b array; this condition adds branching unfornately
if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // then append
if (n_b == m_b) {
m_b = m_b? m_b<<1 : 8;
b = (uint64_t*)realloc(b, 8 * m_b);
if (imax >= minsc)
{ // write the b array; this condition adds branching unfornately
if (n_b == 0 || (int32_t)b[n_b - 1] + 1 != i)
{ // then append
if (n_b == m_b)
{
m_b = m_b ? m_b << 1 : 8;
b = (uint64_t *)realloc(b, 8 * m_b);
}
b[n_b++] = (uint64_t)imax<<32 | i;
} else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
b[n_b++] = (uint64_t)imax << 32 | i;
}
if (imax > gmax) {
gmax = imax; te = i; // te is the end position on the target
else if ((int)(b[n_b - 1] >> 32) < imax)
b[n_b - 1] = (uint64_t)imax << 32 | i; // modify the last
}
if (imax > gmax)
{
gmax = imax;
te = i; // te is the end position on the target
for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector
_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
if (gmax + q->shift >= 255 || gmax >= endsc) break;
if (gmax + q->shift >= 255 || gmax >= endsc)
break;
}
S = H1; H1 = H0; H0 = S; // swap H0 and H1
S = H1;
H1 = H0;
H0 = S; // swap H0 and H1
}
r.score = gmax + q->shift < 255? gmax : 255;
r.score = gmax + q->shift < 255 ? gmax : 255;
r.te = te;
if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score
if (r.score != 255)
{ // get a->qe, the end of query match; find the 2nd best score
int max = -1, tmp, low, high, qlen = slen * 16;
uint8_t *t = (uint8_t*)Hmax;
uint8_t *t = (uint8_t *)Hmax;
for (i = 0; i < qlen; ++i, ++t)
if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen;
else if ((int)*t == max && (tmp = i / 16 + i % 16 * slen) < r.qe) r.qe = tmp;
//printf("%d,%d\n", max, gmax);
if (b) {
if ((int)*t > max)
max = *t, r.qe = i / 16 + i % 16 * slen;
else if ((int)*t == max && (tmp = i / 16 + i % 16 * slen) < r.qe)
r.qe = tmp;
// printf("%d,%d\n", max, gmax);
if (b)
{
i = (r.score + q->max - 1) / q->max;
low = te - i; high = te + i;
for (i = 0; i < n_b; ++i) {
low = te - i;
high = te + i;
for (i = 0; i < n_b; ++i)
{
int e = (int32_t)b[i];
if ((e < low || e > high) && (int)(b[i]>>32) > r.score2)
r.score2 = b[i]>>32, r.te2 = e;
if ((e < low || e > high) && (int)(b[i] >> 32) > r.score2)
r.score2 = b[i] >> 32, r.te2 = e;
}
}
}
@ -260,7 +299,9 @@ kswr_t ksw_i16(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_de
kswr_t r;
#if defined __SSE2__
#define __max_8(ret, xx) do { \
#define __max_8(ret, xx) \
do \
{ \
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \
@ -281,28 +322,35 @@ kswr_t ksw_i16(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_de
// initialization
r = g_defr;
minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000;
endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000;
m_b = n_b = 0; b = 0;
minsc = (xtra & KSW_XSUBO) ? xtra & 0xffff : 0x10000;
endsc = (xtra & KSW_XSTOP) ? xtra & 0xffff : 0x10000;
m_b = n_b = 0;
b = 0;
zero = _mm_set1_epi32(0);
oe_del = _mm_set1_epi16(_o_del + _e_del);
e_del = _mm_set1_epi16(_e_del);
oe_ins = _mm_set1_epi16(_o_ins + _e_ins);
e_ins = _mm_set1_epi16(_e_ins);
H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
H0 = q->H0;
H1 = q->H1;
E = q->E;
Hmax = q->Hmax;
slen = q->slen;
for (i = 0; i < slen; ++i) {
for (i = 0; i < slen; ++i)
{
_mm_store_si128(E + i, zero);
_mm_store_si128(H0 + i, zero);
_mm_store_si128(Hmax + i, zero);
}
// the core loop
for (i = 0; i < tlen; ++i) {
for (i = 0; i < tlen; ++i)
{
int j, k, imax;
__m128i e, t, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
h = _mm_slli_si128(h, 2);
for (j = 0; LIKELY(j < slen); ++j) {
for (j = 0; LIKELY(j < slen); ++j)
{
h = _mm_adds_epi16(h, _mm_load_si128(S++));
e = _mm_load_si128(E + j);
h = _mm_max_epi16(h, e);
@ -318,50 +366,69 @@ kswr_t ksw_i16(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_de
f = _mm_max_epi16(f, t);
h = _mm_load_si128(H0 + j);
}
for (k = 0; LIKELY(k < 16); ++k) {
for (k = 0; LIKELY(k < 16); ++k)
{
f = _mm_slli_si128(f, 2);
for (j = 0; LIKELY(j < slen); ++j) {
for (j = 0; LIKELY(j < slen); ++j)
{
h = _mm_load_si128(H1 + j);
h = _mm_max_epi16(h, f);
_mm_store_si128(H1 + j, h);
h = _mm_subs_epu16(h, oe_ins);
f = _mm_subs_epu16(f, e_ins);
if(UNLIKELY(allzero_0f_8(_mm_cmpgt_epi16(f, h)))) goto end_loop8;
if (UNLIKELY(allzero_0f_8(_mm_cmpgt_epi16(f, h))))
goto end_loop8;
}
}
end_loop8:
end_loop8:
__max_8(imax, max);
if (imax >= minsc) {
if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) {
if (n_b == m_b) {
m_b = m_b? m_b<<1 : 8;
b = (uint64_t*)realloc(b, 8 * m_b);
if (imax >= minsc)
{
if (n_b == 0 || (int32_t)b[n_b - 1] + 1 != i)
{
if (n_b == m_b)
{
m_b = m_b ? m_b << 1 : 8;
b = (uint64_t *)realloc(b, 8 * m_b);
}
b[n_b++] = (uint64_t)imax<<32 | i;
} else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
b[n_b++] = (uint64_t)imax << 32 | i;
}
if (imax > gmax) {
gmax = imax; te = i;
else if ((int)(b[n_b - 1] >> 32) < imax)
b[n_b - 1] = (uint64_t)imax << 32 | i; // modify the last
}
if (imax > gmax)
{
gmax = imax;
te = i;
for (j = 0; LIKELY(j < slen); ++j)
_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
if (gmax >= endsc) break;
if (gmax >= endsc)
break;
}
S = H1; H1 = H0; H0 = S;
S = H1;
H1 = H0;
H0 = S;
}
r.score = gmax; r.te = te;
r.score = gmax;
r.te = te;
{
int max = -1, tmp, low, high, qlen = slen * 8;
uint16_t *t = (uint16_t*)Hmax;
uint16_t *t = (uint16_t *)Hmax;
for (i = 0, r.qe = -1; i < qlen; ++i, ++t)
if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen;
else if ((int)*t == max && (tmp = i / 8 + i % 8 * slen) < r.qe) r.qe = tmp;
if (b) {
if ((int)*t > max)
max = *t, r.qe = i / 8 + i % 8 * slen;
else if ((int)*t == max && (tmp = i / 8 + i % 8 * slen) < r.qe)
r.qe = tmp;
if (b)
{
i = (r.score + q->max - 1) / q->max;
low = te - i; high = te + i;
for (i = 0; i < n_b; ++i) {
low = te - i;
high = te + i;
for (i = 0; i < n_b; ++i)
{
int e = (int32_t)b[i];
if ((e < low || e > high) && (int)(b[i]>>32) > r.score2)
r.score2 = b[i]>>32, r.te2 = e;
if ((e < low || e > high) && (int)(b[i] >> 32) > r.score2)
r.score2 = b[i] >> 32, r.te2 = e;
}
}
}
@ -372,7 +439,7 @@ end_loop8:
static inline void revseq(int l, uint8_t *s)
{
int i, t;
for (i = 0; i < l>>1; ++i)
for (i = 0; i < l >> 1; ++i)
t = s[i], s[i] = s[l - 1 - i], s[l - 1 - i] = t;
}
@ -381,19 +448,24 @@ kswr_t ksw_align2(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, co
int size;
kswq_t *q;
kswr_t r, rr;
kswr_t (*func)(kswq_t*, int, const uint8_t*, int, int, int, int, int);
kswr_t (*func)(kswq_t *, int, const uint8_t *, int, int, int, int, int);
q = (qry && *qry)? *qry : ksw_qinit((xtra&KSW_XBYTE)? 1 : 2, qlen, query, m, mat);
if (qry && *qry == 0) *qry = q;
func = q->size == 2? ksw_i16 : ksw_u8;
q = (qry && *qry) ? *qry : ksw_qinit((xtra & KSW_XBYTE) ? 1 : 2, qlen, query, m, mat);
if (qry && *qry == 0)
*qry = q;
func = q->size == 2 ? ksw_i16 : ksw_u8;
size = q->size;
r = func(q, tlen, target, o_del, e_del, o_ins, e_ins, xtra);
if (qry == 0) free(q);
if ((xtra&KSW_XSTART) == 0 || ((xtra&KSW_XSUBO) && r.score < (xtra&0xffff))) return r;
revseq(r.qe + 1, query); revseq(r.te + 1, target); // +1 because qe/te points to the exact end, not the position after the end
if (qry == 0)
free(q);
if ((xtra & KSW_XSTART) == 0 || ((xtra & KSW_XSUBO) && r.score < (xtra & 0xffff)))
return r;
revseq(r.qe + 1, query);
revseq(r.te + 1, target); // +1 because qe/te points to the exact end, not the position after the end
q = ksw_qinit(size, r.qe + 1, query, m, mat);
rr = func(q, tlen, target, o_del, e_del, o_ins, e_ins, KSW_XSTOP | r.score);
revseq(r.qe + 1, query); revseq(r.te + 1, target);
revseq(r.qe + 1, query);
revseq(r.te + 1, target);
free(q);
if (r.score == rr.score)
r.tb = r.te - rr.te, r.qb = r.qe - rr.qe;
@ -409,55 +481,103 @@ kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, con
*** SW extension ***
********************/
typedef struct {
// 将
void write_query_target_sequence(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int h0)
{
// 写到三个文件里query.fatarget.fa每行一个序列info.txt包含前缀得分h0和长度信息qlentlen
extern FILE *query_f, *target_f, *info_f;
const char seq_map[5] = {'A', 'C', 'G', 'T', 'N'};
int i;
// 处理query
for (i = 0; i < qlen; ++i)
fprintf(query_f, "%c", seq_map[query[i]]);
fprintf(query_f, "\n");
// 处理target
for (i = 0; i < tlen; ++i)
fprintf(target_f, "%c", seq_map[target[i]]);
fprintf(target_f, "\n");
// 处理其他信息
fprintf(info_f, "%-8d%-8d%-8d\n", qlen, tlen, h0);
}
typedef struct
{
int32_t h, e;
} eh_t;
extern int ksw_extend2_avx2_u8(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off)
{
// return ksw_extend2_avx2_u8(qlen, query, tlen, target, 0, m, mat, o_del, e_del, o_ins, e_ins, 1, 4, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off);
eh_t *eh; // score array
int8_t *qp; // query profile
int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
assert(h0 > 0);
write_query_target_sequence(qlen, query, tlen, target, h0);
// // 调试用打印mat信息
// for (i=0; i<5; ++i) {
// for (j=0; j<5; ++j) {
// fprintf(stderr, "%-4d", mat[5*i + j]);
// }
// fprintf(stderr, "\n");
// }
// fprintf(stderr, "\n");
// allocate memory
qp = malloc(qlen * m);
eh = calloc(qlen + 1, 8);
// generate the query profile
for (k = i = 0; k < m; ++k) {
for (k = i = 0; k < m; ++k)
{
const int8_t *p = &mat[k * m];
for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]];
for (j = 0; j < qlen; ++j)
qp[i++] = p[query[j]];
}
// fill the first row
eh[0].h = h0; eh[1].h = h0 > oe_ins? h0 - oe_ins : 0;
for (j = 2; j <= qlen && eh[j-1].h > e_ins; ++j)
eh[j].h = eh[j-1].h - e_ins;
eh[0].h = h0;
eh[1].h = h0 > oe_ins ? h0 - oe_ins : 0;
for (j = 2; j <= qlen && eh[j - 1].h > e_ins; ++j)
eh[j].h = eh[j - 1].h - e_ins;
// adjust $w if it is too large
k = m * m;
for (i = 0, max = 0; i < k; ++i) // get the max score
max = max > mat[i]? max : mat[i];
max = max > mat[i] ? max : mat[i];
max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.);
max_ins = max_ins > 1? max_ins : 1;
w = w < max_ins? w : max_ins;
max_ins = max_ins > 1 ? max_ins : 1;
w = w < max_ins ? w : max_ins;
max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.);
max_del = max_del > 1? max_del : 1;
w = w < max_del? w : max_del; // TODO: is this necessary?
max_del = max_del > 1 ? max_del : 1;
w = w < max_del ? w : max_del; // TODO: is this necessary?
// DP loop
max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1;
max = h0, max_i = max_j = -1;
max_ie = -1, gscore = -1;
max_off = 0;
beg = 0, end = qlen;
for (i = 0; LIKELY(i < tlen); ++i) {
for (i = 0; LIKELY(i < tlen); ++i)
{
int t, f = 0, h1, m = 0, mj = -1;
int8_t *q = &qp[target[i] * qlen];
// apply the band and the constraint (if provided)
if (beg < i - w) beg = i - w;
if (end > i + w + 1) end = i + w + 1;
if (end > qlen) end = qlen;
if (beg < i - w)
beg = i - w;
if (end > i + w + 1)
end = i + w + 1;
if (end > qlen)
end = qlen;
// compute the first column
if (beg == 0) {
if (beg == 0)
{
h1 = h0 - (o_del + e_del * (i + 1));
if (h1 < 0) h1 = 0;
} else h1 = 0;
for (j = beg; LIKELY(j < end); ++j) {
if (h1 < 0)
h1 = 0;
}
else
h1 = 0;
for (j = beg; LIKELY(j < end); ++j)
{
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
// Similar to SSE2-SW, cells are computed in the following order:
// H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
@ -466,51 +586,70 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
eh_t *p = &eh[j];
int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
p->h = h1; // set H(i,j-1) for the next row
M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M"
h = M > e? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0
h = h > f? h : f;
M = M ? M + q[j] : 0; // separating H and M to disallow a cigar like "100M3I3D20M"
h = M > e ? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0
h = h > f ? h : f;
h1 = h; // save H(i,j) to h1 for the next column
mj = m > h? mj : j; // record the position where max score is achieved
m = m > h? m : h; // m is stored at eh[mj+1]
mj = m > h ? mj : j; // record the position where max score is achieved
m = m > h ? m : h; // m is stored at eh[mj+1]
t = M - oe_del;
t = t > 0? t : 0;
t = t > 0 ? t : 0;
e -= e_del;
e = e > t? e : t; // computed E(i+1,j)
e = e > t ? e : t; // computed E(i+1,j)
p->e = e; // save E(i+1,j) for the next row
t = M - oe_ins;
t = t > 0? t : 0;
t = t > 0 ? t : 0;
f -= e_ins;
f = f > t? f : t; // computed F(i,j+1)
f = f > t ? f : t; // computed F(i,j+1)
}
eh[end].h = h1; eh[end].e = 0;
if (j == qlen) {
max_ie = gscore > h1? max_ie : i;
gscore = gscore > h1? gscore : h1;
eh[end].h = h1;
eh[end].e = 0;
if (j == qlen)
{
max_ie = gscore > h1 ? max_ie : i;
gscore = gscore > h1 ? gscore : h1;
}
if (m == 0) break;
if (m > max) {
if (m == 0)
break;
if (m > max)
{
max = m, max_i = i, max_j = mj;
max_off = max_off > abs(mj - i)? max_off : abs(mj - i);
} else if (zdrop > 0) {
if (i - max_i > mj - max_j) {
if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop) break;
} else {
if (max - m - ((mj - max_j) - (i - max_i)) * e_ins > zdrop) break;
max_off = max_off > abs(mj - i) ? max_off : abs(mj - i);
}
else if (zdrop > 0)
{
if (i - max_i > mj - max_j)
{
if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop)
break;
}
else
{
if (max - m - ((mj - max_j) - (i - max_i)) * e_ins > zdrop)
break;
}
}
// update beg and end for the next round
for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j);
for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j)
;
beg = j;
for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j);
end = j + 2 < qlen? j + 2 : qlen;
//beg = 0; end = qlen; // uncomment this line for debugging
for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j)
;
end = j + 2 < qlen ? j + 2 : qlen;
// beg = 0; end = qlen; // uncomment this line for debugging
}
free(eh); free(qp);
if (_qle) *_qle = max_j + 1;
if (_tle) *_tle = max_i + 1;
if (_gtle) *_gtle = max_ie + 1;
if (_gscore) *_gscore = gscore;
if (_max_off) *_max_off = max_off;
free(eh);
free(qp);
if (_qle)
*_qle = max_j + 1;
if (_tle)
*_tle = max_i + 1;
if (_gtle)
*_gtle = max_ie + 1;
if (_gscore)
*_gscore = gscore;
if (_max_off)
*_max_off = max_off;
return max;
}
@ -527,13 +666,17 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
static inline uint32_t *push_cigar(int *n_cigar, int *m_cigar, uint32_t *cigar, int op, int len)
{
if (*n_cigar == 0 || op != (cigar[(*n_cigar) - 1]&0xf)) {
if (*n_cigar == *m_cigar) {
*m_cigar = *m_cigar? (*m_cigar)<<1 : 4;
if (*n_cigar == 0 || op != (cigar[(*n_cigar) - 1] & 0xf))
{
if (*n_cigar == *m_cigar)
{
*m_cigar = *m_cigar ? (*m_cigar) << 1 : 4;
cigar = realloc(cigar, (*m_cigar) << 2);
}
cigar[(*n_cigar)++] = len<<4 | op;
} else cigar[(*n_cigar)-1] += len<<4;
cigar[(*n_cigar)++] = len << 4 | op;
}
else
cigar[(*n_cigar) - 1] += len << 4;
return cigar;
}
@ -543,32 +686,40 @@ int ksw_global2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
int8_t *qp; // query profile
int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, score, n_col;
uint8_t *z; // backtrack matrix; in each cell: f<<4|e<<2|h; in principle, we can halve the memory, but backtrack will be a little more complex
if (n_cigar_) *n_cigar_ = 0;
if (n_cigar_)
*n_cigar_ = 0;
// allocate memory
n_col = qlen < 2*w+1? qlen : 2*w+1; // maximum #columns of the backtrack matrix
z = n_cigar_ && cigar_? malloc((long)n_col * tlen) : 0;
n_col = qlen < 2 * w + 1 ? qlen : 2 * w + 1; // maximum #columns of the backtrack matrix
z = n_cigar_ && cigar_ ? malloc((long)n_col * tlen) : 0;
qp = malloc(qlen * m);
eh = calloc(qlen + 1, 8);
// generate the query profile
for (k = i = 0; k < m; ++k) {
for (k = i = 0; k < m; ++k)
{
const int8_t *p = &mat[k * m];
for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]];
for (j = 0; j < qlen; ++j)
qp[i++] = p[query[j]];
}
// fill the first row
eh[0].h = 0; eh[0].e = MINUS_INF;
eh[0].h = 0;
eh[0].e = MINUS_INF;
for (j = 1; j <= qlen && j <= w; ++j)
eh[j].h = -(o_ins + e_ins * j), eh[j].e = MINUS_INF;
for (; j <= qlen; ++j) eh[j].h = eh[j].e = MINUS_INF; // everything is -inf outside the band
for (; j <= qlen; ++j)
eh[j].h = eh[j].e = MINUS_INF; // everything is -inf outside the band
// DP loop
for (i = 0; LIKELY(i < tlen); ++i) { // target sequence is in the outer loop
for (i = 0; LIKELY(i < tlen); ++i)
{ // target sequence is in the outer loop
int32_t f = MINUS_INF, h1, beg, end, t;
int8_t *q = &qp[target[i] * qlen];
beg = i > w? i - w : 0;
end = i + w + 1 < qlen? i + w + 1 : qlen; // only loop through [beg,end) of the query sequence
h1 = beg == 0? -(o_del + e_del * (i + 1)) : MINUS_INF;
if (n_cigar_ && cigar_) {
beg = i > w ? i - w : 0;
end = i + w + 1 < qlen ? i + w + 1 : qlen; // only loop through [beg,end) of the query sequence
h1 = beg == 0 ? -(o_del + e_del * (i + 1)) : MINUS_INF;
if (n_cigar_ && cigar_)
{
uint8_t *zi = &z[(long)i * n_col];
for (j = beg; LIKELY(j < end); ++j) {
for (j = beg; LIKELY(j < end); ++j)
{
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
// Cells are computed in the following order:
// M(i,j) = H(i-1,j-1) + S(i,j)
@ -584,60 +735,74 @@ int ksw_global2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
uint8_t d; // direction
p->h = h1;
m += q[j];
d = m >= e? 0 : 1;
h = m >= e? m : e;
d = h >= f? d : 2;
h = h >= f? h : f;
d = m >= e ? 0 : 1;
h = m >= e ? m : e;
d = h >= f ? d : 2;
h = h >= f ? h : f;
h1 = h;
t = m - oe_del;
e -= e_del;
d |= e > t? 1<<2 : 0;
e = e > t? e : t;
d |= e > t ? 1 << 2 : 0;
e = e > t ? e : t;
p->e = e;
t = m - oe_ins;
f -= e_ins;
d |= f > t? 2<<4 : 0; // if we want to halve the memory, use one bit only, instead of two
f = f > t? f : t;
d |= f > t ? 2 << 4 : 0; // if we want to halve the memory, use one bit only, instead of two
f = f > t ? f : t;
zi[j - beg] = d; // z[i,j] keeps h for the current cell and e/f for the next cell
}
} else {
for (j = beg; LIKELY(j < end); ++j) {
}
else
{
for (j = beg; LIKELY(j < end); ++j)
{
eh_t *p = &eh[j];
int32_t h, m = p->h, e = p->e;
p->h = h1;
m += q[j];
h = m >= e? m : e;
h = h >= f? h : f;
h = m >= e ? m : e;
h = h >= f ? h : f;
h1 = h;
t = m - oe_del;
e -= e_del;
e = e > t? e : t;
e = e > t ? e : t;
p->e = e;
t = m - oe_ins;
f -= e_ins;
f = f > t? f : t;
f = f > t ? f : t;
}
}
eh[end].h = h1; eh[end].e = MINUS_INF;
eh[end].h = h1;
eh[end].e = MINUS_INF;
}
score = eh[qlen].h;
if (n_cigar_ && cigar_) { // backtrack
if (n_cigar_ && cigar_)
{ // backtrack
int n_cigar = 0, m_cigar = 0, which = 0;
uint32_t *cigar = 0, tmp;
i = tlen - 1; k = (i + w + 1 < qlen? i + w + 1 : qlen) - 1; // (i,k) points to the last cell
while (i >= 0 && k >= 0) {
which = z[(long)i * n_col + (k - (i > w? i - w : 0))] >> (which<<1) & 3;
if (which == 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1), --i, --k;
else if (which == 1) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1), --i;
else cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1), --k;
i = tlen - 1;
k = (i + w + 1 < qlen ? i + w + 1 : qlen) - 1; // (i,k) points to the last cell
while (i >= 0 && k >= 0)
{
which = z[(long)i * n_col + (k - (i > w ? i - w : 0))] >> (which << 1) & 3;
if (which == 0)
cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1), --i, --k;
else if (which == 1)
cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1), --i;
else
cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1), --k;
}
if (i >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, i + 1);
if (k >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1);
for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp;
if (i >= 0)
cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, i + 1);
if (k >= 0)
cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1);
for (i = 0; i < n_cigar >> 1; ++i) // reverse CIGAR
tmp = cigar[i], cigar[i] = cigar[n_cigar - 1 - i], cigar[n_cigar - 1 - i] = tmp;
*n_cigar_ = n_cigar, *cigar_ = cigar;
}
free(eh); free(qp); free(z);
free(eh);
free(qp);
free(z);
return score;
}
@ -674,8 +839,7 @@ unsigned char seq_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
};
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
int main(int argc, char *argv[])
{
@ -687,63 +851,96 @@ int main(int argc, char *argv[])
kseq_t *kst, *ksq;
// parse command line
while ((c = getopt(argc, argv, "a:b:q:r:ft:1")) >= 0) {
switch (c) {
case 'a': sa = atoi(optarg); break;
case 'b': sb = atoi(optarg); break;
case 'q': gapo = atoi(optarg); break;
case 'r': gape = atoi(optarg); break;
case 't': minsc = atoi(optarg); break;
case 'f': forward_only = 1; break;
case '1': xtra |= KSW_XBYTE; break;
while ((c = getopt(argc, argv, "a:b:q:r:ft:1")) >= 0)
{
switch (c)
{
case 'a':
sa = atoi(optarg);
break;
case 'b':
sb = atoi(optarg);
break;
case 'q':
gapo = atoi(optarg);
break;
case 'r':
gape = atoi(optarg);
break;
case 't':
minsc = atoi(optarg);
break;
case 'f':
forward_only = 1;
break;
case '1':
xtra |= KSW_XBYTE;
break;
}
}
if (optind + 2 > argc) {
if (optind + 2 > argc)
{
fprintf(stderr, "Usage: ksw [-1] [-f] [-a%d] [-b%d] [-q%d] [-r%d] [-t%d] <target.fa> <query.fa>\n", sa, sb, gapo, gape, minsc);
return 1;
}
if (minsc > 0xffff) minsc = 0xffff;
if (minsc > 0xffff)
minsc = 0xffff;
xtra |= KSW_XSUBO | minsc;
// initialize scoring matrix
for (i = k = 0; i < 4; ++i) {
for (i = k = 0; i < 4; ++i)
{
for (j = 0; j < 4; ++j)
mat[k++] = i == j? sa : -sb;
mat[k++] = i == j ? sa : -sb;
mat[k++] = 0; // ambiguous base
}
for (j = 0; j < 5; ++j) mat[k++] = 0;
for (j = 0; j < 5; ++j)
mat[k++] = 0;
// open file
fpt = xzopen(argv[optind], "r"); kst = kseq_init(fpt);
fpq = xzopen(argv[optind+1], "r"); ksq = kseq_init(fpq);
fpt = xzopen(argv[optind], "r");
kst = kseq_init(fpt);
fpq = xzopen(argv[optind + 1], "r");
ksq = kseq_init(fpq);
// all-pair alignment
while (kseq_read(ksq) > 0) {
while (kseq_read(ksq) > 0)
{
kswq_t *q[2] = {0, 0};
kswr_t r;
for (i = 0; i < (int)ksq->seq.l; ++i) ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]];
if (!forward_only) { // reverse
if ((int)ksq->seq.m > max_rseq) {
for (i = 0; i < (int)ksq->seq.l; ++i)
ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]];
if (!forward_only)
{ // reverse
if ((int)ksq->seq.m > max_rseq)
{
max_rseq = ksq->seq.m;
rseq = (uint8_t*)realloc(rseq, max_rseq);
rseq = (uint8_t *)realloc(rseq, max_rseq);
}
for (i = 0, j = ksq->seq.l - 1; i < (int)ksq->seq.l; ++i, --j)
rseq[j] = ksq->seq.s[i] == 4? 4 : 3 - ksq->seq.s[i];
rseq[j] = ksq->seq.s[i] == 4 ? 4 : 3 - ksq->seq.s[i];
}
gzrewind(fpt); kseq_rewind(kst);
while (kseq_read(kst) > 0) {
for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]];
r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]);
gzrewind(fpt);
kseq_rewind(kst);
while (kseq_read(kst) > 0)
{
for (i = 0; i < (int)kst->seq.l; ++i)
kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]];
r = ksw_align(ksq->seq.l, (uint8_t *)ksq->seq.s, kst->seq.l, (uint8_t *)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]);
if (r.score >= minsc)
err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2);
if (rseq) {
r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]);
err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te + 1, ksq->name.s, r.qb, r.qe + 1, r.score, r.score2, r.te2);
if (rseq)
{
r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t *)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]);
if (r.score >= minsc)
err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2);
err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te + 1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2);
}
}
free(q[0]); free(q[1]);
free(q[0]);
free(q[1]);
}
free(rseq);
kseq_destroy(kst); err_gzclose(fpt);
kseq_destroy(ksq); err_gzclose(fpq);
kseq_destroy(kst);
err_gzclose(fpt);
kseq_destroy(ksq);
err_gzclose(fpq);
return 0;
}
#endif

450
ksw_avx2.c 100644
View File

@ -0,0 +1,450 @@
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <emmintrin.h>
#include <stdio.h>
#include <immintrin.h>
#include <emmintrin.h>
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif
#undef MAX
#undef MIN
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define SIMD_WIDTH 32
static const uint8_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
{0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}};
// static const uint8_t reverse_mask[SIMD_WIDTH] = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
// const int permute_mask = _MM_SHUFFLE(0, 1, 2, 3);
#define permute_mask _MM_SHUFFLE(0, 1, 2, 3)
// 初始化变量
#define SIMD_INIT \
int oe_del = o_del + e_del, oe_ins = o_ins + e_ins; \
__m256i zero_vec; \
__m256i max_vec; \
__m256i oe_del_vec; \
__m256i oe_ins_vec; \
__m256i e_del_vec; \
__m256i e_ins_vec; \
__m256i h_vec_mask[SIMD_WIDTH]; \
__m256i reverse_mask_vec; \
zero_vec = _mm256_setzero_si256(); \
oe_del_vec = _mm256_set1_epi8(oe_del); \
oe_ins_vec = _mm256_set1_epi8(oe_ins); \
e_del_vec = _mm256_set1_epi8(e_del); \
e_ins_vec = _mm256_set1_epi8(e_ins); \
__m256i match_sc_vec = _mm256_set1_epi8(a); \
__m256i mis_sc_vec = _mm256_set1_epi8(b); \
__m256i amb_sc_vec = _mm256_set1_epi8(1); \
__m256i amb_vec = _mm256_set1_epi8(4); \
reverse_mask_vec = _mm256_loadu_si256((__m256i *)(reverse_mask)); \
for (i = 0; i < SIMD_WIDTH; ++i) \
h_vec_mask[i] = _mm256_loadu_si256((__m256i *)(&h_vec_int_mask[i]));
/*
* e ref
* f seq
* m
* h
*/
// load向量化数据
#define SIMD_LOAD \
__m256i m1 = _mm256_loadu_si256((__m256i *)(&mA1[j])); \
__m256i e1 = _mm256_loadu_si256((__m256i *)(&eA1[j])); \
__m256i m1j1 = _mm256_loadu_si256((__m256i *)(&mA1[j - 1])); \
__m256i f1j1 = _mm256_loadu_si256((__m256i *)(&fA1[j - 1])); \
__m256i h0j1 = _mm256_loadu_si256((__m256i *)(&hA0[j - 1])); \
__m256i qs_vec = _mm256_loadu_si256((__m256i *)(&seq[j - 1])); \
__m256i ts_vec = _mm256_loadu_si256((__m256i *)(&ref[i]));
// 比对ref和seq的序列计算罚分
#define SIMD_CMP_SEQ \
ts_vec = _mm256_permute4x64_epi64(ts_vec, permute_mask); \
ts_vec = _mm256_shuffle_epi8(ts_vec, reverse_mask_vec); \
__m256i match_mask_vec = _mm256_cmpeq_epi8(qs_vec, ts_vec); \
__m256i mis_score_vec = _mm256_andnot_si256(match_mask_vec, mis_sc_vec); \
__m256i match_score_vec = _mm256_and_si256(match_sc_vec, match_mask_vec); \
__m256i q_amb_mask_vec = _mm256_cmpeq_epi8(qs_vec, amb_vec); \
__m256i t_amb_mask_vec = _mm256_cmpeq_epi8(ts_vec, amb_vec); \
__m256i amb_mask_vec = _mm256_or_si256(q_amb_mask_vec, t_amb_mask_vec); \
__m256i amb_score_vec = _mm256_and_si256(amb_mask_vec, amb_sc_vec); \
mis_score_vec = _mm256_andnot_si256(amb_mask_vec, mis_score_vec); \
mis_score_vec = _mm256_or_si256(amb_score_vec, mis_score_vec); \
match_score_vec = _mm256_andnot_si256(amb_mask_vec, match_score_vec);
// 向量化计算h, e, f, m
#define SIMD_COMPUTE \
__m256i en_vec0 = _mm256_max_epu8(m1, oe_del_vec); \
en_vec0 = _mm256_subs_epu8(en_vec0, oe_del_vec); \
__m256i en_vec1 = _mm256_max_epu8(e1, e_del_vec); \
en_vec1 = _mm256_subs_epu8(en_vec1, e_del_vec); \
__m256i en_vec = _mm256_max_epu8(en_vec0, en_vec1); \
__m256i fn_vec0 = _mm256_max_epu8(m1j1, oe_ins_vec); \
fn_vec0 = _mm256_subs_epu8(fn_vec0, oe_ins_vec); \
__m256i fn_vec1 = _mm256_max_epu8(f1j1, e_ins_vec); \
fn_vec1 = _mm256_subs_epu8(fn_vec1, e_ins_vec); \
__m256i fn_vec = _mm256_max_epu8(fn_vec0, fn_vec1); \
__m256i mn_vec0 = _mm256_adds_epu8(h0j1, match_score_vec); \
mn_vec0 = _mm256_max_epu8(mn_vec0, mis_score_vec); \
mn_vec0 = _mm256_subs_epu8(mn_vec0, mis_score_vec); \
__m256i mn_mask = _mm256_cmpeq_epi8(h0j1, zero_vec); \
__m256i mn_vec = _mm256_andnot_si256(mn_mask, mn_vec0); \
__m256i hn_vec0 = _mm256_max_epu8(en_vec, fn_vec); \
__m256i hn_vec = _mm256_max_epu8(hn_vec0, mn_vec);
// 存储向量化结果
#define SIMD_STORE \
max_vec = _mm256_max_epu8(max_vec, hn_vec); \
_mm256_storeu_si256((__m256i *)&eA2[j], en_vec); \
_mm256_storeu_si256((__m256i *)&fA2[j], fn_vec); \
_mm256_storeu_si256((__m256i *)&mA2[j], mn_vec); \
_mm256_storeu_si256((__m256i *)&hA2[j], hn_vec);
// 去除多余的部分
#define SIMD_REMOVE_EXTRA \
en_vec = _mm256_and_si256(en_vec, h_vec_mask[end - j]); \
fn_vec = _mm256_and_si256(fn_vec, h_vec_mask[end - j]); \
mn_vec = _mm256_and_si256(mn_vec, h_vec_mask[end - j]); \
hn_vec = _mm256_and_si256(hn_vec, h_vec_mask[end - j]);
// 找最大值和位置
#define SIMD_FIND_MAX \
uint8_t *maxVal = (uint8_t *)&max_vec; \
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 1)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 2)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 3)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 4)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 5)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 6)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 7)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
m = maxVal[0]; \
if (m > 0) \
{ \
for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) \
{ \
__m256i h2_vec = _mm256_loadu_si256((__m256i *)(&hA2[j])); \
__m256i vcmp = _mm256_cmpeq_epi8(h2_vec, max_vec); \
uint32_t mask = _mm256_movemask_epi8(vcmp); \
if (mask > 0) \
{ \
int pos = SIMD_WIDTH - 1 - __builtin_clz(mask); \
mj = j - 1 + pos; \
mi = i - 1 - pos; \
} \
} \
}
// 每轮迭代后,交换数组
#define SWAP_DATA_POINTER \
uint8_t *tmp = hA0; \
hA0 = hA1; \
hA1 = hA2; \
hA2 = tmp; \
tmp = eA1; \
eA1 = eA2; \
eA2 = tmp; \
tmp = fA1; \
fA1 = fA2; \
fA2 = tmp; \
tmp = mA1; \
mA1 = mA2; \
mA2 = tmp;
int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长度
const uint8_t *query, // read碱基序列
int tlen, // target length reference的长度
const uint8_t *target, // reference序列
int is_left, // 是不是向左扩展
int m, // 碱基种类 (5)
const int8_t *mat, // 每个位置的query和target的匹配得分 m*m
int o_del, // deletion 错配开始的惩罚系数
int e_del, // deletion extension的惩罚系数
int o_ins, // insertion 错配开始的惩罚系数
int e_ins, // insertion extension的惩罚系数
int a, // 碱基match时的分数
int b, // 碱基mismatch时的惩罚分数正数
int w, // 提前剪枝系数w =100 匹配位置和beg的最大距离
int end_bonus,
int zdrop,
int h0, // 该seed的初始得分完全匹配query的碱基数
int *_qle, // 匹配得到全局最大得分的碱基在query的位置
int *_tle, // 匹配得到全局最大得分的碱基在reference的位置
int *_gtle, // query全部匹配上的target的长度
int *_gscore, // query的端到端匹配得分
int *_max_off) // 取得最大得分时在query和reference上位置差的 最大值
{
uint8_t *mA, *hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H其他的保存上个H E F M
uint8_t *seq, *ref;
uint8_t *mem, *qtmem, *vmem;
int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH;
int i, iStart, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
int Dloop = tlen + qlen; // 循环跳出条件
int span, beg1, end1; // 边界条件计算
int col_size = qlen + 2 + SIMD_WIDTH;
int val_mem_size = (col_size * 9 + 31) >> 5 << 5; // 32字节的整数倍
int mem_size = seq_size + ref_size + val_mem_size;
SIMD_INIT; // 初始化simd用的数据
assert(h0 > 0);
// allocate memory
mem = malloc(mem_size);
qtmem = &mem[0];
seq = (uint8_t *)&qtmem[0];
ref = (uint8_t *)&qtmem[seq_size];
if (is_left)
{
for (i = 0; i < qlen; ++i)
seq[i] = query[qlen - 1 - i];
for (i = 0; i < tlen; ++i)
ref[i + SIMD_WIDTH] = target[tlen - 1 - i];
}
else
{
for (i = 0; i < qlen; ++i)
seq[i] = query[i];
for (i = 0; i < tlen; ++i)
ref[i + SIMD_WIDTH] = target[i];
}
vmem = &ref[ref_size];
for (i = 0; i < val_mem_size; i += SIMD_WIDTH)
{
_mm256_storeu_si256((__m256i *)&vmem[i], zero_vec);
}
hA = &vmem[0];
mA = &vmem[col_size * 3];
eA = &vmem[col_size * 5];
fA = &vmem[col_size * 7];
hA0 = &hA[0];
hA1 = &hA[col_size];
hA2 = &hA1[col_size];
mA1 = &mA[0];
mA2 = &mA[col_size];
eA1 = &eA[0];
eA2 = &eA[col_size];
fA1 = &fA[0];
fA2 = &fA[col_size];
// adjust $w if it is too large
k = m * m;
// get the max score
for (i = 0, max = 0; i < k; ++i)
max = max > mat[i] ? max : mat[i];
max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.);
max_ins = max_ins > 1 ? max_ins : 1;
w = w < max_ins ? w : max_ins;
max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.);
max_del = max_del > 1 ? max_del : 1;
w = w < max_del ? w : max_del; // TODO: is this necessary?
if (tlen < qlen)
w = MIN(tlen - 1, w);
// DP loop
max = h0, max_i = max_j = -1;
max_ie = -1, gscore = -1;
;
max_off = 0;
beg = 1;
end = qlen;
// init h0
hA0[0] = h0; // 左上角
if (qlen == 0 || tlen == 0)
Dloop = 0; // 防止意外情况
if (w >= qlen)
{
max_ie = 0;
gscore = 0;
}
int m_last = 0;
int iend;
for (D = 1; LIKELY(D < Dloop); ++D)
{
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
if (D > tlen)
{
span = MIN(Dloop - D, w);
beg1 = MAX(D - tlen + 1, ((D - w) / 2) + 1);
}
else
{
span = MIN(D - 1, w);
beg1 = MAX(1, ((D - w) / 2) + 1);
}
end1 = MIN(qlen, beg1 + span);
if (beg < beg1)
beg = beg1;
if (end > end1)
end = end1;
if (beg > end)
break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
iend = D - (beg - 1); // ref开始计算的位置倒序
span = end - beg;
iStart = iend - span - 1; // 0开始的ref索引位置
// 每一轮需要记录的数据
int m = 0, mj = -1, mi = -1;
max_vec = zero_vec;
// 要处理边界
// 左边界 处理f (insert)
if (iStart == 0)
{
hA1[end] = MAX(0, h0 - (o_ins + e_ins * end));
}
// 上边界
if (beg == 1)
{
hA1[0] = MAX(0, h0 - (o_del + e_del * iend));
}
else
{
hA1[beg - 1] = 0;
eA1[beg - 1] = 0;
}
for (j = beg, i = iend; j <= end + 1 - SIMD_WIDTH; j += SIMD_WIDTH, i -= SIMD_WIDTH)
{
// 取数据
SIMD_LOAD;
// 比对seq计算罚分
SIMD_CMP_SEQ;
// 计算
SIMD_COMPUTE;
// 存储结果
SIMD_STORE;
}
// 剩下的计算单元
if (j <= end)
{
// 取数据
SIMD_LOAD;
// 比对seq计算罚分
SIMD_CMP_SEQ;
// 计算
SIMD_COMPUTE;
// 去除多余计算的部分
SIMD_REMOVE_EXTRA;
// 存储结果
SIMD_STORE;
}
SIMD_FIND_MAX;
// 注意最后跳出循环j的值
j = end + 1;
if (j == qlen + 1)
{
max_ie = gscore > hA2[qlen] ? max_ie : iStart;
gscore = gscore > hA2[qlen] ? gscore : hA2[qlen];
}
if (m == 0 && m_last == 0)
break; // 一定要注意,斜对角遍历和按列遍历的不同点
if (m > max)
{
max = m, max_i = mi, max_j = mj;
max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
}
else if (zdrop > 0)
{
if (mi - max_i > mj - max_j)
{
if (max - m - ((mi - max_i) - (mj - max_j)) * e_del > zdrop)
break;
}
else
{
if (max - m - ((mj - max_j) - (mi - max_i)) * e_ins > zdrop)
break;
}
}
// 调整计算的边界
for (j = beg; LIKELY(j <= end); ++j)
{
int has_val = hA1[j - 1] | hA2[j];
if (has_val)
break;
}
beg = j;
for (j = end + 1; LIKELY(j >= beg); --j)
{
int has_val = hA1[j - 1] | hA2[j];
if (has_val)
break;
else
hA0[j - 1] = 0;
}
end = j + 1 <= qlen ? j + 1 : qlen;
m_last = m;
// swap m, h, e, f
SWAP_DATA_POINTER;
}
free(mem);
if (_qle)
*_qle = max_j + 1;
if (_tle)
*_tle = max_i + 1;
if (_gtle)
*_gtle = max_ie + 1;
if (_gscore)
*_gscore = gscore;
if (_max_off)
*_max_off = max_off;
return max;
}

23
run.sh
View File

@ -1,23 +1,26 @@
time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
/home/zzh/data/fastq/nm1.fq \
/home/zzh/data/fastq/nm2.fq \
-o /dev/null
# time ./bwa mem -t 1 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
# /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
# /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \
# /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \
# -o /dev/null
# time ./bwa mem -t 64 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
# /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \
# /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \
# /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \
# -o /dev/null
#/public/home/zzh/data/fastq/n1.fq \
#/public/home/zzh/data/fastq/n2.fq \
#/share_nas3/zyseq-release-v1.1.3/zyseq/wes/resource/reference/human_g1k_v37_decoy.fasta \
#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n1.fq \
#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n2.fq \
#-o reads_mapping.sam
#/public/home/zzh/data/fastq/n1.fq \
#/public/home/zzh/data/fastq/n2.fq \
#/share_nas3/zyseq-release-v1.1.3/zyseq/wes/resource/reference/human_g1k_v37_decoy.fasta \
#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n1.fq \
#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n2.fq \
#-o reads_mapping.sam