解决了bsw上边界同样的问题,解决了seed3的一个bug
This commit is contained in:
parent
2eaeb26858
commit
d728ddbe2c
5
Makefile
5
Makefile
|
|
@ -3,9 +3,10 @@ CC= gcc
|
||||||
# CFLAGS= -g -Wall -Wno-unused-function -O2
|
# CFLAGS= -g -Wall -Wno-unused-function -O2
|
||||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2
|
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2
|
||||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||||
SHOW_PERF= -DSHOW_PERF
|
SHOW_PERF= #-DSHOW_PERF
|
||||||
|
FILTER_FULL_MATCH=#-DFILTER_FULL_MATCH
|
||||||
AR= ar
|
AR= ar
|
||||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) -DUSE_AVX2 -DKSW_EQUAL #-DUSE_MT_READ #-DFILTER_FULL_MATCH
|
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) $(FILTER_FULL_MATCH) -DUSE_AVX2 -DKSW_EQUAL -DUSE_MT_READ
|
||||||
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
|
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
|
||||||
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
|
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
|
||||||
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||||
|
|
|
||||||
43
bwamem.c
43
bwamem.c
|
|
@ -133,7 +133,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len,
|
||||||
while (x < len) {
|
while (x < len) {
|
||||||
if (seq[x] < 4) {
|
if (seq[x] < 4) {
|
||||||
start_flag = 0;
|
start_flag = 0;
|
||||||
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, &a->mem1, a->tmpv[0]);
|
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]);
|
||||||
for (i = 0; i < a->mem1.n; ++i) {
|
for (i = 0; i < a->mem1.n; ++i) {
|
||||||
bwtintv_t *p = &a->mem1.a[i];
|
bwtintv_t *p = &a->mem1.a[i];
|
||||||
int slen = (uint32_t)p->info - (p->info >> 32); // seed length
|
int slen = (uint32_t)p->info - (p->info >> 32); // seed length
|
||||||
|
|
@ -163,7 +163,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len,
|
||||||
bwtintv_t *p = &a->mem.a[k];
|
bwtintv_t *p = &a->mem.a[k];
|
||||||
int start = p->info>>32, end = (int32_t)p->info;
|
int start = p->info>>32, end = (int32_t)p->info;
|
||||||
if (end - start < split_len || p->x[2] > opt->split_width) continue;
|
if (end - start < split_len || p->x[2] > opt->split_width) continue;
|
||||||
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, &a->mem1, a->tmpv[0]);
|
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]);
|
||||||
for (i = 0; i < a->mem1.n; ++i) {
|
for (i = 0; i < a->mem1.n; ++i) {
|
||||||
bwtintv_t *p = &a->mem1.a[i];
|
bwtintv_t *p = &a->mem1.a[i];
|
||||||
int slen = (uint32_t)p->info - (p->info >> 32);
|
int slen = (uint32_t)p->info - (p->info >> 32);
|
||||||
|
|
@ -1384,10 +1384,13 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
|
||||||
int64_t tmp_time = realtime_msec();
|
int64_t tmp_time = realtime_msec();
|
||||||
#endif
|
#endif
|
||||||
// 1. first pass: find all SMEMs
|
// 1. first pass: find all SMEMs
|
||||||
|
//fprintf(gfp1, "read\n");
|
||||||
while (x < len) {
|
while (x < len) {
|
||||||
if (seq[x] < 4) {
|
if (seq[x] < 4) {
|
||||||
start_flag = 0;
|
start_flag = 0;
|
||||||
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, &a->mem1, a->tmpv[0]);
|
//int last_x = x;
|
||||||
|
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]);
|
||||||
|
//fprintf(gfp1, "%d\n", x - last_x);
|
||||||
for (i = 0; i < a->mem1.n; ++i) {
|
for (i = 0; i < a->mem1.n; ++i) {
|
||||||
bwtintv_t *p = &a->mem1.a[i];
|
bwtintv_t *p = &a->mem1.a[i];
|
||||||
int slen = (uint32_t)p->info - (p->info >> 32); // seed length
|
int slen = (uint32_t)p->info - (p->info >> 32); // seed length
|
||||||
|
|
@ -1405,12 +1408,11 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
|
||||||
#ifdef SHOW_PERF
|
#ifdef SHOW_PERF
|
||||||
a->time_seed_1 += realtime_msec() - tmp_time;
|
a->time_seed_1 += realtime_msec() - tmp_time;
|
||||||
tmp_time = realtime_msec();
|
tmp_time = realtime_msec();
|
||||||
#endif
|
|
||||||
|
|
||||||
s2n += 1;
|
s2n += 1;
|
||||||
if (max_seed_len == len - start_N_num)
|
if (max_seed_len == len - start_N_num)
|
||||||
s1n += 1;
|
s1n += 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FILTER_FULL_MATCH
|
#ifdef FILTER_FULL_MATCH
|
||||||
if (max_seed_len == len - start_N_num) goto collect_intv_end;
|
if (max_seed_len == len - start_N_num) goto collect_intv_end;
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -1421,14 +1423,17 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
|
||||||
bwtintv_t *p = &smem->mem.a[k];
|
bwtintv_t *p = &smem->mem.a[k];
|
||||||
int start = p->info>>32, end = (int32_t)p->info;
|
int start = p->info>>32, end = (int32_t)p->info;
|
||||||
if (end - start < split_len || p->x[2] > opt->split_width) continue;
|
if (end - start < split_len || p->x[2] > opt->split_width) continue;
|
||||||
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, &a->mem1, a->tmpv[0]);
|
// fprintf(gfp1, "start\n");
|
||||||
|
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]);
|
||||||
for (i = 0; i < a->mem1.n; ++i) {
|
for (i = 0; i < a->mem1.n; ++i) {
|
||||||
bwtintv_t *p = &a->mem1.a[i];
|
bwtintv_t *p = &a->mem1.a[i];
|
||||||
int slen = (uint32_t)p->info - (p->info >> 32);
|
int slen = (uint32_t)p->info - (p->info >> 32);
|
||||||
|
// fprintf(gfp1, "%d\n", slen);
|
||||||
if (slen >= opt->min_seed_len) {
|
if (slen >= opt->min_seed_len) {
|
||||||
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
|
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// fprintf(gfp1, "end\n");
|
||||||
}
|
}
|
||||||
#ifdef SHOW_PERF
|
#ifdef SHOW_PERF
|
||||||
a->time_seed_2 += realtime_msec() - tmp_time;
|
a->time_seed_2 += realtime_msec() - tmp_time;
|
||||||
|
|
@ -1442,6 +1447,7 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
|
||||||
bwtintv_t m;
|
bwtintv_t m;
|
||||||
x = fmt_seed_strategy1(fmt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
|
x = fmt_seed_strategy1(fmt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
|
||||||
if (m.x[2] > 0) {
|
if (m.x[2] > 0) {
|
||||||
|
// fprintf(stderr, "3: %ld %ld %ld\n", m.info >> 32, m.info & ((1L << 32) - 1), m.x[2]);
|
||||||
kv_push(bwtintv_t, smem->mem, m);
|
kv_push(bwtintv_t, smem->mem, m);
|
||||||
}
|
}
|
||||||
} else ++x;
|
} else ++x;
|
||||||
|
|
@ -1456,6 +1462,10 @@ collect_intv_end:
|
||||||
#endif
|
#endif
|
||||||
// sort
|
// sort
|
||||||
ks_introsort(mem_intv, smem->mem.n, smem->mem.a);
|
ks_introsort(mem_intv, smem->mem.n, smem->mem.a);
|
||||||
|
//for (i = 0; i < smem->mem.n; ++i) {
|
||||||
|
// fprintf(stderr, "seed %d: %ld %ld %ld\n", i, smem->mem.a[i].x[0], smem->mem.a[i].x[1], smem->mem.a[i].x[2]);
|
||||||
|
//}
|
||||||
|
//fprintf(stderr, "split\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void find_smem(const mem_opt_t *opt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_aux_t *aux, smem_v *smemv)
|
void find_smem(const mem_opt_t *opt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_aux_t *aux, smem_v *smemv)
|
||||||
|
|
@ -1642,10 +1652,10 @@ void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t
|
||||||
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux);
|
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux);
|
||||||
free(chnp->a[j].seeds);
|
free(chnp->a[j].seeds);
|
||||||
}
|
}
|
||||||
#ifdef SHOW_PERF
|
//#ifdef SHOW_PERF
|
||||||
tmp_time = realtime_msec() - tmp_time;
|
// tmp_time = realtime_msec() - tmp_time;
|
||||||
__sync_fetch_and_add(&time_ksw_extend2, tmp_time);
|
// __sync_fetch_and_add(&time_ksw_extend2, tmp_time);
|
||||||
#endif
|
//#endif
|
||||||
free(chnp->a); chnp->m = 0; chnp->a = 0;
|
free(chnp->a); chnp->m = 0; chnp->a = 0;
|
||||||
regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t *)seq, regp->n, regp->a);
|
regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t *)seq, regp->n, regp->a);
|
||||||
if (bwa_verbose >= 4) {
|
if (bwa_verbose >= 4) {
|
||||||
|
|
@ -1660,6 +1670,11 @@ void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t
|
||||||
if (p->rid >= 0 && bns->anns[p->rid].is_alt)
|
if (p->rid >= 0 && bns->anns[p->rid].is_alt)
|
||||||
p->is_alt = 1;
|
p->is_alt = 1;
|
||||||
}
|
}
|
||||||
|
#ifdef SHOW_PERF
|
||||||
|
tmp_time = realtime_msec() - tmp_time;
|
||||||
|
__sync_fetch_and_add(&time_ksw_extend2, tmp_time);
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
|
|
@ -1747,11 +1762,15 @@ void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed
|
||||||
w->isize_arr[i][j].n = 0;
|
w->isize_arr[i][j].n = 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef SHOW_PERF
|
||||||
int64_t tmp_time = realtime_msec();
|
int64_t tmp_time = realtime_msec();
|
||||||
|
#endif
|
||||||
//kt_for(opt->n_threads, worker1, w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
|
//kt_for(opt->n_threads, worker1, w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
|
||||||
kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions
|
kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions
|
||||||
|
#ifdef SHOW_PERF
|
||||||
time_work1 += realtime_msec() - tmp_time;
|
time_work1 += realtime_msec() - tmp_time;
|
||||||
tmp_time = realtime_msec();
|
tmp_time = realtime_msec();
|
||||||
|
#endif
|
||||||
if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
|
if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
|
||||||
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
|
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
|
||||||
else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data
|
else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data
|
||||||
|
|
@ -1759,7 +1778,9 @@ void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed
|
||||||
}
|
}
|
||||||
kt_for(opt->n_threads, worker2, w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment
|
kt_for(opt->n_threads, worker2, w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment
|
||||||
//kt_for(opt->n_threads, worker_sam, w, n_batch);
|
//kt_for(opt->n_threads, worker_sam, w, n_batch);
|
||||||
|
#ifdef SHOW_PERF
|
||||||
time_work2 += realtime_msec() - tmp_time;
|
time_work2 += realtime_msec() - tmp_time;
|
||||||
|
#endif
|
||||||
//free(w.regs);
|
//free(w.regs);
|
||||||
if (bwa_verbose >= 3)
|
if (bwa_verbose >= 3)
|
||||||
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
|
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
|
||||||
|
|
|
||||||
6
debug.sh
6
debug.sh
|
|
@ -1,4 +1,6 @@
|
||||||
thread=1
|
thread=1
|
||||||
|
n_r1=~/data/fastq/dataset/na12878_wes_144/diff_r1.fq
|
||||||
|
n_r2=~/data/fastq/dataset/na12878_wes_144/diff_r2.fq
|
||||||
#n_r1=~/data/fastq/ZY2105177532213000/n_r1.fq
|
#n_r1=~/data/fastq/ZY2105177532213000/n_r1.fq
|
||||||
#n_r2=~/data/fastq/ZY2105177532213000/n_r2.fq
|
#n_r2=~/data/fastq/ZY2105177532213000/n_r2.fq
|
||||||
#n_r1=~/data/fastq/ZY2105177532213000/sn_r1.fq
|
#n_r1=~/data/fastq/ZY2105177532213000/sn_r1.fq
|
||||||
|
|
@ -10,8 +12,8 @@ thread=1
|
||||||
#n_r2=~/fastq/ssn_r2.fq
|
#n_r2=~/fastq/ssn_r2.fq
|
||||||
#n_r1=~/fastq/tiny_n_r1.fq
|
#n_r1=~/fastq/tiny_n_r1.fq
|
||||||
#n_r2=~/fastq/tiny_n_r2.fq
|
#n_r2=~/fastq/tiny_n_r2.fq
|
||||||
n_r1=~/fastq/diff_r1.fq
|
#n_r1=~/fastq/diff_r1.fq
|
||||||
n_r2=~/fastq/diff_r2.fq
|
#n_r2=~/fastq/diff_r2.fq
|
||||||
#n_r1=~/fastq/diff_all_r1.fq
|
#n_r1=~/fastq/diff_all_r1.fq
|
||||||
#n_r2=~/fastq/diff_all_r2.fq
|
#n_r2=~/fastq/diff_all_r2.fq
|
||||||
#n_r1=~/fastq/d_r1.fq
|
#n_r1=~/fastq/d_r1.fq
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,7 @@ int64_t time_ksw_extend2 = 0,
|
||||||
|
|
||||||
int64_t dn = 0, n16 = 0, n17 = 0, n18 = 0, n19 = 0, nall = 0, num_sa = 0;
|
int64_t dn = 0, n16 = 0, n17 = 0, n18 = 0, n19 = 0, nall = 0, num_sa = 0;
|
||||||
int64_t s1n = 0, s2n = 0, s3n = 0, s1l = 0, s2l = 0, s3l = 0;
|
int64_t s1n = 0, s2n = 0, s3n = 0, s1l = 0, s2l = 0, s3l = 0;
|
||||||
|
int64_t gn[100] = {0};
|
||||||
int64_t g_num_smem2 = 0;
|
int64_t g_num_smem2 = 0;
|
||||||
FILE *gfp1, *gfp2, *gfp3;
|
FILE *gfp1, *gfp2, *gfp3;
|
||||||
|
|
||||||
|
|
@ -292,7 +293,7 @@ int main_mem(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opt->n_threads < 1) opt->n_threads = 1;
|
if (opt->n_threads < 1) opt->n_threads = 1;
|
||||||
if (opt->batch_size < 1) opt->batch_size = 512;
|
if (opt->batch_size < 1) opt->batch_size = 256;
|
||||||
fprintf(stderr, "batch_size: %d\n", opt->batch_size);
|
fprintf(stderr, "batch_size: %d\n", opt->batch_size);
|
||||||
|
|
||||||
if (optind + 1 >= argc || optind + 3 < argc) {
|
if (optind + 1 >= argc || optind + 3 < argc) {
|
||||||
|
|
@ -430,6 +431,7 @@ int main_mem(int argc, char *argv[])
|
||||||
bwa_print_sam_hdr(aux.idx->bns, hdr_line);
|
bwa_print_sam_hdr(aux.idx->bns, hdr_line);
|
||||||
aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
|
aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
|
||||||
kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3);
|
kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3);
|
||||||
|
//kt_pipeline(3, process, &aux, 3);
|
||||||
free(hdr_line);
|
free(hdr_line);
|
||||||
free(opt);
|
free(opt);
|
||||||
bwa_idx_destroy(aux.idx);
|
bwa_idx_destroy(aux.idx);
|
||||||
|
|
|
||||||
41
fmt_idx.c
41
fmt_idx.c
|
|
@ -708,7 +708,10 @@ int fmt_smem_forward(const FMTIndex *fmt, int len, const uint8_t *q, int x, int
|
||||||
#if 1
|
#if 1
|
||||||
if (min_intv == 1 && ok2.x[2] == min_intv)
|
if (min_intv == 1 && ok2.x[2] == min_intv)
|
||||||
{
|
{
|
||||||
|
// fprintf(gfp1, "%d\t", i + 2 - x);
|
||||||
direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt);
|
direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt);
|
||||||
|
//mt.x[0] = ok2.x[0];
|
||||||
|
//fprintf(gfp1, "mt %ld %ld\n", ok2.x[0], ok2.x[1]);
|
||||||
kv_push(bwtintv_t, *mem, mt);
|
kv_push(bwtintv_t, *mem, mt);
|
||||||
ret = (uint32_t)mt.info;
|
ret = (uint32_t)mt.info;
|
||||||
goto fmt_smem_forward_end;
|
goto fmt_smem_forward_end;
|
||||||
|
|
@ -767,8 +770,8 @@ fmt_smem_forward_end:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 找smem(seed)
|
// 找smem(seed)(lm: long_smem)
|
||||||
int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_v *mem, bwtintv_v *tmpvec)
|
int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec)
|
||||||
{
|
{
|
||||||
if (x == 0 || q[x - 1] > 3)
|
if (x == 0 || q[x - 1] > 3)
|
||||||
return fmt_smem_forward(fmt, len, q, x, min_intv, min_seed_len, mem);
|
return fmt_smem_forward(fmt, len, q, x, min_intv, min_seed_len, mem);
|
||||||
|
|
@ -788,7 +791,7 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
|
||||||
qbit = build_forward_kmer(&q[x], len - x, HASH_KMER_LEN, &kmer_len);
|
qbit = build_forward_kmer(&q[x], len - x, HASH_KMER_LEN, &kmer_len);
|
||||||
bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, 0); // 初始碱基位置
|
bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, 0); // 初始碱基位置
|
||||||
ik.info = x + 1;
|
ik.info = x + 1;
|
||||||
|
//int print_flag = 0;
|
||||||
// check change of the interval size and whether the interval size is too small to be extended further
|
// check change of the interval size and whether the interval size is too small to be extended further
|
||||||
#define CHECK_INTV_CHANGE(iv, ov, end_pos) \
|
#define CHECK_INTV_CHANGE(iv, ov, end_pos) \
|
||||||
if (ov.x[2] != iv.x[2]) \
|
if (ov.x[2] != iv.x[2]) \
|
||||||
|
|
@ -826,18 +829,37 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
|
||||||
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2);
|
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2);
|
||||||
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2);
|
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2);
|
||||||
CHECK_INTV_CHANGE(ik, ok1, i + 1);
|
CHECK_INTV_CHANGE(ik, ok1, i + 1);
|
||||||
|
#if 0
|
||||||
|
if (min_intv == 2 && ok1.x[2] == 2 && ok2.x[2] == 1 && lm && lm->num_match == 1)
|
||||||
|
{
|
||||||
|
bwtint_t bwt_mtx_row = ok1.x[0];
|
||||||
|
if (bwt_mtx_row == ok2.x[0])
|
||||||
|
bwt_mtx_row = ok1.x[0] + 1;
|
||||||
|
direct_extend(fmt, len, q, x, i + 1, bwt_mtx_row, &mt);
|
||||||
|
kv_push(bwtintv_t, *mem, mt);
|
||||||
|
//fprintf(gfp1, "cond-appear: %d %d %ld %ld %ld %ld\n", x, i + 2, ok1.x[0], ok1.x[1], ok2.x[0], ok2.x[1]);
|
||||||
|
//print_flag = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CHECK_INTV_CHANGE(ik, ok2, i + 2);
|
||||||
|
}
|
||||||
|
#else
|
||||||
CHECK_INTV_CHANGE(ik, ok2, i + 2);
|
CHECK_INTV_CHANGE(ik, ok2, i + 2);
|
||||||
// 在这里进行判断是否只有一个候选了
|
#endif
|
||||||
#if 1
|
#if 1
|
||||||
|
// 在这里进行判断是否只有一个候选了
|
||||||
if (min_intv == 1 && ok2.x[2] == min_intv)
|
if (min_intv == 1 && ok2.x[2] == min_intv)
|
||||||
{
|
{
|
||||||
direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt);
|
direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt);
|
||||||
kv_push(bwtintv_t, *mem, mt);
|
//mt.x[0] = ok2.x[0];
|
||||||
|
//fprintf(gfp1, "mt %ld %ld\n", ok2.x[0], ok2.x[1]);
|
||||||
|
kv_push(bwtintv_t, *mem, mt); // 这里可以判断一下是否大于min_seed_len
|
||||||
ret = (uint32_t)mt.info;
|
ret = (uint32_t)mt.info;
|
||||||
if (mt.rm[0].qs == 0 || q[mt.rm[0].qs - 1] > 3)
|
if (mt.rm[0].qs == 0 || q[mt.rm[0].qs - 1] > 3)
|
||||||
goto fmt_smem_end;
|
goto fmt_smem_end;
|
||||||
goto backward_search;
|
goto backward_search;
|
||||||
}
|
} // if (min_intv == 2 && ok2.x[2] == min_intv && !print_flag)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else if (q[i] < 4) // q[i+1] >= 4
|
else if (q[i] < 4) // q[i+1] >= 4
|
||||||
|
|
@ -868,7 +890,6 @@ backward_search:
|
||||||
fmt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first
|
fmt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first
|
||||||
if (mt.num_match == 0)
|
if (mt.num_match == 0)
|
||||||
ret = curr->a[0].info; // this will be the returned value,扩展到的最远的位置
|
ret = curr->a[0].info; // this will be the returned value,扩展到的最远的位置
|
||||||
|
|
||||||
// 按照种子进行遍历,反向扩展
|
// 按照种子进行遍历,反向扩展
|
||||||
#define CHECK_ADD_MEM(pos, intv, mem) \
|
#define CHECK_ADD_MEM(pos, intv, mem) \
|
||||||
if (((int)((intv).info) - (pos) >= min_seed_len) && (mem->n == 0 || (pos) < mem->a[mem->n - 1].info >> 32)) \
|
if (((int)((intv).info) - (pos) >= min_seed_len) && (mem->n == 0 || (pos) < mem->a[mem->n - 1].info >> 32)) \
|
||||||
|
|
@ -966,6 +987,7 @@ backward_search:
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt_smem_end:
|
fmt_smem_end:
|
||||||
|
//if (mem->n == 0 && min_intv > 1 && print_flag) fprintf(gfp1, "\n");
|
||||||
fmt_reverse_intvs(mem); // s.t. sorted by the start coordinate
|
fmt_reverse_intvs(mem); // s.t. sorted by the start coordinate
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
@ -1029,10 +1051,12 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in
|
||||||
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
|
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
|
||||||
COND_SET_RETURN(ok2, *mem, x, i + 1, max_intv, min_len);
|
COND_SET_RETURN(ok2, *mem, x, i + 1, max_intv, min_len);
|
||||||
ik = ok2;
|
ik = ok2;
|
||||||
|
//fprintf(stderr, "d: %d %ld\n", i, ok2.x[2]);
|
||||||
}
|
}
|
||||||
else if (q[i] < 4) // q[i+1] >= 4
|
else if (q[i] < 4) // q[i+1] >= 4
|
||||||
{
|
{
|
||||||
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
|
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
|
||||||
|
//fprintf(stderr, "d: %d %ld\n", i, ok1.x[2]);
|
||||||
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
|
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
|
||||||
return i + 2;
|
return i + 2;
|
||||||
}
|
}
|
||||||
|
|
@ -1041,9 +1065,10 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in
|
||||||
return i + 1;
|
return i + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (i == len - 1)
|
if (i == len - 1 && q[i] < 4)
|
||||||
{
|
{
|
||||||
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
|
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
|
||||||
|
//fprintf(stderr, "d: %d %ld\n", i, ok1.x[2]);
|
||||||
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
|
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
|
||||||
}
|
}
|
||||||
return len;
|
return len;
|
||||||
|
|
|
||||||
|
|
@ -107,7 +107,7 @@ void kmer_setval_at(uint8_t *mem_addr, bwtintv_t ik, int pos);
|
||||||
void kmer_getval_at(uint8_t *mem_addr, bwtintv_t *ok, int pos);
|
void kmer_getval_at(uint8_t *mem_addr, bwtintv_t *ok, int pos);
|
||||||
void fmt_kmer_get(const FMTIndex *fmt, bwtintv_t *ok, uint32_t qbit, int pos);
|
void fmt_kmer_get(const FMTIndex *fmt, bwtintv_t *ok, uint32_t qbit, int pos);
|
||||||
// 找smem(seed)
|
// 找smem(seed)
|
||||||
int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_v *mem, bwtintv_v *tmpvec);
|
int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec);
|
||||||
|
|
||||||
int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem);
|
int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,10 @@
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
|
||||||
|
#ifdef SHOW_PERF
|
||||||
extern FILE *gfp1, *gfp2, *gfp3;
|
extern FILE *gfp1, *gfp2, *gfp3;
|
||||||
|
#endif
|
||||||
|
|
||||||
//#define DEBUG_SW_EXTEND 1
|
//#define DEBUG_SW_EXTEND 1
|
||||||
|
|
||||||
#define NO_VAL -1
|
#define NO_VAL -1
|
||||||
|
|
@ -148,8 +151,8 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
|
||||||
max_vec = _mm256_max_epu16(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
|
max_vec = _mm256_max_epu16(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
|
||||||
max_vec = _mm256_max_epu16(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
|
max_vec = _mm256_max_epu16(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
|
||||||
int16_t *maxVal = (int16_t*)&max_vec; \
|
int16_t *maxVal = (int16_t*)&max_vec; \
|
||||||
m = maxVal[0]; \
|
m = MAX(m,maxVal[0]); \
|
||||||
if (m > 0) { \
|
if (maxVal[0] > 0) { \
|
||||||
for(j=beg, i=iend; j<=end; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { \
|
for(j=beg, i=iend; j<=end; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { \
|
||||||
__m256i h2_vec = _mm256_loadu_si256((__m256i*) (&hA2[j])); \
|
__m256i h2_vec = _mm256_loadu_si256((__m256i*) (&hA2[j])); \
|
||||||
__m256i vcmp = _mm256_cmpeq_epi16(h2_vec, max_vec); \
|
__m256i vcmp = _mm256_cmpeq_epi16(h2_vec, max_vec); \
|
||||||
|
|
@ -197,7 +200,9 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
|
||||||
buf_t *buf) // 之前已经开辟过的缓存
|
buf_t *buf) // 之前已经开辟过的缓存
|
||||||
{
|
{
|
||||||
//return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off);
|
//return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off);
|
||||||
|
|
||||||
|
//fprintf(gfp1, "%d\n", qlen);
|
||||||
|
|
||||||
if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf);
|
if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf);
|
||||||
int16_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H,其他的保存上个H E F M
|
int16_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H,其他的保存上个H E F M
|
||||||
int16_t *seq, *ref;
|
int16_t *seq, *ref;
|
||||||
|
|
@ -335,7 +340,7 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
|
||||||
//}
|
//}
|
||||||
// 要处理边界
|
// 要处理边界
|
||||||
// 左边界 处理f (insert)
|
// 左边界 处理f (insert)
|
||||||
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); }
|
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); m = hA1[end];}
|
||||||
// 上边界
|
// 上边界
|
||||||
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
|
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
|
||||||
else if (D & 1) {
|
else if (D & 1) {
|
||||||
|
|
@ -616,14 +621,26 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
|
||||||
}
|
}
|
||||||
ins[0][0] = del[0][0] = score[0][0] = h0;
|
ins[0][0] = del[0][0] = score[0][0] = h0;
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef SHOW_PERF
|
||||||
|
// fprintf(gfp1, "start\n");
|
||||||
|
int bsw_cal_num = 0;
|
||||||
|
int real_cal_num = 0;
|
||||||
|
for (i = 0; i < tlen; ++i)
|
||||||
|
{
|
||||||
|
int beg = MAX(0, i - w);
|
||||||
|
int end = MIN(qlen, i + w + 1);
|
||||||
|
if (beg >= end) break;
|
||||||
|
bsw_cal_num += end - beg;
|
||||||
|
}
|
||||||
|
// fprintf(gfp1, "%d\n", bsw_cal_num);
|
||||||
|
#endif
|
||||||
for (i = 0; LIKELY(i < tlen); ++i) {
|
for (i = 0; LIKELY(i < tlen); ++i) {
|
||||||
int t, f = 0, h1, m = 0, mj = -1;
|
int t, f = 0, h1, m = 0, mj = -1;
|
||||||
int8_t *q = &qp[ref[i] * qlen];
|
int8_t *q = &qp[ref[i] * qlen];
|
||||||
// apply the band and the constraint (if provided)
|
// apply the band and the constraint (if provided)
|
||||||
if (beg < i - w) beg = i - w;
|
if (beg < i - w) beg = i - w;
|
||||||
if (end > i + w + 1) end = i + w + 1;
|
if (end > i + w + 1) end = i + w + 1;
|
||||||
//if (end > qlen) end = qlen; 没用
|
if (end > qlen) end = qlen; // 没用
|
||||||
// compute the first column
|
// compute the first column
|
||||||
if (beg == 0) {
|
if (beg == 0) {
|
||||||
h1 = h0 - (o_del + e_del * (i + 1));
|
h1 = h0 - (o_del + e_del * (i + 1));
|
||||||
|
|
@ -631,6 +648,11 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
|
||||||
} else h1 = 0;
|
} else h1 = 0;
|
||||||
//m = h1;
|
//m = h1;
|
||||||
for (j = beg; LIKELY(j < end); ++j) {
|
for (j = beg; LIKELY(j < end); ++j) {
|
||||||
|
|
||||||
|
#ifdef SHOW_PERF
|
||||||
|
real_cal_num++;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef DEBUG_SW_EXTEND
|
#ifdef DEBUG_SW_EXTEND
|
||||||
ins[i+1][j+1] = f;
|
ins[i+1][j+1] = f;
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -737,6 +759,9 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
|
||||||
fprintf(gfp3, "\n");
|
fprintf(gfp3, "\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef SHOW_PERF
|
||||||
|
//fprintf(gfp1, "%d\nend\n", real_cal_num);
|
||||||
|
#endif
|
||||||
|
|
||||||
free(eh); free(qp); free(qmem);
|
free(eh); free(qp); free(qmem);
|
||||||
if (_qle) *_qle = max_j + 1;
|
if (_qle) *_qle = max_j + 1;
|
||||||
|
|
|
||||||
|
|
@ -162,8 +162,8 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7,6,5,4,3,2,1,0,15,14,13,12,11,
|
||||||
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 7)); \
|
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 7)); \
|
||||||
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
|
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
|
||||||
max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
|
max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
|
||||||
m = maxVal[0]; \
|
m = MAX(m, maxVal[0]); \
|
||||||
if (m > 0) { \
|
if (maxVal[0] > 0) { \
|
||||||
for(j=beg, i=iend; j<=end; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { \
|
for(j=beg, i=iend; j<=end; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { \
|
||||||
__m256i h2_vec = _mm256_loadu_si256((__m256i*) (&hA2[j])); \
|
__m256i h2_vec = _mm256_loadu_si256((__m256i*) (&hA2[j])); \
|
||||||
__m256i vcmp = _mm256_cmpeq_epi8(h2_vec, max_vec); \
|
__m256i vcmp = _mm256_cmpeq_epi8(h2_vec, max_vec); \
|
||||||
|
|
@ -313,7 +313,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
|
||||||
|
|
||||||
// 要处理边界
|
// 要处理边界
|
||||||
// 左边界 处理f (insert)
|
// 左边界 处理f (insert)
|
||||||
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); }
|
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); m = hA1[end]; }
|
||||||
// 上边界
|
// 上边界
|
||||||
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
|
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
|
||||||
else if (D & 1) {
|
else if (D & 1) {
|
||||||
|
|
|
||||||
20
run.sh
20
run.sh
|
|
@ -1,4 +1,12 @@
|
||||||
thread=1
|
thread=64
|
||||||
|
#n_r1=~/data/fastq/dataset/na12878_wgs_150/bn1.fq
|
||||||
|
#n_r2=~/data/fastq/dataset/na12878_wgs_150/bn2.fq
|
||||||
|
n_r1=~/data/fastq/dataset/na12878_wes_144/SRR25735653_1.fastq
|
||||||
|
n_r2=~/data/fastq/dataset/na12878_wes_144/SRR25735653_2.fastq
|
||||||
|
#n_r1=~/data/fastq/dataset/na12878_wgs_150/n1.fq
|
||||||
|
#n_r2=~/data/fastq/dataset/na12878_wgs_150/n2.fq
|
||||||
|
#n_r1=~/data/fastq/dataset/na12878_wgs_101/na_1.fq
|
||||||
|
#n_r2=~/data/fastq/dataset/na12878_wgs_101/na_2.fq
|
||||||
#n_r1=~/data/fastq/zy/wgs/n1.fq
|
#n_r1=~/data/fastq/zy/wgs/n1.fq
|
||||||
#n_r2=~/data/fastq/zy/wgs/n2.fq
|
#n_r2=~/data/fastq/zy/wgs/n2.fq
|
||||||
#n_r1=~/data/fastq/ds/n1.fq
|
#n_r1=~/data/fastq/ds/n1.fq
|
||||||
|
|
@ -7,10 +15,12 @@ thread=1
|
||||||
#n_r2=~/data/fastq/platinum/n2.fq
|
#n_r2=~/data/fastq/platinum/n2.fq
|
||||||
#n_r1=~/data/fastq/zy/t1.fq
|
#n_r1=~/data/fastq/zy/t1.fq
|
||||||
#n_r2=~/data/fastq/zy/t2.fq
|
#n_r2=~/data/fastq/zy/t2.fq
|
||||||
n_r1=~/data/fastq/zy/n1.fq
|
#n_r1=~/data/fastq/zy/n1.fq
|
||||||
n_r2=~/data/fastq/zy/n2.fq
|
#n_r2=~/data/fastq/zy/n2.fq
|
||||||
#n_r1=~/data/fastq/ds/d1_1.fq
|
#n_r1=~/data/fastq/ds/d1_1.fq
|
||||||
#n_r2=~/data/fastq/ds/d1_2.fq
|
#n_r2=~/data/fastq/ds/d1_2.fq
|
||||||
|
#n_r1=~/data/fastq/ds/d2_1.fq
|
||||||
|
#n_r2=~/data/fastq/ds/d2_2.fq
|
||||||
#n_r1=~/data/fastq/ds/wes/n1.fq
|
#n_r1=~/data/fastq/ds/wes/n1.fq
|
||||||
#n_r2=~/data/fastq/ds/wes/n2.fq
|
#n_r2=~/data/fastq/ds/wes/n2.fq
|
||||||
#n_r1=~/fastq/ZY2105177532213010_L4_1.fq
|
#n_r1=~/fastq/ZY2105177532213010_L4_1.fq
|
||||||
|
|
@ -44,8 +54,8 @@ reference=~/data/reference/human_g1k_v37_decoy.fasta
|
||||||
#out=./all.sam
|
#out=./all.sam
|
||||||
#out=./sn.sam
|
#out=./sn.sam
|
||||||
#out=./ssn-x1.sam
|
#out=./ssn-x1.sam
|
||||||
#out=./out.sam
|
out=./out.sam
|
||||||
out=/dev/null
|
#out=/dev/null
|
||||||
#out=./na12878.sam
|
#out=./na12878.sam
|
||||||
#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
|
#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
|
||||||
# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \
|
# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
#!/bin/env python3
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
fn = sys.argv[1]
|
||||||
|
n_all = 0
|
||||||
|
n_2 = 0
|
||||||
|
n_intv1_len = 0
|
||||||
|
n_seed_len = 0
|
||||||
|
|
||||||
|
with open(fn, 'r') as f:
|
||||||
|
for line in f:
|
||||||
|
dat = line.strip().split('\t')
|
||||||
|
n_all += 1
|
||||||
|
if (len(dat) == 2):
|
||||||
|
n_2 += 1
|
||||||
|
n_intv1_len += int(dat[0])
|
||||||
|
n_seed_len += int(dat[1])
|
||||||
|
|
||||||
|
print(n_all, n_2, n_2 / n_all, n_intv1_len / n_2, n_seed_len / n_2)
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
#!/bin/env python3
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
fn = sys.argv[1]
|
||||||
|
n_all = 0
|
||||||
|
n_direct = 0
|
||||||
|
n_direct_start_len = 0
|
||||||
|
n_seed_len = 0
|
||||||
|
|
||||||
|
with open(fn, 'r') as f:
|
||||||
|
for line in f:
|
||||||
|
dat = line.strip().split('\t')
|
||||||
|
if dat[0] == "start":
|
||||||
|
n_all += 1
|
||||||
|
if (len(dat) == 2):
|
||||||
|
n_direct += 1
|
||||||
|
n_direct_start_len += int(dat[0])
|
||||||
|
n_seed_len += int(dat[1])
|
||||||
|
|
||||||
|
print(n_all, n_direct, n_direct / n_all, n_direct_start_len / n_direct, n_seed_len / n_direct)
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
#!/bin/env python3
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
fn = sys.argv[1]
|
||||||
|
n_bsw = 0
|
||||||
|
n_prun = 0
|
||||||
|
|
||||||
|
with open(fn, 'r') as f:
|
||||||
|
offset = 0
|
||||||
|
for line in f:
|
||||||
|
dat = line.strip().split('\t')
|
||||||
|
if dat[0] == "start":
|
||||||
|
offset = 1
|
||||||
|
elif offset == 1:
|
||||||
|
n_bsw += int(dat[0])
|
||||||
|
offset = 2
|
||||||
|
elif offset == 2:
|
||||||
|
n_prun += int(dat[0])
|
||||||
|
offset = 3
|
||||||
|
|
||||||
|
print(n_prun / n_bsw * 100, 100)
|
||||||
1
utils.h
1
utils.h
|
|
@ -50,6 +50,7 @@ time_work1,
|
||||||
time_work2,
|
time_work2,
|
||||||
time_flt_chain;
|
time_flt_chain;
|
||||||
|
|
||||||
|
extern int64_t gn[100];
|
||||||
extern int64_t dn, n16, n17, n18, n19, nall, num_sa;
|
extern int64_t dn, n16, n17, n18, n19, nall, num_sa;
|
||||||
extern int64_t s1n, s2n, s3n, s1l, s2l, s3l;
|
extern int64_t s1n, s2n, s3n, s1l, s2l, s3l;
|
||||||
extern int64_t g_num_smem2;
|
extern int64_t g_num_smem2;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue