解决了bsw上边界同样的问题,解决了seed3的一个bug

This commit is contained in:
zzh 2024-03-23 03:23:05 +08:00
parent 2eaeb26858
commit d728ddbe2c
13 changed files with 192 additions and 39 deletions

View File

@ -3,9 +3,10 @@ CC= gcc
# CFLAGS= -g -Wall -Wno-unused-function -O2
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
SHOW_PERF= -DSHOW_PERF
SHOW_PERF= #-DSHOW_PERF
FILTER_FULL_MATCH=#-DFILTER_FULL_MATCH
AR= ar
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) -DUSE_AVX2 -DKSW_EQUAL #-DUSE_MT_READ #-DFILTER_FULL_MATCH
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) $(FILTER_FULL_MATCH) -DUSE_AVX2 -DKSW_EQUAL -DUSE_MT_READ
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \

View File

@ -133,7 +133,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len,
while (x < len) {
if (seq[x] < 4) {
start_flag = 0;
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, &a->mem1, a->tmpv[0]);
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]);
for (i = 0; i < a->mem1.n; ++i) {
bwtintv_t *p = &a->mem1.a[i];
int slen = (uint32_t)p->info - (p->info >> 32); // seed length
@ -163,7 +163,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len,
bwtintv_t *p = &a->mem.a[k];
int start = p->info>>32, end = (int32_t)p->info;
if (end - start < split_len || p->x[2] > opt->split_width) continue;
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, &a->mem1, a->tmpv[0]);
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]);
for (i = 0; i < a->mem1.n; ++i) {
bwtintv_t *p = &a->mem1.a[i];
int slen = (uint32_t)p->info - (p->info >> 32);
@ -1384,10 +1384,13 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
int64_t tmp_time = realtime_msec();
#endif
// 1. first pass: find all SMEMs
//fprintf(gfp1, "read\n");
while (x < len) {
if (seq[x] < 4) {
start_flag = 0;
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, &a->mem1, a->tmpv[0]);
//int last_x = x;
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]);
//fprintf(gfp1, "%d\n", x - last_x);
for (i = 0; i < a->mem1.n; ++i) {
bwtintv_t *p = &a->mem1.a[i];
int slen = (uint32_t)p->info - (p->info >> 32); // seed length
@ -1405,12 +1408,11 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
#ifdef SHOW_PERF
a->time_seed_1 += realtime_msec() - tmp_time;
tmp_time = realtime_msec();
#endif
s2n += 1;
if (max_seed_len == len - start_N_num)
s1n += 1;
#endif
#ifdef FILTER_FULL_MATCH
if (max_seed_len == len - start_N_num) goto collect_intv_end;
#endif
@ -1421,14 +1423,17 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
bwtintv_t *p = &smem->mem.a[k];
int start = p->info>>32, end = (int32_t)p->info;
if (end - start < split_len || p->x[2] > opt->split_width) continue;
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, &a->mem1, a->tmpv[0]);
// fprintf(gfp1, "start\n");
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]);
for (i = 0; i < a->mem1.n; ++i) {
bwtintv_t *p = &a->mem1.a[i];
int slen = (uint32_t)p->info - (p->info >> 32);
// fprintf(gfp1, "%d\n", slen);
if (slen >= opt->min_seed_len) {
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
}
}
// fprintf(gfp1, "end\n");
}
#ifdef SHOW_PERF
a->time_seed_2 += realtime_msec() - tmp_time;
@ -1442,6 +1447,7 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
bwtintv_t m;
x = fmt_seed_strategy1(fmt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
if (m.x[2] > 0) {
// fprintf(stderr, "3: %ld %ld %ld\n", m.info >> 32, m.info & ((1L << 32) - 1), m.x[2]);
kv_push(bwtintv_t, smem->mem, m);
}
} else ++x;
@ -1456,6 +1462,10 @@ collect_intv_end:
#endif
// sort
ks_introsort(mem_intv, smem->mem.n, smem->mem.a);
//for (i = 0; i < smem->mem.n; ++i) {
// fprintf(stderr, "seed %d: %ld %ld %ld\n", i, smem->mem.a[i].x[0], smem->mem.a[i].x[1], smem->mem.a[i].x[2]);
//}
//fprintf(stderr, "split\n");
}
void find_smem(const mem_opt_t *opt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_aux_t *aux, smem_v *smemv)
@ -1642,10 +1652,10 @@ void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux);
free(chnp->a[j].seeds);
}
#ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_ksw_extend2, tmp_time);
#endif
//#ifdef SHOW_PERF
// tmp_time = realtime_msec() - tmp_time;
// __sync_fetch_and_add(&time_ksw_extend2, tmp_time);
//#endif
free(chnp->a); chnp->m = 0; chnp->a = 0;
regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t *)seq, regp->n, regp->a);
if (bwa_verbose >= 4) {
@ -1660,6 +1670,11 @@ void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t
if (p->rid >= 0 && bns->anns[p->rid].is_alt)
p->is_alt = 1;
}
#ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_ksw_extend2, tmp_time);
#endif
}
#if 1
@ -1747,11 +1762,15 @@ void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed
w->isize_arr[i][j].n = 0;
}
#endif
#ifdef SHOW_PERF
int64_t tmp_time = realtime_msec();
#endif
//kt_for(opt->n_threads, worker1, w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions
#ifdef SHOW_PERF
time_work1 += realtime_msec() - tmp_time;
tmp_time = realtime_msec();
#endif
if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data
@ -1759,7 +1778,9 @@ void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed
}
kt_for(opt->n_threads, worker2, w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment
//kt_for(opt->n_threads, worker_sam, w, n_batch);
#ifdef SHOW_PERF
time_work2 += realtime_msec() - tmp_time;
#endif
//free(w.regs);
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);

View File

@ -1,4 +1,6 @@
thread=1
n_r1=~/data/fastq/dataset/na12878_wes_144/diff_r1.fq
n_r2=~/data/fastq/dataset/na12878_wes_144/diff_r2.fq
#n_r1=~/data/fastq/ZY2105177532213000/n_r1.fq
#n_r2=~/data/fastq/ZY2105177532213000/n_r2.fq
#n_r1=~/data/fastq/ZY2105177532213000/sn_r1.fq
@ -10,8 +12,8 @@ thread=1
#n_r2=~/fastq/ssn_r2.fq
#n_r1=~/fastq/tiny_n_r1.fq
#n_r2=~/fastq/tiny_n_r2.fq
n_r1=~/fastq/diff_r1.fq
n_r2=~/fastq/diff_r2.fq
#n_r1=~/fastq/diff_r1.fq
#n_r2=~/fastq/diff_r2.fq
#n_r1=~/fastq/diff_all_r1.fq
#n_r2=~/fastq/diff_all_r2.fq
#n_r1=~/fastq/d_r1.fq

View File

@ -63,6 +63,7 @@ int64_t time_ksw_extend2 = 0,
int64_t dn = 0, n16 = 0, n17 = 0, n18 = 0, n19 = 0, nall = 0, num_sa = 0;
int64_t s1n = 0, s2n = 0, s3n = 0, s1l = 0, s2l = 0, s3l = 0;
int64_t gn[100] = {0};
int64_t g_num_smem2 = 0;
FILE *gfp1, *gfp2, *gfp3;
@ -292,7 +293,7 @@ int main_mem(int argc, char *argv[])
}
if (opt->n_threads < 1) opt->n_threads = 1;
if (opt->batch_size < 1) opt->batch_size = 512;
if (opt->batch_size < 1) opt->batch_size = 256;
fprintf(stderr, "batch_size: %d\n", opt->batch_size);
if (optind + 1 >= argc || optind + 3 < argc) {
@ -430,6 +431,7 @@ int main_mem(int argc, char *argv[])
bwa_print_sam_hdr(aux.idx->bns, hdr_line);
aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3);
//kt_pipeline(3, process, &aux, 3);
free(hdr_line);
free(opt);
bwa_idx_destroy(aux.idx);

View File

@ -708,7 +708,10 @@ int fmt_smem_forward(const FMTIndex *fmt, int len, const uint8_t *q, int x, int
#if 1
if (min_intv == 1 && ok2.x[2] == min_intv)
{
// fprintf(gfp1, "%d\t", i + 2 - x);
direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt);
//mt.x[0] = ok2.x[0];
//fprintf(gfp1, "mt %ld %ld\n", ok2.x[0], ok2.x[1]);
kv_push(bwtintv_t, *mem, mt);
ret = (uint32_t)mt.info;
goto fmt_smem_forward_end;
@ -767,8 +770,8 @@ fmt_smem_forward_end:
return ret;
}
// 找smemseed
int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_v *mem, bwtintv_v *tmpvec)
// 找smemseed(lm: long_smem)
int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec)
{
if (x == 0 || q[x - 1] > 3)
return fmt_smem_forward(fmt, len, q, x, min_intv, min_seed_len, mem);
@ -788,7 +791,7 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
qbit = build_forward_kmer(&q[x], len - x, HASH_KMER_LEN, &kmer_len);
bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, 0); // 初始碱基位置
ik.info = x + 1;
//int print_flag = 0;
// check change of the interval size and whether the interval size is too small to be extended further
#define CHECK_INTV_CHANGE(iv, ov, end_pos) \
if (ov.x[2] != iv.x[2]) \
@ -826,18 +829,37 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2);
CHECK_INTV_CHANGE(ik, ok1, i + 1);
#if 0
if (min_intv == 2 && ok1.x[2] == 2 && ok2.x[2] == 1 && lm && lm->num_match == 1)
{
bwtint_t bwt_mtx_row = ok1.x[0];
if (bwt_mtx_row == ok2.x[0])
bwt_mtx_row = ok1.x[0] + 1;
direct_extend(fmt, len, q, x, i + 1, bwt_mtx_row, &mt);
kv_push(bwtintv_t, *mem, mt);
//fprintf(gfp1, "cond-appear: %d %d %ld %ld %ld %ld\n", x, i + 2, ok1.x[0], ok1.x[1], ok2.x[0], ok2.x[1]);
//print_flag = 1;
}
else
{
CHECK_INTV_CHANGE(ik, ok2, i + 2);
}
#else
CHECK_INTV_CHANGE(ik, ok2, i + 2);
// 在这里进行判断是否只有一个候选了
#endif
#if 1
// 在这里进行判断是否只有一个候选了
if (min_intv == 1 && ok2.x[2] == min_intv)
{
direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt);
kv_push(bwtintv_t, *mem, mt);
//mt.x[0] = ok2.x[0];
//fprintf(gfp1, "mt %ld %ld\n", ok2.x[0], ok2.x[1]);
kv_push(bwtintv_t, *mem, mt); // 这里可以判断一下是否大于min_seed_len
ret = (uint32_t)mt.info;
if (mt.rm[0].qs == 0 || q[mt.rm[0].qs - 1] > 3)
goto fmt_smem_end;
goto backward_search;
}
} // if (min_intv == 2 && ok2.x[2] == min_intv && !print_flag)
#endif
}
else if (q[i] < 4) // q[i+1] >= 4
@ -868,7 +890,6 @@ backward_search:
fmt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first
if (mt.num_match == 0)
ret = curr->a[0].info; // this will be the returned value扩展到的最远的位置
// 按照种子进行遍历,反向扩展
#define CHECK_ADD_MEM(pos, intv, mem) \
if (((int)((intv).info) - (pos) >= min_seed_len) && (mem->n == 0 || (pos) < mem->a[mem->n - 1].info >> 32)) \
@ -966,6 +987,7 @@ backward_search:
}
fmt_smem_end:
//if (mem->n == 0 && min_intv > 1 && print_flag) fprintf(gfp1, "\n");
fmt_reverse_intvs(mem); // s.t. sorted by the start coordinate
return ret;
}
@ -1029,10 +1051,12 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
COND_SET_RETURN(ok2, *mem, x, i + 1, max_intv, min_len);
ik = ok2;
//fprintf(stderr, "d: %d %ld\n", i, ok2.x[2]);
}
else if (q[i] < 4) // q[i+1] >= 4
{
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
//fprintf(stderr, "d: %d %ld\n", i, ok1.x[2]);
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
return i + 2;
}
@ -1041,9 +1065,10 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in
return i + 1;
}
}
if (i == len - 1)
if (i == len - 1 && q[i] < 4)
{
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
//fprintf(stderr, "d: %d %ld\n", i, ok1.x[2]);
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
}
return len;

View File

@ -107,7 +107,7 @@ void kmer_setval_at(uint8_t *mem_addr, bwtintv_t ik, int pos);
void kmer_getval_at(uint8_t *mem_addr, bwtintv_t *ok, int pos);
void fmt_kmer_get(const FMTIndex *fmt, bwtintv_t *ok, uint32_t qbit, int pos);
// 找smemseed
int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_v *mem, bwtintv_v *tmpvec);
int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec);
int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem);

View File

@ -6,7 +6,10 @@
#include <immintrin.h>
#include <emmintrin.h>
#ifdef SHOW_PERF
extern FILE *gfp1, *gfp2, *gfp3;
#endif
//#define DEBUG_SW_EXTEND 1
#define NO_VAL -1
@ -148,8 +151,8 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
max_vec = _mm256_max_epu16(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
max_vec = _mm256_max_epu16(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
int16_t *maxVal = (int16_t*)&max_vec; \
m = maxVal[0]; \
if (m > 0) { \
m = MAX(m,maxVal[0]); \
if (maxVal[0] > 0) { \
for(j=beg, i=iend; j<=end; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { \
__m256i h2_vec = _mm256_loadu_si256((__m256i*) (&hA2[j])); \
__m256i vcmp = _mm256_cmpeq_epi16(h2_vec, max_vec); \
@ -197,7 +200,9 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
buf_t *buf) // 之前已经开辟过的缓存
{
//return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off);
//fprintf(gfp1, "%d\n", qlen);
if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf);
int16_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H其他的保存上个H E F M
int16_t *seq, *ref;
@ -335,7 +340,7 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
//}
// 要处理边界
// 左边界 处理f (insert)
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); }
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); m = hA1[end];}
// 上边界
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
else if (D & 1) {
@ -616,14 +621,26 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
}
ins[0][0] = del[0][0] = score[0][0] = h0;
#endif
#ifdef SHOW_PERF
// fprintf(gfp1, "start\n");
int bsw_cal_num = 0;
int real_cal_num = 0;
for (i = 0; i < tlen; ++i)
{
int beg = MAX(0, i - w);
int end = MIN(qlen, i + w + 1);
if (beg >= end) break;
bsw_cal_num += end - beg;
}
// fprintf(gfp1, "%d\n", bsw_cal_num);
#endif
for (i = 0; LIKELY(i < tlen); ++i) {
int t, f = 0, h1, m = 0, mj = -1;
int8_t *q = &qp[ref[i] * qlen];
// apply the band and the constraint (if provided)
if (beg < i - w) beg = i - w;
if (end > i + w + 1) end = i + w + 1;
//if (end > qlen) end = qlen; 没用
if (end > qlen) end = qlen; // 没用
// compute the first column
if (beg == 0) {
h1 = h0 - (o_del + e_del * (i + 1));
@ -631,6 +648,11 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
} else h1 = 0;
//m = h1;
for (j = beg; LIKELY(j < end); ++j) {
#ifdef SHOW_PERF
real_cal_num++;
#endif
#ifdef DEBUG_SW_EXTEND
ins[i+1][j+1] = f;
#endif
@ -737,6 +759,9 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
fprintf(gfp3, "\n");
}
#endif
#ifdef SHOW_PERF
//fprintf(gfp1, "%d\nend\n", real_cal_num);
#endif
free(eh); free(qp); free(qmem);
if (_qle) *_qle = max_j + 1;

View File

@ -162,8 +162,8 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7,6,5,4,3,2,1,0,15,14,13,12,11,
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 7)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
m = maxVal[0]; \
if (m > 0) { \
m = MAX(m, maxVal[0]); \
if (maxVal[0] > 0) { \
for(j=beg, i=iend; j<=end; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { \
__m256i h2_vec = _mm256_loadu_si256((__m256i*) (&hA2[j])); \
__m256i vcmp = _mm256_cmpeq_epi8(h2_vec, max_vec); \
@ -313,7 +313,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
// 要处理边界
// 左边界 处理f (insert)
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); }
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); m = hA1[end]; }
// 上边界
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
else if (D & 1) {

20
run.sh
View File

@ -1,4 +1,12 @@
thread=1
thread=64
#n_r1=~/data/fastq/dataset/na12878_wgs_150/bn1.fq
#n_r2=~/data/fastq/dataset/na12878_wgs_150/bn2.fq
n_r1=~/data/fastq/dataset/na12878_wes_144/SRR25735653_1.fastq
n_r2=~/data/fastq/dataset/na12878_wes_144/SRR25735653_2.fastq
#n_r1=~/data/fastq/dataset/na12878_wgs_150/n1.fq
#n_r2=~/data/fastq/dataset/na12878_wgs_150/n2.fq
#n_r1=~/data/fastq/dataset/na12878_wgs_101/na_1.fq
#n_r2=~/data/fastq/dataset/na12878_wgs_101/na_2.fq
#n_r1=~/data/fastq/zy/wgs/n1.fq
#n_r2=~/data/fastq/zy/wgs/n2.fq
#n_r1=~/data/fastq/ds/n1.fq
@ -7,10 +15,12 @@ thread=1
#n_r2=~/data/fastq/platinum/n2.fq
#n_r1=~/data/fastq/zy/t1.fq
#n_r2=~/data/fastq/zy/t2.fq
n_r1=~/data/fastq/zy/n1.fq
n_r2=~/data/fastq/zy/n2.fq
#n_r1=~/data/fastq/zy/n1.fq
#n_r2=~/data/fastq/zy/n2.fq
#n_r1=~/data/fastq/ds/d1_1.fq
#n_r2=~/data/fastq/ds/d1_2.fq
#n_r1=~/data/fastq/ds/d2_1.fq
#n_r2=~/data/fastq/ds/d2_2.fq
#n_r1=~/data/fastq/ds/wes/n1.fq
#n_r2=~/data/fastq/ds/wes/n2.fq
#n_r1=~/fastq/ZY2105177532213010_L4_1.fq
@ -44,8 +54,8 @@ reference=~/data/reference/human_g1k_v37_decoy.fasta
#out=./all.sam
#out=./sn.sam
#out=./ssn-x1.sam
#out=./out.sam
out=/dev/null
out=./out.sam
#out=/dev/null
#out=./na12878.sam
#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \

21
seed1.py 100755
View File

@ -0,0 +1,21 @@
#!/bin/env python3
import sys
import os
fn = sys.argv[1]
n_all = 0
n_2 = 0
n_intv1_len = 0
n_seed_len = 0
with open(fn, 'r') as f:
for line in f:
dat = line.strip().split('\t')
n_all += 1
if (len(dat) == 2):
n_2 += 1
n_intv1_len += int(dat[0])
n_seed_len += int(dat[1])
print(n_all, n_2, n_2 / n_all, n_intv1_len / n_2, n_seed_len / n_2)

22
seed2.py 100755
View File

@ -0,0 +1,22 @@
#!/bin/env python3
import sys
import os
fn = sys.argv[1]
n_all = 0
n_direct = 0
n_direct_start_len = 0
n_seed_len = 0
with open(fn, 'r') as f:
for line in f:
dat = line.strip().split('\t')
if dat[0] == "start":
n_all += 1
if (len(dat) == 2):
n_direct += 1
n_direct_start_len += int(dat[0])
n_seed_len += int(dat[1])
print(n_all, n_direct, n_direct / n_all, n_direct_start_len / n_direct, n_seed_len / n_direct)

23
sw1.py 100755
View File

@ -0,0 +1,23 @@
#!/bin/env python3
import sys
import os
fn = sys.argv[1]
n_bsw = 0
n_prun = 0
with open(fn, 'r') as f:
offset = 0
for line in f:
dat = line.strip().split('\t')
if dat[0] == "start":
offset = 1
elif offset == 1:
n_bsw += int(dat[0])
offset = 2
elif offset == 2:
n_prun += int(dat[0])
offset = 3
print(n_prun / n_bsw * 100, 100)

View File

@ -50,6 +50,7 @@ time_work1,
time_work2,
time_flt_chain;
extern int64_t gn[100];
extern int64_t dn, n16, n17, n18, n19, nall, num_sa;
extern int64_t s1n, s2n, s3n, s1l, s2l, s3l;
extern int64_t g_num_smem2;