diff --git a/.gitignore b/.gitignore index e5ce07d..8104594 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.log *.sam output/ +scripts/ # Prerequisites *.d diff --git a/.vscode/launch.json b/.vscode/launch.json index a3676a7..827537b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -18,10 +18,10 @@ "-R", "'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'", "~/data/reference/fmt/human_g1k_v37_decoy.fasta", - "~/data/dataset/D2/n1.fq.gz", - "~/data/dataset/D2/n2.fq.gz", + //"~/data/dataset/D2/n1.fq.gz", "~/data/dataset/D2/n2.fq.gz", + "~/data/dataset/D2/d1.fq", "~/data/dataset/D2/d2.fq", "-o", - "D1-out.sam", + "/dev/null", // "D1-out.sam", ], "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 }, diff --git a/Makefile b/Makefile index 33673f8..5351642 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC= gcc -CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw -O3 +CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw #-O3 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS SHOW_PERF= -DSHOW_PERF diff --git a/paired_sam.c b/paired_sam.c index 43698ec..b37893d 100644 --- a/paired_sam.c +++ b/paired_sam.c @@ -579,7 +579,13 @@ static void workder_gen_sam(void* data, int idx, int tid) { mem_alnreg_v* a = &w->regs[id]; seq_sam_t* ss = &w->sams[id]; - // int cmp = strcmp("ERR194147.1548", s[0].name); + int cmp = strcmp("ERR194147.42331121", s[0].name) == 0; + // int cmp = (id + w->n_processed) == 202924; + if (cmp == 1) { + fprintf(stderr, "%s\n", s[0].name); + fflush(stderr); + //exit(0); + } int n[2] = {0}, nn[2] = {0}; for (i = 0; i < 2; ++i) { @@ -600,37 +606,49 @@ static void workder_gen_sam(void* data, int idx, int tid) { skip |= 1 << r; } } - if ((t->skip | skip) != 15) { // 检查新添加的aln是不是把当前的任务skip掉了 - kswr_avx_t aln = t->aln; fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + - w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb); + //if ((t->skip | skip) != 15) { // 检查新添加的aln是不是把当前的任务skip掉了 + kswr_avx_t aln = t->aln; // fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb); + if (cmp) { + fprintf(stderr, "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb); + fflush(stderr); + } nn[si] += 1; n[si] += check_add_align(t->aln, opt->min_seed_len, t->is_rev, bns->l_pac, b, s[si].l_seq, (uint8_t*)s[si].seq, ma, t->rb); - } + //} } } // 处理完2个pair read之后再排序,因为上面操作有插入,排序后之前记录的索引就无效了,所以上面不能排序,要两个pair read处理完之后再排序 for (i = 0; i < 2; ++i) { int tmp; if (nn[i] > 0) { - for (j = 0; j < n[i]; ++j) { // 在这里排序 + int origin_n = a[i].n; + int start_n = a[i].n - n[i]; + for (j = 0; j < n[i]; ++j) { // 在这里排序 // move b s.t. ma is sorted - int nidx = a[i].n - n[i] + j; + int nidx = origin_n - n[i] + j; mem_alnreg_t b = a[i].a[nidx]; // 用指针一定要慎重,注意指向的地址内容会变 for (k = 0; k < nidx; ++k) // find the insertion point if (a[i].a[k].score < b.score) break; tmp = k; - for (k = nidx; k > tmp; --k) a[i].a[k] = a[i].a[k - 1]; + for (k = start_n; k > tmp; --k) a[i].a[k] = a[i].a[k - 1]; a[i].a[k] = b; + // 每添加一个aln都要sort-dedup一次 +#if 1 + start_n = mem_sort_dedup_patch(opt, 0, 0, 0, start_n + 1, a[i].a); + a[i].n = start_n; +#else + start_n += 1; +#endif } - a[i].n = mem_sort_dedup_patch(opt, 0, 0, 0, a[i].n, a[i].a); + // a[i].n = mem_sort_dedup_patch(opt, 0, 0, 0, a[i].n, a[i].a); } } //////////////////////////////////////////////////////////// - - generate_sam(opt, bns, pac, pes, id >> 1, s, a, ss, w->n_processed, tid); + // 这里需要传入全局id + generate_sam(opt, bns, pac, pes, (w->n_processed + id) >> 1, s, a, ss, w->n_processed, tid); } } diff --git a/run.sh b/run.sh index 762d4c4..77b8263 100755 --- a/run.sh +++ b/run.sh @@ -2,16 +2,20 @@ thread=1 make clean; make -j 32 -n1=~/data/dataset/D2/n1.fq.gz -n2=~/data/dataset/D2/n2.fq.gz +#n1=~/data/dataset/D2/n1.fq.gz +#n2=~/data/dataset/D2/n2.fq.gz +n1=~/data/dataset/D2/d1.fq +n2=~/data/dataset/D2/d2.fq #n1=/file-data/zzh/dataset/D1/45mr1.fq.gz #n2=/file-data/zzh/dataset/D1/45mr2.fq.gz reference=~/data/reference/fmt/human_g1k_v37_decoy.fasta -out=/dev/null +#out=/dev/null #out=./oldsam-D2-out.sam +#out=./D2-1.sam +out=./d.sam prog=./fastalign #prog=/home/zzh/fastbwa