改了几个bug,比如gen_sam需要全局id,dedup-sort需要一个一个做,现在还有一个问题,就是dedup-sort之后有些skip结果会变
This commit is contained in:
parent
fb0cd0f663
commit
67cbb5b8e9
|
|
@ -1,6 +1,7 @@
|
|||
*.log
|
||||
*.sam
|
||||
output/
|
||||
scripts/
|
||||
|
||||
# Prerequisites
|
||||
*.d
|
||||
|
|
|
|||
|
|
@ -18,10 +18,10 @@
|
|||
"-R",
|
||||
"'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'",
|
||||
"~/data/reference/fmt/human_g1k_v37_decoy.fasta",
|
||||
"~/data/dataset/D2/n1.fq.gz",
|
||||
"~/data/dataset/D2/n2.fq.gz",
|
||||
//"~/data/dataset/D2/n1.fq.gz", "~/data/dataset/D2/n2.fq.gz",
|
||||
"~/data/dataset/D2/d1.fq", "~/data/dataset/D2/d2.fq",
|
||||
"-o",
|
||||
"D1-out.sam",
|
||||
"/dev/null", // "D1-out.sam",
|
||||
],
|
||||
"cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间
|
||||
},
|
||||
|
|
|
|||
2
Makefile
2
Makefile
|
|
@ -1,5 +1,5 @@
|
|||
CC= gcc
|
||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw -O3
|
||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw #-O3
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
|
||||
SHOW_PERF= -DSHOW_PERF
|
||||
|
|
|
|||
40
paired_sam.c
40
paired_sam.c
|
|
@ -579,7 +579,13 @@ static void workder_gen_sam(void* data, int idx, int tid) {
|
|||
mem_alnreg_v* a = &w->regs[id];
|
||||
seq_sam_t* ss = &w->sams[id];
|
||||
|
||||
// int cmp = strcmp("ERR194147.1548", s[0].name);
|
||||
int cmp = strcmp("ERR194147.42331121", s[0].name) == 0;
|
||||
// int cmp = (id + w->n_processed) == 202924;
|
||||
if (cmp == 1) {
|
||||
fprintf(stderr, "%s\n", s[0].name);
|
||||
fflush(stderr);
|
||||
//exit(0);
|
||||
}
|
||||
|
||||
int n[2] = {0}, nn[2] = {0};
|
||||
for (i = 0; i < 2; ++i) {
|
||||
|
|
@ -600,37 +606,49 @@ static void workder_gen_sam(void* data, int idx, int tid) {
|
|||
skip |= 1 << r;
|
||||
}
|
||||
}
|
||||
if ((t->skip | skip) != 15) { // 检查新添加的aln是不是把当前的任务skip掉了
|
||||
kswr_avx_t aln = t->aln; fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id +
|
||||
w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb);
|
||||
//if ((t->skip | skip) != 15) { // 检查新添加的aln是不是把当前的任务skip掉了
|
||||
kswr_avx_t aln = t->aln; // fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb);
|
||||
if (cmp) {
|
||||
fprintf(stderr, "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb);
|
||||
fflush(stderr);
|
||||
}
|
||||
nn[si] += 1;
|
||||
n[si] += check_add_align(t->aln, opt->min_seed_len, t->is_rev, bns->l_pac, b, s[si].l_seq, (uint8_t*)s[si].seq, ma, t->rb);
|
||||
}
|
||||
//}
|
||||
}
|
||||
}
|
||||
// 处理完2个pair read之后再排序,因为上面操作有插入,排序后之前记录的索引就无效了,所以上面不能排序,要两个pair read处理完之后再排序
|
||||
for (i = 0; i < 2; ++i) {
|
||||
int tmp;
|
||||
if (nn[i] > 0) {
|
||||
for (j = 0; j < n[i]; ++j) { // 在这里排序
|
||||
int origin_n = a[i].n;
|
||||
int start_n = a[i].n - n[i];
|
||||
for (j = 0; j < n[i]; ++j) { // 在这里排序
|
||||
// move b s.t. ma is sorted
|
||||
int nidx = a[i].n - n[i] + j;
|
||||
int nidx = origin_n - n[i] + j;
|
||||
mem_alnreg_t b = a[i].a[nidx]; // 用指针一定要慎重,注意指向的地址内容会变
|
||||
for (k = 0; k < nidx; ++k) // find the insertion point
|
||||
if (a[i].a[k].score < b.score)
|
||||
break;
|
||||
tmp = k;
|
||||
for (k = nidx; k > tmp; --k) a[i].a[k] = a[i].a[k - 1];
|
||||
for (k = start_n; k > tmp; --k) a[i].a[k] = a[i].a[k - 1];
|
||||
a[i].a[k] = b;
|
||||
// 每添加一个aln都要sort-dedup一次
|
||||
#if 1
|
||||
start_n = mem_sort_dedup_patch(opt, 0, 0, 0, start_n + 1, a[i].a);
|
||||
a[i].n = start_n;
|
||||
#else
|
||||
start_n += 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
a[i].n = mem_sort_dedup_patch(opt, 0, 0, 0, a[i].n, a[i].a);
|
||||
// a[i].n = mem_sort_dedup_patch(opt, 0, 0, 0, a[i].n, a[i].a);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
generate_sam(opt, bns, pac, pes, id >> 1, s, a, ss, w->n_processed, tid);
|
||||
// 这里需要传入全局id
|
||||
generate_sam(opt, bns, pac, pes, (w->n_processed + id) >> 1, s, a, ss, w->n_processed, tid);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
10
run.sh
10
run.sh
|
|
@ -2,16 +2,20 @@ thread=1
|
|||
|
||||
make clean; make -j 32
|
||||
|
||||
n1=~/data/dataset/D2/n1.fq.gz
|
||||
n2=~/data/dataset/D2/n2.fq.gz
|
||||
#n1=~/data/dataset/D2/n1.fq.gz
|
||||
#n2=~/data/dataset/D2/n2.fq.gz
|
||||
n1=~/data/dataset/D2/d1.fq
|
||||
n2=~/data/dataset/D2/d2.fq
|
||||
|
||||
#n1=/file-data/zzh/dataset/D1/45mr1.fq.gz
|
||||
#n2=/file-data/zzh/dataset/D1/45mr2.fq.gz
|
||||
|
||||
reference=~/data/reference/fmt/human_g1k_v37_decoy.fasta
|
||||
|
||||
out=/dev/null
|
||||
#out=/dev/null
|
||||
#out=./oldsam-D2-out.sam
|
||||
#out=./D2-1.sam
|
||||
out=./d.sam
|
||||
prog=./fastalign
|
||||
#prog=/home/zzh/fastbwa
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue