修正了几个bug,但是目前结果还是不一致,还得继续调试
This commit is contained in:
parent
e997699a47
commit
fb0cd0f663
2
Makefile
2
Makefile
|
|
@ -1,5 +1,5 @@
|
|||
CC= gcc
|
||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw #-O3
|
||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw -O3
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
|
||||
SHOW_PERF= -DSHOW_PERF
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@
|
|||
|
||||
// 需要做mate sw的read
|
||||
typedef struct {
|
||||
int is_rev; // seq是否在反向互补链上
|
||||
uint8_t is_rev; // seq是否在反向互补链上
|
||||
uint8_t skip; // 记录任务创建时候的skip状态
|
||||
int xtra;
|
||||
int64_t rb, re; // ref的起始截止位置,左闭右开
|
||||
int64_t seq_id; // 对应的当前数据块的seq id
|
||||
|
|
|
|||
344
paired_sam.c
344
paired_sam.c
|
|
@ -93,7 +93,7 @@ static void get_matesw_tasks(const mem_opt_t* opt, const bntseq_t* bns, const ui
|
|||
return; // consistent pair exist; no need to perform SW
|
||||
int task_order = 0;
|
||||
for (r = 0; r < 4; ++r) {
|
||||
int is_rev, is_larger;
|
||||
uint8_t is_rev, is_larger;
|
||||
int64_t rb, re; // 左闭右开
|
||||
if (skip[r])
|
||||
continue;
|
||||
|
|
@ -124,6 +124,7 @@ static void get_matesw_tasks(const mem_opt_t* opt, const bntseq_t* bns, const ui
|
|||
else
|
||||
p = kv_pushp(msw_task_t, *msw16);
|
||||
p->is_rev = is_rev;
|
||||
p->skip = skip[0] | skip[1] << 1 | skip[2] << 2 | skip[3] << 3;
|
||||
p->xtra = xtra;
|
||||
p->rb = rb;
|
||||
p->re = re;
|
||||
|
|
@ -396,6 +397,167 @@ static int check_add_align(kswr_avx_t aln, int min_seed_len, int is_rev, int64_t
|
|||
|
||||
#define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499))
|
||||
|
||||
// 根据比对结果生成sam
|
||||
void generate_sam(const mem_opt_t* opt, const bntseq_t* bns, const uint8_t* pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2],
|
||||
seq_sam_t ss[2], int64_t n_processed, int tid) {
|
||||
|
||||
int i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2], n_aa[2];
|
||||
kstring_t str;
|
||||
mem_aln_t h[2], g[2], aa[2][2];
|
||||
|
||||
// int cmp = strcmp("ERR194147.17699", s[0].name);
|
||||
|
||||
str.l = str.m = 0;
|
||||
str.s = 0;
|
||||
memset(h, 0, sizeof(mem_aln_t) * 2);
|
||||
memset(g, 0, sizeof(mem_aln_t) * 2);
|
||||
n_aa[0] = n_aa[1] = 0;
|
||||
n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id << 1 | 0);
|
||||
n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id << 1 | 1);
|
||||
if (opt->flag & MEM_F_PRIMARY5) {
|
||||
mem_reorder_primary5(opt->T, &a[0]);
|
||||
mem_reorder_primary5(opt->T, &a[1]);
|
||||
}
|
||||
if (opt->flag & MEM_F_NOPAIRING)
|
||||
goto no_pairing;
|
||||
// pairing single-end hits
|
||||
if (n_pri[0] && n_pri[1] && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z, n_pri)) > 0) {
|
||||
int is_multi[2], q_pe, score_un, q_se[2];
|
||||
char** XA[2];
|
||||
// check if an end has multiple hits even after mate-SW
|
||||
for (i = 0; i < 2; ++i) {
|
||||
for (j = 1; j < n_pri[i]; ++j)
|
||||
if (a[i].a[j].secondary < 0 && a[i].a[j].score >= opt->T)
|
||||
break;
|
||||
is_multi[i] = j < n_pri[i] ? 1 : 0;
|
||||
}
|
||||
if (is_multi[0] || is_multi[1])
|
||||
goto no_pairing; // TODO: in rare cases, the true hit may be long but with low score
|
||||
// compute mapQ for the best SE hit
|
||||
score_un = a[0].a[0].score + a[1].a[0].score - opt->pen_unpaired;
|
||||
// q_pe = o && subo < o? (int)(MEM_MAPQ_COEF * (1. - (double)subo / o) * log(a[0].a[z[0]].seedcov + a[1].a[z[1]].seedcov) + .499) : 0;
|
||||
subo = subo > score_un ? subo : score_un;
|
||||
q_pe = raw_mapq(o - subo, opt->a);
|
||||
if (n_sub > 0)
|
||||
q_pe -= (int)(4.343 * log(n_sub + 1) + .499);
|
||||
if (q_pe < 0)
|
||||
q_pe = 0;
|
||||
if (q_pe > 60)
|
||||
q_pe = 60;
|
||||
q_pe = (int)(q_pe * (1. - .5 * (a[0].a[0].frac_rep + a[1].a[0].frac_rep)) + .499);
|
||||
// the following assumes no split hits
|
||||
if (o > score_un) { // paired alignment is preferred
|
||||
mem_alnreg_t* c[2];
|
||||
c[0] = &a[0].a[z[0]];
|
||||
c[1] = &a[1].a[z[1]];
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (c[i]->secondary >= 0)
|
||||
c[i]->sub = a[i].a[c[i]->secondary].score, c[i]->secondary = -2;
|
||||
q_se[i] = mem_approx_mapq_se(opt, c[i]);
|
||||
}
|
||||
q_se[0] = q_se[0] > q_pe ? q_se[0] : q_pe < q_se[0] + 40 ? q_pe : q_se[0] + 40;
|
||||
q_se[1] = q_se[1] > q_pe ? q_se[1] : q_pe < q_se[1] + 40 ? q_pe : q_se[1] + 40;
|
||||
extra_flag |= 2;
|
||||
// cap at the tandem repeat score
|
||||
q_se[0] = q_se[0] < raw_mapq(c[0]->score - c[0]->csub, opt->a) ? q_se[0] : raw_mapq(c[0]->score - c[0]->csub, opt->a);
|
||||
q_se[1] = q_se[1] < raw_mapq(c[1]->score - c[1]->csub, opt->a) ? q_se[1] : raw_mapq(c[1]->score - c[1]->csub, opt->a);
|
||||
} else { // the unpaired alignment is preferred
|
||||
z[0] = z[1] = 0;
|
||||
q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]);
|
||||
q_se[1] = mem_approx_mapq_se(opt, &a[1].a[0]);
|
||||
}
|
||||
for (i = 0; i < 2; ++i) {
|
||||
int k = a[i].a[z[i]].secondary_all;
|
||||
if (k >= 0 && k < n_pri[i]) { // switch secondary and primary if both of them are non-ALT
|
||||
assert(a[i].a[k].secondary_all < 0);
|
||||
for (j = 0; j < a[i].n; ++j)
|
||||
if (a[i].a[j].secondary_all == k || j == k)
|
||||
a[i].a[j].secondary_all = z[i];
|
||||
a[i].a[z[i]].secondary_all = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(opt->flag & MEM_F_ALL)) {
|
||||
for (i = 0; i < 2; ++i) XA[i] = mem_gen_alt(opt, bns, pac, &a[i], s[i].l_seq, s[i].seq);
|
||||
} else
|
||||
XA[0] = XA[1] = 0;
|
||||
|
||||
// write SAM
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[z[i]]);
|
||||
h[i].mapq = q_se[i];
|
||||
h[i].flag |= 0x40 << i | extra_flag;
|
||||
h[i].XA = XA[i] ? XA[i][z[i]] : 0;
|
||||
aa[i][n_aa[i]++] = h[i];
|
||||
if (n_pri[i] < a[i].n) { // the read has ALT hits
|
||||
mem_alnreg_t* p = &a[i].a[n_pri[i]];
|
||||
if (p->score < opt->T || p->secondary >= 0 || !p->is_alt)
|
||||
continue;
|
||||
g[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, p);
|
||||
g[i].flag |= 0x800 | 0x40 << i | extra_flag;
|
||||
g[i].XA = XA[i] ? XA[i][n_pri[i]] : 0;
|
||||
aa[i][n_aa[i]++] = g[i];
|
||||
}
|
||||
}
|
||||
ss[0].sam.l = 0;
|
||||
// PROF_START(aln2sam);
|
||||
for (i = 0; i < n_aa[0]; ++i) mem_aln2sam(opt, bns, &ss[0].sam, &s[0], n_aa[0], aa[0], i, &h[1]); // write read1 hits
|
||||
ss[1].sam.l = 0;
|
||||
for (i = 0; i < n_aa[1]; ++i) mem_aln2sam(opt, bns, &ss[1].sam, &s[1], n_aa[1], aa[1], i, &h[0]); // write read2 hits
|
||||
if (strcmp(s[0].name, s[1].name) != 0)
|
||||
err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||
// free
|
||||
for (i = 0; i < 2; ++i) {
|
||||
free(h[i].cigar);
|
||||
free(g[i].cigar);
|
||||
if (XA[i] == 0)
|
||||
continue;
|
||||
for (j = 0; j < a[i].n; ++j) free(XA[i][j]);
|
||||
free(XA[i]);
|
||||
}
|
||||
} else
|
||||
goto no_pairing;
|
||||
|
||||
goto end_clear;
|
||||
|
||||
no_pairing:
|
||||
for (i = 0; i < 2; ++i) {
|
||||
int which = -1;
|
||||
if (a[i].n) {
|
||||
if (a[i].a[0].score >= opt->T)
|
||||
which = 0;
|
||||
else if (n_pri[i] < a[i].n && a[i].a[n_pri[i]].score >= opt->T)
|
||||
which = n_pri[i];
|
||||
}
|
||||
if (which >= 0)
|
||||
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[which]);
|
||||
else
|
||||
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, 0);
|
||||
}
|
||||
if (!(opt->flag & MEM_F_NOPAIRING) && h[0].rid == h[1].rid &&
|
||||
h[0].rid >= 0) { // if the top hits from the two ends constitute a proper pair, flag it.
|
||||
int64_t dist;
|
||||
int d;
|
||||
d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist);
|
||||
if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high)
|
||||
extra_flag |= 2;
|
||||
}
|
||||
mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41 | extra_flag, &h[1], &ss[0]);
|
||||
mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81 | extra_flag, &h[0], &ss[1]);
|
||||
if (strcmp(s[0].name, s[1].name) != 0)
|
||||
err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||
free(h[0].cigar);
|
||||
free(h[1].cigar);
|
||||
|
||||
end_clear:
|
||||
_destory_clear_vec(s[0].msw);
|
||||
_destory_clear_vec(s[1].msw);
|
||||
|
||||
free(a[0].a);
|
||||
free(a[1].a);
|
||||
}
|
||||
|
||||
// 最后再计算并生成sam数据
|
||||
static void workder_gen_sam(void* data, int idx, int tid) {
|
||||
mem_worker_t* w = (mem_worker_t*)data;
|
||||
|
|
@ -416,37 +578,50 @@ static void workder_gen_sam(void* data, int idx, int tid) {
|
|||
bseq1_t* s = &w->seqs[id];
|
||||
mem_alnreg_v* a = &w->regs[id];
|
||||
seq_sam_t* ss = &w->sams[id];
|
||||
int z[2], o, subo, n_sub, extra_flag = 1, n_pri[2], n_aa[2];
|
||||
mem_aln_t h[2], g[2], aa[2][2];
|
||||
memset(h, 0, sizeof(mem_aln_t) * 2);
|
||||
memset(g, 0, sizeof(mem_aln_t) * 2);
|
||||
n_aa[0] = n_aa[1] = 0;
|
||||
|
||||
int n[2] = {0};
|
||||
// int cmp = strcmp("ERR194147.1548", s[0].name);
|
||||
|
||||
int n[2] = {0}, nn[2] = {0};
|
||||
for (i = 0; i < 2; ++i) {
|
||||
int si = !i;
|
||||
int origin_n = a[si].n;
|
||||
// 这里应该先给task排序,因为u8和i16是分开排序的,需要合在一起
|
||||
for (k = 0; k < s[si].msw.n; ++k) {
|
||||
msw_task_t* t = s[si].msw.a[k];
|
||||
// kswr_avx_t aln = t->aln; fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb);
|
||||
|
||||
n[i] += check_add_align(t->aln, opt->min_seed_len, t->is_rev, bns->l_pac, &a[!si].a[t->aj], s[si].l_seq, (uint8_t*)s[si].seq, &a[si], t->rb);
|
||||
mem_alnreg_t* b = &a[i].a[t->aj];
|
||||
mem_alnreg_v* ma = &a[si];
|
||||
uint8_t skip = 0;
|
||||
if (n[si]) { // 添加了新的aln,需要检测skip
|
||||
for (j = origin_n; j < ma->n; ++j) { // check which orinentation has been found
|
||||
int64_t dist;
|
||||
int r;
|
||||
r = mem_infer_dir(bns->l_pac, b->rb, ma->a[j].rb, &dist);
|
||||
if (dist >= pes[r].low && dist <= pes[r].high)
|
||||
skip |= 1 << r;
|
||||
}
|
||||
}
|
||||
if ((t->skip | skip) != 15) { // 检查新添加的aln是不是把当前的任务skip掉了
|
||||
kswr_avx_t aln = t->aln; fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id +
|
||||
w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb);
|
||||
nn[si] += 1;
|
||||
n[si] += check_add_align(t->aln, opt->min_seed_len, t->is_rev, bns->l_pac, b, s[si].l_seq, (uint8_t*)s[si].seq, ma, t->rb);
|
||||
}
|
||||
}
|
||||
}
|
||||
// 处理完2个pair read之后再排序,因为上面操作有插入,排序后之前记录的索引就无效了,所以上面不能排序,要两个pair read处理完之后再排序
|
||||
for (i = 0; i < 2; ++i) {
|
||||
int tmp;
|
||||
if (n[i] > 0) {
|
||||
if (nn[i] > 0) {
|
||||
for (j = 0; j < n[i]; ++j) { // 在这里排序
|
||||
// move b s.t. ma is sorted
|
||||
int nidx = a[i].n - n[i] + j;
|
||||
mem_alnreg_t* b = &a[i].a[nidx];
|
||||
mem_alnreg_t b = a[i].a[nidx]; // 用指针一定要慎重,注意指向的地址内容会变
|
||||
for (k = 0; k < nidx; ++k) // find the insertion point
|
||||
if (a[i].a[k].score < b->score)
|
||||
if (a[i].a[k].score < b.score)
|
||||
break;
|
||||
tmp = k;
|
||||
for (k = nidx; k > tmp; --k) a[i].a[k] = a[i].a[k - 1];
|
||||
a[i].a[k] = *b;
|
||||
a[i].a[k] = b;
|
||||
}
|
||||
|
||||
a[i].n = mem_sort_dedup_patch(opt, 0, 0, 0, a[i].n, a[i].a);
|
||||
|
|
@ -454,147 +629,8 @@ static void workder_gen_sam(void* data, int idx, int tid) {
|
|||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id << 1 | 0);
|
||||
n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id << 1 | 1);
|
||||
if (opt->flag & MEM_F_PRIMARY5) {
|
||||
mem_reorder_primary5(opt->T, &a[0]);
|
||||
mem_reorder_primary5(opt->T, &a[1]);
|
||||
}
|
||||
if (opt->flag & MEM_F_NOPAIRING)
|
||||
goto no_pairing;
|
||||
// pairing single-end hits
|
||||
if (n_pri[0] && n_pri[1] && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z, n_pri)) > 0) {
|
||||
int is_multi[2], q_pe, score_un, q_se[2];
|
||||
char** XA[2];
|
||||
// check if an end has multiple hits even after mate-SW
|
||||
for (i = 0; i < 2; ++i) {
|
||||
for (j = 1; j < n_pri[i]; ++j)
|
||||
if (a[i].a[j].secondary < 0 && a[i].a[j].score >= opt->T)
|
||||
break;
|
||||
is_multi[i] = j < n_pri[i] ? 1 : 0;
|
||||
}
|
||||
if (is_multi[0] || is_multi[1])
|
||||
goto no_pairing; // TODO: in rare cases, the true hit may be long but with low score
|
||||
// compute mapQ for the best SE hit
|
||||
score_un = a[0].a[0].score + a[1].a[0].score - opt->pen_unpaired;
|
||||
// q_pe = o && subo < o? (int)(MEM_MAPQ_COEF * (1. - (double)subo / o) * log(a[0].a[z[0]].seedcov + a[1].a[z[1]].seedcov) + .499) : 0;
|
||||
subo = subo > score_un ? subo : score_un;
|
||||
q_pe = raw_mapq(o - subo, opt->a);
|
||||
if (n_sub > 0)
|
||||
q_pe -= (int)(4.343 * log(n_sub + 1) + .499);
|
||||
if (q_pe < 0)
|
||||
q_pe = 0;
|
||||
if (q_pe > 60)
|
||||
q_pe = 60;
|
||||
q_pe = (int)(q_pe * (1. - .5 * (a[0].a[0].frac_rep + a[1].a[0].frac_rep)) + .499);
|
||||
// the following assumes no split hits
|
||||
if (o > score_un) { // paired alignment is preferred
|
||||
mem_alnreg_t* c[2];
|
||||
c[0] = &a[0].a[z[0]];
|
||||
c[1] = &a[1].a[z[1]];
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (c[i]->secondary >= 0)
|
||||
c[i]->sub = a[i].a[c[i]->secondary].score, c[i]->secondary = -2;
|
||||
q_se[i] = mem_approx_mapq_se(opt, c[i]);
|
||||
}
|
||||
q_se[0] = q_se[0] > q_pe ? q_se[0] : q_pe < q_se[0] + 40 ? q_pe : q_se[0] + 40;
|
||||
q_se[1] = q_se[1] > q_pe ? q_se[1] : q_pe < q_se[1] + 40 ? q_pe : q_se[1] + 40;
|
||||
extra_flag |= 2;
|
||||
// cap at the tandem repeat score
|
||||
q_se[0] = q_se[0] < raw_mapq(c[0]->score - c[0]->csub, opt->a) ? q_se[0] : raw_mapq(c[0]->score - c[0]->csub, opt->a);
|
||||
q_se[1] = q_se[1] < raw_mapq(c[1]->score - c[1]->csub, opt->a) ? q_se[1] : raw_mapq(c[1]->score - c[1]->csub, opt->a);
|
||||
} else { // the unpaired alignment is preferred
|
||||
z[0] = z[1] = 0;
|
||||
q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]);
|
||||
q_se[1] = mem_approx_mapq_se(opt, &a[1].a[0]);
|
||||
}
|
||||
for (i = 0; i < 2; ++i) {
|
||||
int k = a[i].a[z[i]].secondary_all;
|
||||
if (k >= 0 && k < n_pri[i]) { // switch secondary and primary if both of them are non-ALT
|
||||
assert(a[i].a[k].secondary_all < 0);
|
||||
for (j = 0; j < a[i].n; ++j)
|
||||
if (a[i].a[j].secondary_all == k || j == k)
|
||||
a[i].a[j].secondary_all = z[i];
|
||||
a[i].a[z[i]].secondary_all = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(opt->flag & MEM_F_ALL)) {
|
||||
for (i = 0; i < 2; ++i) XA[i] = mem_gen_alt(opt, bns, pac, &a[i], s[i].l_seq, s[i].seq);
|
||||
} else
|
||||
XA[0] = XA[1] = 0;
|
||||
|
||||
// write SAM
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[z[i]]);
|
||||
h[i].mapq = q_se[i];
|
||||
h[i].flag |= 0x40 << i | extra_flag;
|
||||
h[i].XA = XA[i] ? XA[i][z[i]] : 0;
|
||||
aa[i][n_aa[i]++] = h[i];
|
||||
if (n_pri[i] < a[i].n) { // the read has ALT hits
|
||||
mem_alnreg_t* p = &a[i].a[n_pri[i]];
|
||||
if (p->score < opt->T || p->secondary >= 0 || !p->is_alt)
|
||||
continue;
|
||||
g[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, p);
|
||||
g[i].flag |= 0x800 | 0x40 << i | extra_flag;
|
||||
g[i].XA = XA[i] ? XA[i][n_pri[i]] : 0;
|
||||
aa[i][n_aa[i]++] = g[i];
|
||||
}
|
||||
}
|
||||
ss[0].sam.l = 0;
|
||||
//PROF_START(aln2sam);
|
||||
for (i = 0; i < n_aa[0]; ++i) mem_aln2sam(opt, bns, &ss[0].sam, &s[0], n_aa[0], aa[0], i, &h[1]); // write read1 hits
|
||||
ss[1].sam.l = 0;
|
||||
for (i = 0; i < n_aa[1]; ++i) mem_aln2sam(opt, bns, &ss[1].sam, &s[1], n_aa[1], aa[1], i, &h[0]); // write read2 hits
|
||||
if (strcmp(s[0].name, s[1].name) != 0)
|
||||
err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||
// free
|
||||
for (i = 0; i < 2; ++i) {
|
||||
free(h[i].cigar);
|
||||
free(g[i].cigar);
|
||||
if (XA[i] == 0)
|
||||
continue;
|
||||
for (j = 0; j < a[i].n; ++j) free(XA[i][j]);
|
||||
free(XA[i]);
|
||||
}
|
||||
} else
|
||||
goto no_pairing;
|
||||
|
||||
no_pairing:
|
||||
for (i = 0; i < 2; ++i) {
|
||||
int which = -1;
|
||||
if (a[i].n) {
|
||||
if (a[i].a[0].score >= opt->T)
|
||||
which = 0;
|
||||
else if (n_pri[i] < a[i].n && a[i].a[n_pri[i]].score >= opt->T)
|
||||
which = n_pri[i];
|
||||
}
|
||||
if (which >= 0)
|
||||
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[which]);
|
||||
else
|
||||
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, 0);
|
||||
}
|
||||
if (!(opt->flag & MEM_F_NOPAIRING) && h[0].rid == h[1].rid &&
|
||||
h[0].rid >= 0) { // if the top hits from the two ends constitute a proper pair, flag it.
|
||||
int64_t dist;
|
||||
int d;
|
||||
d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist);
|
||||
if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high)
|
||||
extra_flag |= 2;
|
||||
}
|
||||
mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41 | extra_flag, &h[1], &ss[0]);
|
||||
mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81 | extra_flag, &h[0], &ss[1]);
|
||||
if (strcmp(s[0].name, s[1].name) != 0)
|
||||
err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||
free(h[0].cigar);
|
||||
free(h[1].cigar);
|
||||
|
||||
_destory_clear_vec(s[0].msw);
|
||||
_destory_clear_vec(s[1].msw);
|
||||
|
||||
free(a[0].a);
|
||||
free(a[1].a);
|
||||
generate_sam(opt, bns, pac, pes, id >> 1, s, a, ss, w->n_processed, tid);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
6
run.sh
6
run.sh
|
|
@ -1,6 +1,6 @@
|
|||
thread=64
|
||||
thread=1
|
||||
|
||||
make -j 16
|
||||
make clean; make -j 32
|
||||
|
||||
n1=~/data/dataset/D2/n1.fq.gz
|
||||
n2=~/data/dataset/D2/n2.fq.gz
|
||||
|
|
@ -11,7 +11,7 @@ n2=~/data/dataset/D2/n2.fq.gz
|
|||
reference=~/data/reference/fmt/human_g1k_v37_decoy.fasta
|
||||
|
||||
out=/dev/null
|
||||
#out=~/data/D2-out.sam
|
||||
#out=./oldsam-D2-out.sam
|
||||
prog=./fastalign
|
||||
#prog=/home/zzh/fastbwa
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue