diff --git a/bwamem.c b/bwamem.c index 66da875..784ec56 100644 --- a/bwamem.c +++ b/bwamem.c @@ -377,9 +377,12 @@ KSORT_INIT(mem_ars2, mem_alnreg_t, alnreg_slt2) #define alnreg_slt(a, b) ((a).score > (b).score || ((a).score == (b).score && ((a).rb < (b).rb || ((a).rb == (b).rb && (a).qb < (b).qb)))) KSORT_INIT(mem_ars, mem_alnreg_t, alnreg_slt) -#define alnreg_hlt(a, b) ((a).is_alt < (b).is_alt || ((a).is_alt == (b).is_alt && ((a).score > (b).score || ((a).score == (b).score && (a).hash < (b).hash)))) +#define alnreg_hlt(a, b) ((a).score > (b).score || ((a).score == (b).score && (a).hash < (b).hash)) KSORT_INIT(mem_ars_hash, mem_alnreg_t, alnreg_hlt) +#define alnreg_hlt2(a, b) ((a).is_alt < (b).is_alt || ((a).is_alt == (b).is_alt && ((a).score > (b).score || ((a).score == (b).score && (a).hash < (b).hash)))) +KSORT_INIT(mem_ars_hash2, mem_alnreg_t, alnreg_hlt2) + #define PATCH_MAX_R_BW 0.05f #define PATCH_MIN_SC_RATIO 0.90f @@ -475,24 +478,19 @@ int mem_test_and_remove_exact(const mem_opt_t *opt, int n, mem_alnreg_t *a, int return n - 1; } -void mem_mark_primary_se(const mem_opt_t *opt, int _n, mem_alnreg_t *a, int64_t id) +typedef kvec_t(int) int_v; + +static void mem_mark_primary_se_core(const mem_opt_t *opt, int n, mem_alnreg_t *a, int_v *z) { // similar to the loop in mem_chain_flt() - int i, k, tmp, n; - kvec_t(int) z; - if (n_ == 0) return; - kv_init(z); - for (i = 0; i < n_; ++i) a[i].sub = 0, a[i].secondary = -1, a[i].hash = hash_64(id+i); - ks_introsort(mem_ars_hash, n_, a); - for (i = 0; i < n_; ++i) - if (a[i].is_alt) break; - if ((n = i) == 0) return; + int i, k, tmp; tmp = opt->a + opt->b; tmp = opt->o_del + opt->e_del > tmp? opt->o_del + opt->e_del : tmp; tmp = opt->o_ins + opt->e_ins > tmp? opt->o_ins + opt->e_ins : tmp; - kv_push(int, z, 0); + z->n = 0; + kv_push(int, *z, 0); for (i = 1; i < n; ++i) { - for (k = 0; k < z.n; ++k) { - int j = z.a[k]; + for (k = 0; k < z->n; ++k) { + int j = z->a[k]; int b_max = a[j].qb > a[i].qb? a[j].qb : a[i].qb; int e_min = a[j].qe < a[i].qe? a[j].qe : a[i].qe; if (e_min > b_max) { // have overlap @@ -504,10 +502,32 @@ void mem_mark_primary_se(const mem_opt_t *opt, int _n, mem_alnreg_t *a, int64_t } } } - if (k == z.n) kv_push(int, z, i); - else a[i].secondary = z.a[k]; + if (k == z->n) kv_push(int, *z, i); + else a[i].secondary = z->a[k]; + } +} + +int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id) +{ + int i, n_pri; + int_v z = {0,0,0}; + if (n == 0) return 0; + for (i = n_pri = 0; i < n; ++i) { + a[i].sub = 0, a[i].secondary = -1, a[i].hash = hash_64(id+i); + if (!a[i].is_alt) ++n_pri; + } + ks_introsort(mem_ars_hash, n, a); + mem_mark_primary_se_core(opt, n, a, &z); + for (i = 0; i < n; ++i) // this block is used to trigger potential bugs; if no bugs, it has no effects + if (a[i].is_alt && a[i].secondary >= 0) + a[i].secondary = INT_MAX; + if (n_pri > 0 || n_pri != n) { + ks_introsort(mem_ars_hash2, n, a); + for (i = 0; i < n_pri; ++i) a[i].sub = 0, a[i].secondary = -1; + mem_mark_primary_se_core(opt, n_pri, a, &z); } free(z.a); + return n_pri; } /********************************* @@ -920,7 +940,7 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a) } // TODO (future plan): group hits into a uint64_t[] array. This will be cleaner and more flexible -void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m) +void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m) { extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query); kstring_t str; @@ -936,7 +956,7 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa mem_alnreg_t *p = &a->a[k]; mem_aln_t *q; if (p->score < opt->T) continue; - if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue; + if (p->secondary >= 0 && (p->is_alt || !(opt->flag&MEM_F_ALL))) continue; if (p->secondary >= 0 && p->score < a->a[p->secondary].score * opt->drop_ratio) continue; q = kv_pushp(mem_aln_t, aa); *q = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p); @@ -1114,7 +1134,7 @@ static void worker2(void *data, int i, int tid) mem_reg2ovlp(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]); } else { mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i); - mem_reg2sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0); + mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0); } free(w->regs[i].a); } else { diff --git a/bwamem_pair.c b/bwamem_pair.c index 8fe2dd0..c79cc69 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -177,14 +177,14 @@ int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co return n; } -int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2]) +int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2], int n_pri[2]) { pair64_v v, u; int r, i, k, y[4], ret; // y[] keeps the last hit int64_t l_pac = bns->l_pac; kv_init(v); kv_init(u); for (r = 0; r < 2; ++r) { // loop through read number - for (i = 0; i < a[r].n; ++i) { + for (i = 0; i < n_pri[r]; ++i) { pair64_t key; mem_alnreg_t *e = &a[r].a[i]; key.x = e->rb < l_pac? e->rb : (l_pac<<1) - 1 - e->rb; // forward position @@ -244,13 +244,13 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]) { - extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id); + extern int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id); extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a); - extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m); + extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m); extern void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m); extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query); - int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1; + int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2]; kstring_t str; mem_aln_t h[2]; @@ -267,11 +267,11 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]); free(b[0].a); free(b[1].a); } - mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0); - mem_mark_primary_se(opt, a[1].n, a[1].a, id<<1|1); + n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0); + n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id<<1|1); if (opt->flag&MEM_F_NOPAIRING) goto no_pairing; // pairing single-end hits - if (a[0].n && a[1].n && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z)) > 0) { + if (n_pri[0] && n_pri[1] && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z, n_pri)) > 0) { int is_multi[2], q_pe, score_un, q_se[2]; char **XA[2]; // check if an end has multiple hits even after mate-SW @@ -354,8 +354,8 @@ no_pairing: d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist); if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) extra_flag |= 2; } - mem_reg2sam_se(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]); - mem_reg2sam_se(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]); + mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]); + mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]); if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); free(h[0].cigar); free(h[1].cigar); return n; diff --git a/main.c b/main.c index 2915e78..4c7a85e 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r821-dirty" +#define PACKAGE_VERSION "0.7.10-r823-dirty" #endif int bwa_fa2pac(int argc, char *argv[]);