r445: pair coordinate adjustment working
Next: mapq adjustment, which will be tricky...
This commit is contained in:
parent
a349d85280
commit
7e0d70bfd3
2
main.c
2
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.2-r444-dirty"
|
||||
#define MM_VERSION "2.2-r446-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
|
|||
8
map.c
8
map.c
|
|
@ -13,7 +13,6 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
memset(opt, 0, sizeof(mm_mapopt_t));
|
||||
opt->mid_occ_frac = 2e-4f;
|
||||
opt->sdust_thres = 0; // no SDUST masking
|
||||
opt->pe_ori = 0; // FF
|
||||
|
||||
opt->min_cnt = 3;
|
||||
opt->min_chain_score = 40;
|
||||
|
|
@ -35,6 +34,9 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
opt->min_dp_max = opt->min_chain_score * opt->a;
|
||||
opt->min_ksw_len = 200;
|
||||
opt->mini_batch_size = 200000000;
|
||||
|
||||
opt->pe_ori = 0; // FF
|
||||
opt->pe_bonus = 33;
|
||||
}
|
||||
|
||||
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
||||
|
|
@ -236,7 +238,7 @@ static void chain_post(const mm_mapopt_t *opt, const mm_idx_t *mi, void *km, int
|
|||
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
|
||||
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
|
||||
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
|
||||
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, opt->max_gap_ref, mi->k*2, opt->best_n * n_segs, n_segs, qlens, n_regs, regs);
|
||||
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, opt->max_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
|
||||
if (!(opt->flag & MM_F_SPLICE) && !(opt->flag & MM_F_SR))
|
||||
mm_join_long(km, opt, qlen, n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle
|
||||
}
|
||||
|
|
@ -304,6 +306,8 @@ void mm_map_multi(const mm_idx_t *mi, int n_segs, const int *qlens, const char *
|
|||
mm_set_mapq(n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len);
|
||||
}
|
||||
mm_seg_free(b->km, n_segs, seg);
|
||||
if (n_segs == 2 && opt->pe_ori >= 0)
|
||||
mm_pair(b->km, max_gap_ref, opt->pe_bonus, qlens, n_regs, regs);
|
||||
}
|
||||
|
||||
kfree(b->km, a);
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ typedef struct {
|
|||
int32_t as; // offset in the a[] array (for internal uses only)
|
||||
int32_t fuzzy_mlen, fuzzy_blen; // seeded exact match length; seeded alignment block length (approximate)
|
||||
uint32_t mapq:8, split:2, sam_pri:1, n_sub:21; // mapQ; split pattern; if SAM primary; number of suboptimal mappings
|
||||
uint64_t hash;
|
||||
mm_extra_t *p;
|
||||
} mm_reg1_t;
|
||||
|
||||
|
|
@ -81,7 +82,6 @@ typedef struct {
|
|||
typedef struct {
|
||||
int sdust_thres; // score threshold for SDUST; 0 to disable
|
||||
int flag; // see MM_F_* macros
|
||||
int pe_ori;
|
||||
|
||||
int bw; // bandwidth
|
||||
int max_gap, max_gap_ref; // break a chain if there are no minimizers in a max_gap window
|
||||
|
|
@ -102,6 +102,8 @@ typedef struct {
|
|||
int min_dp_max; // drop an alignment if the score of the max scoring segment is below this threshold
|
||||
int min_ksw_len;
|
||||
|
||||
int pe_ori, pe_bonus;
|
||||
|
||||
float mid_occ_frac; // only used by mm_mapopt_update(); see below
|
||||
int32_t mid_occ; // ignore seeds with occurrences above this threshold
|
||||
int mini_batch_size; // size of a batch of query bases to process in parallel
|
||||
|
|
|
|||
1
mmpriv.h
1
mmpriv.h
|
|
@ -80,6 +80,7 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, in
|
|||
|
||||
mm_seg_t *mm_seg_gen(void *km, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a);
|
||||
void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);
|
||||
void mm_pair(void *km, int max_gap_ref, int dp_bonus, const int *qlens, int *n_regs, mm_reg1_t **regs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
75
pe.c
75
pe.c
|
|
@ -1,5 +1,6 @@
|
|||
#include <stdlib.h>
|
||||
#include "mmpriv.h"
|
||||
#include "kalloc.h"
|
||||
|
||||
void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r)
|
||||
{
|
||||
|
|
@ -43,29 +44,83 @@ void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int
|
|||
#include "ksort.h"
|
||||
|
||||
typedef struct {
|
||||
int s;
|
||||
int s, rev;
|
||||
uint64_t key;
|
||||
mm_reg1_t *r;
|
||||
} pair_arr_t;
|
||||
|
||||
#define sort_key_pair(a) ((a).key)
|
||||
KRADIX_SORT_INIT(pair, pair_arr_t, sort_key_pair, 8)
|
||||
/*
|
||||
void mm_pair(void *km, int max_gap_ref, int *n_regs, mm_reg1_t **regs)
|
||||
|
||||
void mm_pair(void *km, int max_gap_ref, int pe_bonus, const int *qlens, int *n_regs, mm_reg1_t **regs)
|
||||
{
|
||||
int i, j, s, n, last = -1;
|
||||
int i, j, s, n, last[2], dp_thres, segs = 0, n_pairs = 0, max_idx[2];
|
||||
int64_t max, max2;
|
||||
pair_arr_t *a;
|
||||
|
||||
a = (pair_arr_t*)kmalloc(km, (n_regs[0] + n_regs[1]) * sizeof(pair_arr_t));
|
||||
for (s = n = 0; s < 2; ++s)
|
||||
for (s = n = 0, dp_thres = 0; s < 2; ++s) {
|
||||
int max = 0;
|
||||
for (i = 0; i < n_regs[s]; ++i) {
|
||||
a[n].s = s;
|
||||
a[n].r = ®s[s][i];
|
||||
a[n++].key = (uint64_t)a[n].r->rid << 32 | a[n].r->rs;
|
||||
a[n].rev = a[n].r->rev;
|
||||
a[n].key = (uint64_t)a[n].r->rid << 32 | a[n].r->rs<<1 | (s^a[n].rev);
|
||||
max = max > a[n].r->p->dp_max? max : a[n].r->p->dp_max;
|
||||
++n;
|
||||
segs |= 1<<s;
|
||||
}
|
||||
radix_sort_pair(n, a);
|
||||
for (i = 0; i < n; ++i) {
|
||||
int pre_dir = seg_rev? !a[i].r->rev : a[i].r->rev;
|
||||
dp_thres += max;
|
||||
}
|
||||
if (segs != 3) {
|
||||
kfree(km, a); // only one end is mapped
|
||||
return;
|
||||
}
|
||||
dp_thres -= pe_bonus;
|
||||
if (dp_thres < 0) dp_thres = 0;
|
||||
radix_sort_pair(a, a + n);
|
||||
|
||||
max = max2 = -1;
|
||||
max_idx[0] = max_idx[1] = -1;
|
||||
last[0] = last[1] = -1;
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (a[i].key & 1) { // reverse first read or forward second read
|
||||
mm_reg1_t *q, *r;
|
||||
if (last[a[i].rev] < 0) continue;
|
||||
r = a[i].r;
|
||||
q = a[last[a[i].rev]].r;
|
||||
if (r->rid != q->rid || r->rs - q->re > max_gap_ref) continue;
|
||||
for (j = last[a[i].rev]; j >= 0; --j) {
|
||||
int sum;
|
||||
if (a[j].rev != a[i].rev || a[j].s == a[i].s) continue;
|
||||
q = a[j].r;
|
||||
if (r->rid != q->rid || r->rs - q->re > max_gap_ref) break;
|
||||
if (r->p->dp_max + q->p->dp_max < dp_thres) continue;
|
||||
++n_pairs;
|
||||
sum = r->p->dp_max + q->p->dp_max;
|
||||
if (sum > max) {
|
||||
max_idx[a[j].s] = j, max_idx[a[i].s] = i;
|
||||
max2 = max;
|
||||
max = sum;
|
||||
} else if (sum > max2)
|
||||
max2 = sum;
|
||||
}
|
||||
} else { // forward first read or reverse second read
|
||||
last[a[i].rev] = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (max > 0) {
|
||||
for (s = 0; s < 2; ++s) {
|
||||
mm_reg1_t *r = a[max_idx[s]].r;
|
||||
if (r->id != r->parent) {
|
||||
mm_reg1_t *p = ®s[s][r->parent];
|
||||
for (i = 0; i < n_regs[s]; ++i)
|
||||
if (regs[s][i].parent == p->id)
|
||||
regs[s][i].parent = r->id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kfree(km, a);
|
||||
}
|
||||
*/
|
||||
|
|
|
|||
Loading…
Reference in New Issue