diff --git a/align.c b/align.c index 2e07ce7..b0db4b2 100644 --- a/align.c +++ b/align.c @@ -720,7 +720,7 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m *n_regs_ = n_regs; kfree(km, qseq0[0]); kfree(km, ez.cigar); - mm_filter_regs(km, opt, n_regs_, regs); + mm_filter_regs(km, opt, qlen, n_regs_, regs); mm_hit_sort_by_dp(km, n_regs_, regs); return regs; } diff --git a/hit.c b/hit.c index c3256b4..f427654 100644 --- a/hit.c +++ b/hit.c @@ -245,16 +245,17 @@ void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, } } -void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs) +void mm_filter_regs(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs) { // NB: after this call, mm_reg1_t::parent can be -1 if its parent filtered out int i, k; for (i = k = 0; i < *n_regs; ++i) { mm_reg1_t *r = ®s[i]; int flt = 0; if (!r->inv && !r->seg_split && r->cnt < opt->min_cnt) flt = 1; - if (r->p) { + if (r->p) { // these filters are only applied when base-alignment is available if (r->mlen < opt->min_chain_score) flt = 1; else if (r->p->dp_max < opt->min_dp_max) flt = 1; + else if (r->qs > qlen * opt->max_clip_ratio && qlen - r->qe > qlen * opt->max_clip_ratio) flt = 1; if (flt) free(r->p); } if (!flt) { @@ -337,7 +338,7 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_r r->parent = regs[r->parent].parent; } } - mm_filter_regs(km, opt, n_regs_, regs); + mm_filter_regs(km, opt, qlen, n_regs_, regs); mm_sync_regs(km, *n_regs_, regs); } } diff --git a/main.c b/main.c index 964a0eb..9fb8115 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.8-r686-dirty" +#define MM_VERSION "2.8-r703-dirty" #ifdef __linux__ #include @@ -50,6 +50,7 @@ static struct option long_options[] = { { "heap-sort", required_argument, 0, 0 }, // 24 { "all-chain", no_argument, 0, 'P' }, { "dual", required_argument, 0, 0 }, // 26 + { "max-clip-ratio", required_argument, 0, 0 }, // 27 { "help", no_argument, 0, 'h' }, { "max-intron-len", required_argument, 0, 'G' }, { "version", no_argument, 0, 'V' }, @@ -163,6 +164,7 @@ int main(int argc, char *argv[]) else if (c == 0 && long_idx ==21) opt.anchor_ext_shift = atoi(optarg); // --end-seed-pen else if (c == 0 && long_idx ==22) opt.flag |= MM_F_FOR_ONLY; // --for-only else if (c == 0 && long_idx ==23) opt.flag |= MM_F_REV_ONLY; // --rev-only + else if (c == 0 && long_idx ==27) opt.max_clip_ratio = atof(optarg); // --max-clip-ratio else if (c == 0 && long_idx == 14) { // --frag yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1); } else if (c == 0 && long_idx == 15) { // --secondary diff --git a/minimap.h b/minimap.h index 6907387..b6b9a00 100644 --- a/minimap.h +++ b/minimap.h @@ -121,6 +121,7 @@ typedef struct { int min_dp_max; // drop an alignment if the score of the max scoring segment is below this threshold int min_ksw_len; int anchor_ext_len, anchor_ext_shift; + float max_clip_ratio; // drop an alignment if BOTH ends are clipped above this ratio int pe_ori, pe_bonus; diff --git a/mmpriv.h b/mmpriv.h index f7e344f..36c2033 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -75,7 +75,7 @@ int mm_set_sam_pri(int n, mm_reg1_t *r); void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff); void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r); void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r); -void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs); +void mm_filter_regs(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs); void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a); void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r); void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr); diff --git a/options.c b/options.c index 4b67ab2..9e4276c 100644 --- a/options.c +++ b/options.c @@ -29,6 +29,7 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->min_dp_max = opt->min_chain_score * opt->a; opt->min_ksw_len = 200; opt->anchor_ext_len = 20, opt->anchor_ext_shift = 6; + opt->max_clip_ratio = 1.0f; opt->mini_batch_size = 500000000; opt->pe_ori = 0; // FF diff --git a/python/cmappy.pxd b/python/cmappy.pxd index 6d1b931..5295313 100644 --- a/python/cmappy.pxd +++ b/python/cmappy.pxd @@ -31,6 +31,7 @@ cdef extern from "minimap.h": int min_dp_max int min_ksw_len int anchor_ext_len, anchor_ext_shift + float max_clip_ratio int pe_ori, pe_bonus float mid_occ_frac int32_t mid_occ