r440: better chain filtering for PE reads
This commit is contained in:
parent
64c0ad6b35
commit
55d1e4f638
5
Makefile
5
Makefile
|
|
@ -1,7 +1,7 @@
|
|||
CFLAGS= -g -Wall -O2 -Wc++-compat
|
||||
CPPFLAGS= -DHAVE_KALLOC
|
||||
INCLUDES=
|
||||
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o ksw2_ll_sse.o
|
||||
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o pe.o ksw2_ll_sse.o
|
||||
PROG= minimap2
|
||||
PROG_EXTRA= sdust minimap2-lite
|
||||
LIBS= -lm -lz -lpthread
|
||||
|
|
@ -63,7 +63,7 @@ depend:
|
|||
# DO NOT DELETE
|
||||
|
||||
align.o: minimap.h mmpriv.h bseq.h ksw2.h kalloc.h
|
||||
bseq.o: bseq.h kseq.h
|
||||
bseq.o: bseq.h kvec.h kalloc.h kseq.h
|
||||
chain.o: minimap.h mmpriv.h bseq.h kalloc.h
|
||||
example.o: minimap.h kseq.h
|
||||
format.o: kalloc.h mmpriv.h minimap.h bseq.h
|
||||
|
|
@ -78,5 +78,6 @@ ksw2_ll_sse.o: ksw2.h kalloc.h
|
|||
main.o: bseq.h minimap.h mmpriv.h getopt.h
|
||||
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h
|
||||
misc.o: minimap.h ksort.h
|
||||
pe.o: mmpriv.h minimap.h bseq.h
|
||||
sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
||||
sketch.o: kvec.h kalloc.h minimap.h
|
||||
|
|
|
|||
2
hit.c
2
hit.c
|
|
@ -183,7 +183,7 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,
|
|||
mm_set_sam_pri(n_regs, regs);
|
||||
}
|
||||
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r)
|
||||
void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r)
|
||||
{
|
||||
if (pri_ratio > 0.0f && *n_ > 0) {
|
||||
int i, k, n = *n_, n_2nd = 0;
|
||||
|
|
|
|||
2
main.c
2
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.2-r439-dirty"
|
||||
#define MM_VERSION "2.2-r440-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
|
|||
9
map.c
9
map.c
|
|
@ -229,11 +229,12 @@ static mm128_t *collect_seed_hits(const mm_mapopt_t *opt, const mm_idx_t *mi, co
|
|||
return a;
|
||||
}
|
||||
|
||||
static void chain_post(const mm_mapopt_t *opt, const mm_idx_t *mi, void *km, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
static void chain_post(const mm_mapopt_t *opt, const mm_idx_t *mi, void *km, int qlen, int n_segs, const int *qlens, int *n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
{
|
||||
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
|
||||
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
|
||||
mm_select_sub(km, opt->mask_level, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
|
||||
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
|
||||
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, opt->max_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
|
||||
if (!(opt->flag & MM_F_SPLICE) && !(opt->flag & MM_F_SR))
|
||||
mm_join_long(km, opt, qlen, n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle
|
||||
}
|
||||
|
|
@ -245,7 +246,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
|
|||
regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
|
||||
if (!(opt->flag & MM_F_AVA)) {
|
||||
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
|
||||
mm_select_sub(km, opt->mask_level, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
|
||||
mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
|
||||
mm_set_sam_pri(*n_regs, regs);
|
||||
}
|
||||
return regs;
|
||||
|
|
@ -285,7 +286,7 @@ void mm_map_multi(const mm_idx_t *mi, int n_segs, const int *qlens, const char *
|
|||
fprintf(stderr, "CN\t%d\t%s\t%d\t%c\t%d\t%d\t%d\n", j, mi->seq[a[i].x<<1>>33].name, (int32_t)a[i].x, "+-"[a[i].x>>63], (int32_t)a[i].y, (int32_t)(a[i].y>>32&0xff),
|
||||
i == regs0[j].as? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));
|
||||
|
||||
chain_post(opt, mi, b->km, qlen_sum, &n_regs0, regs0, a);
|
||||
chain_post(opt, mi, b->km, qlen_sum, n_segs, qlens, &n_regs0, regs0, a);
|
||||
|
||||
if (n_segs == 1) {
|
||||
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], &n_regs0, regs0, a);
|
||||
|
|
|
|||
3
mmpriv.h
3
mmpriv.h
|
|
@ -71,7 +71,8 @@ void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a);
|
|||
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
|
||||
int mm_set_sam_pri(int n, mm_reg1_t *r);
|
||||
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff);
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r);
|
||||
void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r);
|
||||
void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r);
|
||||
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,42 @@
|
|||
#include <stdlib.h>
|
||||
#include "mmpriv.h"
|
||||
|
||||
void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r)
|
||||
{
|
||||
if (pri_ratio > 0.0f && *n_ > 0) {
|
||||
int i, k, n = *n_, n_2nd = 0;
|
||||
int max_dist = n_segs == 2? qlens[0] + qlens[1] + max_gap_ref : 0;
|
||||
for (i = k = 0; i < n; ++i) {
|
||||
int to_keep = 0;
|
||||
if (r[i].parent == i) { // primary
|
||||
to_keep = 1;
|
||||
} else if (r[i].score + min_diff >= r[r[i].parent].score) {
|
||||
to_keep = 1;
|
||||
} else {
|
||||
mm_reg1_t *p = &r[r[i].parent], *q = &r[i];
|
||||
if (p->rev == q->rev && p->rid == q->rid && q->re - p->rs < max_dist && p->re - q->rs < max_dist) { // child and parent are close on the ref
|
||||
if (q->score >= p->score * pri1)
|
||||
to_keep = 1;
|
||||
} else {
|
||||
int is_par_both = (n_segs == 2 && p->qs < qlens[0] && p->qe > qlens[0]);
|
||||
int is_chi_both = (n_segs == 2 && q->qs < qlens[0] && q->qe > qlens[0]);
|
||||
if (is_chi_both || is_chi_both == is_par_both) {
|
||||
if (q->score >= p->score * pri_ratio)
|
||||
to_keep = 1;
|
||||
} else { // the remaining case: is_chi_both == 0 && is_par_both == 1
|
||||
if (q->score >= p->score * pri2)
|
||||
to_keep = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (to_keep && r[i].parent != i) {
|
||||
if (n_2nd++ >= best_n) to_keep = 0; // don't keep if there are too many secondary hits
|
||||
}
|
||||
if (to_keep) r[k++] = r[i];
|
||||
else if (r[i].p) free(r[i].p);
|
||||
}
|
||||
if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
|
||||
*n_ = k;
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue