r128: more conservative chaining filter
This commit is contained in:
parent
0ae318be0d
commit
f44edd4fc9
2
bwtsw2.h
2
bwtsw2.h
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
typedef struct {
|
||||
int skip_sw:16, hard_clip:16;
|
||||
int a, b, q, r, t, qr, bw, max_ins;
|
||||
int a, b, q, r, t, qr, bw, max_ins, max_chain_gap;
|
||||
int z, is, t_seeds, multi_2nd;
|
||||
float mask_level, coef;
|
||||
int n_threads, chunk_size;
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ bsw2opt_t *bsw2_init_opt()
|
|||
o->z = 1; o->is = 3; o->t_seeds = 5; o->hard_clip = 0; o->skip_sw = 0;
|
||||
o->mask_level = 0.50f; o->coef = 5.5f;
|
||||
o->qr = o->q + o->r; o->n_threads = 1; o->chunk_size = 10000000;
|
||||
o->max_chain_gap = 10000;
|
||||
return o;
|
||||
}
|
||||
|
||||
|
|
@ -286,7 +287,7 @@ static bwtsw2_t *bsw2_aln1_core(const bsw2opt_t *opt, const bntseq_t *bns, uint8
|
|||
}
|
||||
}
|
||||
b[0] = bb[0][1]; b[1] = bb[1][1]; // bb[*][1] are "narrow SA hits"
|
||||
bsw2_chain_filter(opt, l, b);
|
||||
bsw2_chain_filter(opt, l, b); // NB: only unique seeds are chained
|
||||
for (k = 0; k < 2; ++k) {
|
||||
bsw2_extend_left(opt, bb[k][1], seq[k], l, pac, bns->l_pac, pool->aln_mem);
|
||||
merge_hits(bb[k], l, 0); // bb[k][1] is merged to bb[k][0] here
|
||||
|
|
|
|||
|
|
@ -23,15 +23,15 @@ static int chaining(const bsw2opt_t *opt, int shift, int n, hsaip_t *z, hsaip_t
|
|||
hsaip_t *q = chain + k;
|
||||
int x = p->qbeg - q->qbeg; // always positive
|
||||
int y = p->tbeg - q->tbeg;
|
||||
if (y > 0 && x - y <= opt->bw && y - x <= opt->bw) {
|
||||
if (y > 0 && x < opt->max_chain_gap && y < opt->max_chain_gap && x - y <= opt->bw && y - x <= opt->bw) { // chained
|
||||
if (p->qend > q->qend) q->qend = p->qend;
|
||||
if (p->tend > q->tend) q->tend = p->tend;
|
||||
++q->chain;
|
||||
p->chain = shift + k;
|
||||
break;
|
||||
}
|
||||
} else if (q->chain > opt->t_seeds * 2) k = 0; // if the chain is strong enough, do not check the previous chains
|
||||
}
|
||||
if (k < 0) {
|
||||
if (k < 0) { // not added to any previous chains
|
||||
chain[m] = *p;
|
||||
chain[m].chain = 1;
|
||||
chain[m].idx = p->chain = shift + m;
|
||||
|
|
@ -44,7 +44,7 @@ static int chaining(const bsw2opt_t *opt, int shift, int n, hsaip_t *z, hsaip_t
|
|||
void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2])
|
||||
{
|
||||
hsaip_t *z[2], *chain[2];
|
||||
int i, j, k, n[2], m[2];
|
||||
int i, j, k, n[2], m[2], thres = opt->t_seeds * 2;
|
||||
char *flag;
|
||||
// initialization
|
||||
n[0] = b[0]->n; n[1] = b[1]->n;
|
||||
|
|
@ -71,6 +71,7 @@ void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2])
|
|||
int tmp = p->qbeg;
|
||||
p->qbeg = len - p->qend; p->qend = len - tmp;
|
||||
}
|
||||
//for (k = 0; k < m[0]; ++k) printf("%d, [%d,%d), [%d,%d)\n", chain[0][k].chain, chain[0][k].tbeg, chain[0][k].tend, chain[0][k].qbeg, chain[0][k].qend);
|
||||
// filtering
|
||||
flag = calloc(m[0] + m[1], 1);
|
||||
ks_introsort(hsaip, m[0] + m[1], chain[0]);
|
||||
|
|
@ -79,7 +80,7 @@ void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2])
|
|||
for (j = 0; j < k; ++j) {
|
||||
hsaip_t *q = chain[0] + j;
|
||||
if (flag[q->idx]) continue;
|
||||
if (q->qend >= p->qend && q->chain > p->chain * opt->t_seeds * 2) {
|
||||
if (q->qend >= p->qend && q->chain > p->chain * thres && p->chain < thres) {
|
||||
flag[p->idx] = 1;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ int bwa_bwtsw2(int argc, char *argv[])
|
|||
|
||||
opt = bsw2_init_opt();
|
||||
srand48(11);
|
||||
while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:S")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:")) >= 0) {
|
||||
switch (c) {
|
||||
case 'q': opt->q = atoi(optarg); break;
|
||||
case 'r': opt->r = atoi(optarg); break;
|
||||
|
|
@ -37,6 +37,7 @@ int bwa_bwtsw2(int argc, char *argv[])
|
|||
case 'f': xreopen(optarg, "w", stdout); break;
|
||||
case 'I': opt->max_ins = atoi(optarg); break;
|
||||
case 'S': opt->skip_sw = 1; break;
|
||||
case 'G': opt->max_chain_gap = atoi(optarg); break;
|
||||
}
|
||||
}
|
||||
opt->qr = opt->q + opt->r;
|
||||
|
|
@ -62,7 +63,8 @@ int bwa_bwtsw2(int argc, char *argv[])
|
|||
fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
|
||||
fprintf(stderr, " -z INT Z-best [%d]\n", opt->z);
|
||||
fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is);
|
||||
fprintf(stderr, " -N INT # seeds to trigger reverse alignment [%d]\n", opt->t_seeds);
|
||||
fprintf(stderr, " -N INT # seeds to trigger rev aln; 2*INT is also the chaining threshold [%d]\n", opt->t_seeds);
|
||||
fprintf(stderr, " -G INT maximum gap size during chaining [%d]\n", opt->max_chain_gap);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n");
|
||||
fprintf(stderr, " BACs, the default setting usually works well. For the current PacBio\n");
|
||||
|
|
|
|||
Loading…
Reference in New Issue