r736: improved in low-complexity regions
Example: GGAGGGGAAGGGTGGGCTGGAGGGGACGGGTGGGCTGGAGGGGAAGGGTGTGCTGGAGGGAAAAGGTGGACTGGAGGGGAAGGGTGGGCTGGAGGGGAAGG This read has 5 chains, two of which are: weight=80 26;26;0,4591439948(10:-3095894) 23;23;27,4591439957(10:-3095888) 31;31;70,4591439964(10:-3095873) weight=50 45;45;51,4591440017(10:-3095806) 50;50;51,4591440017(10:-3095801) 31;31;70,4591440090(10:-3095747) Extension from the 26bp seed in the 1st chain gives an alignment [0,101) <=> [4591439948,4591440067), which contains the 50bp seed in the second chain. However, if we extend the 50bp seed, it yields a better alignment [0,101) <=> [4591439966,4591440067) with a different starting position. The 26bp seed is wrong. This commit adds a heuristic to fix this issue.
This commit is contained in:
parent
11698fc4e5
commit
88f89be60e
7
bwamem.c
7
bwamem.c
|
|
@ -417,6 +417,8 @@ int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_
|
|||
} else if (q->rb < p->rb && (score = mem_patch_reg(opt, bns, pac, query, q, p, &w)) > 0) { // then merge q into p
|
||||
p->n_comp += q->n_comp + 1;
|
||||
p->seedcov = p->seedcov > q->seedcov? p->seedcov : q->seedcov;
|
||||
p->sub = p->sub > q->sub? p->sub : q->sub;
|
||||
p->csub = p->csub > q->csub? p->csub : q->csub;
|
||||
p->qb = q->qb, p->rb = q->rb;
|
||||
p->truesc = p->score = score;
|
||||
p->w = w;
|
||||
|
|
@ -650,6 +652,7 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
|||
int64_t rd;
|
||||
int qd, w, max_gap;
|
||||
if (s->rbeg < p->rb || s->rbeg + s->len > p->re || s->qbeg < p->qb || s->qbeg + s->len > p->qe) continue; // not fully contained
|
||||
if (s->len - p->seedlen0 > .1 * l_query) continue; // this seed may give a better alignment
|
||||
// qd: distance ahead of the seed on query; rd: on reference
|
||||
qd = s->qbeg - p->qb; rd = s->rbeg - p->rb;
|
||||
max_gap = cal_max_gap(opt, qd < rd? qd : rd); // the maximal gap allowed in regions ahead of the seed
|
||||
|
|
@ -663,7 +666,8 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
|||
}
|
||||
if (i < av->n) { // the seed is (almost) contained in an existing alignment; further testing is needed to confirm it is not leading to a different aln
|
||||
if (bwa_verbose >= 4)
|
||||
printf("** Seed(%d) [%ld;%ld,%ld] is almost contained in an existing alignment. Confirming whether extension is needed...\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg);
|
||||
printf("** Seed(%d) [%ld;%ld,%ld] is almost contained in an existing alignment [%d,%d) <=> [%ld,%ld)\n",
|
||||
k, (long)s->len, (long)s->qbeg, (long)s->rbeg, av->a[i].qb, av->a[i].qe, (long)av->a[i].rb, (long)av->a[i].re);
|
||||
for (i = k + 1; i < c->n; ++i) { // check overlapping seeds in the same chain
|
||||
const mem_seed_t *t;
|
||||
if (srt[i] == 0) continue;
|
||||
|
|
@ -753,6 +757,7 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
|||
a->seedcov += t->len; // this is not very accurate, but for approx. mapQ, this is good enough
|
||||
}
|
||||
a->w = aw[0] > aw[1]? aw[0] : aw[1];
|
||||
a->seedlen0 = s->len;
|
||||
}
|
||||
free(srt); free(rseq);
|
||||
}
|
||||
|
|
|
|||
1
bwamem.h
1
bwamem.h
|
|
@ -62,6 +62,7 @@ typedef struct {
|
|||
int w; // actual band width used in extension
|
||||
int seedcov; // length of regions coverged by seeds
|
||||
int secondary; // index of the parent hit shadowing the current hit; <0 if primary
|
||||
int seedlen0; // length of the starting seed
|
||||
int n_comp; // number of sub-alignments chained together
|
||||
uint64_t hash;
|
||||
} mem_alnreg_t;
|
||||
|
|
|
|||
Loading…
Reference in New Issue