this is better; but still buggy

This commit is contained in:
Heng Li 2011-10-24 11:50:11 -04:00
parent 29c3acfb31
commit 4c43c5914d
3 changed files with 17 additions and 7 deletions

View File

@ -223,7 +223,7 @@ void bsw2_debug_hits(const bwtsw2_t *b)
for (i = 0; i < b->n; ++i) { for (i = 0; i < b->n; ++i) {
bsw2hit_t *p = b->hits + i; bsw2hit_t *p = b->hits + i;
if (p->l == 0) if (p->l == 0)
printf("%d, %d, %d, %lu, %lu\n", p->G, p->beg, p->end, (long)p->k, (long)p->l); printf("G=%d, [%d,%d), k=%lu, l=%lu\n", p->G, p->beg, p->end, (long)p->k, (long)p->l);
} }
} }
@ -264,21 +264,31 @@ static bwtsw2_t *bsw2_aln1_core(const bsw2opt_t *opt, const bntseq_t *bns, uint8
bb[k][0] = calloc(1, sizeof(bwtsw2_t)); bb[k][0] = calloc(1, sizeof(bwtsw2_t));
bb[k][1] = calloc(1, sizeof(bwtsw2_t)); bb[k][1] = calloc(1, sizeof(bwtsw2_t));
} }
fprintf(stderr, "here!\n");
for (k = 0; k < 2; ++k) { // separate _b into bb[2] based on the strand for (k = 0; k < 2; ++k) { // separate _b into bb[2] based on the strand
for (j = 0; j < _b[k]->n; ++j) { for (j = 0; j < _b[k]->n; ++j) {
p = bb[k][_b[k]->hits[j].is_rev]; bsw2hit_t *q;
p = bb[_b[k]->hits[j].is_rev][k];
if (p->n == p->max) { if (p->n == p->max) {
p->max = p->max? p->max<<1 : 8; p->max = p->max? p->max<<1 : 8;
p->hits = realloc(p->hits, p->max * sizeof(bsw2hit_t)); p->hits = realloc(p->hits, p->max * sizeof(bsw2hit_t));
} }
p->hits[p->n++] = _b[k]->hits[j]; q = &p->hits[p->n++];
*q = _b[k]->hits[j];
if (_b[k]->hits[j].is_rev) {
int x = q->beg;
q->beg = l - q->end;
q->end = l - x;
q->k -= q->len;
}
} }
} }
//bsw2_debug_hits(bb[0][1]);
bsw2_debug_hits(bb[1][1]);
b[0] = bb[0][1]; b[1] = bb[1][1]; // bb[*][1] are "narrow SA hits" b[0] = bb[0][1]; b[1] = bb[1][1]; // bb[*][1] are "narrow SA hits"
bsw2_chain_filter(opt, l, b); bsw2_chain_filter(opt, l, b);
for (k = 0; k < 2; ++k) { for (k = 0; k < 2; ++k) {
bsw2_extend_left(opt, bb[k][1], seq[k], l, pac, bns->l_pac, is_rev, pool->aln_mem); bsw2_extend_left(opt, bb[k][1], seq[k], l, pac, bns->l_pac, is_rev, pool->aln_mem);
if (k == 1) bsw2_debug_hits(bb[k][1]);
merge_hits(bb[k], l, 0); // bb[k][1] is merged to bb[k][0] here merge_hits(bb[k], l, 0); // bb[k][1] is merged to bb[k][0] here
bsw2_resolve_duphits(0, 0, bb[k][0], 0); bsw2_resolve_duphits(0, 0, bb[k][0], 0);
bsw2_extend_rght(opt, bb[k][0], seq[k], l, pac, bns->l_pac, is_rev, pool->aln_mem); bsw2_extend_rght(opt, bb[k][0], seq[k], l, pac, bns->l_pac, is_rev, pool->aln_mem);
@ -510,8 +520,8 @@ static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const
if (c >= 4) { c = (int)(drand48() * 4); ++k; } // FIXME: ambiguous bases are not properly handled if (c >= 4) { c = (int)(drand48() * 4); ++k; } // FIXME: ambiguous bases are not properly handled
seq[0][i] = c; seq[0][i] = c;
seq[1][l-1-i] = 3 - c; seq[1][l-1-i] = 3 - c;
rseq[0][l-1-i] = c; rseq[0][l-1-i] = 3 - c;
rseq[1][i] = 3 - c; rseq[1][i] = c;
} }
if (l - k < opt.t) { // too few unambiguous bases if (l - k < opt.t) { // too few unambiguous bases
print_hits(bns, &opt, p, 0); print_hits(bns, &opt, p, 0);

View File

@ -284,6 +284,7 @@ int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int
bsw2hit_t *p = old_hits + i; bsw2hit_t *p = old_hits + i;
if (p->l - p->k + 1 <= IS) { // the hit is no so repetitive if (p->l - p->k + 1 <= IS) { // the hit is no so repetitive
bwtint_t k; bwtint_t k;
if (p->G == 0 && p->k == 0 && p->l == 0 && p->len == 0) continue;
for (k = p->k; k <= p->l; ++k) { for (k = p->k; k <= p->l; ++k) {
b->hits[j] = *p; b->hits[j] = *p;
b->hits[j].k = bns_pos2refId(bns, bwt_sa(bwt, k), 1, &ref_id, &is_rev); b->hits[j].k = bns_pos2refId(bns, bwt_sa(bwt, k), 1, &ref_id, &is_rev);

View File

@ -48,7 +48,6 @@ int bwa_bwtsw2(int argc, char *argv[])
// fprintf(stderr, " -y FLOAT error recurrence coef. (4..16) [%.1f]\n", opt->yita); // fprintf(stderr, " -y FLOAT error recurrence coef. (4..16) [%.1f]\n", opt->yita);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
fprintf(stderr, " -s INT size of a chunk of reads [%d]\n", opt->chunk_size);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, " -w INT band width [%d]\n", opt->bw); fprintf(stderr, " -w INT band width [%d]\n", opt->bw);
fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level); fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level);