not segfault immediately; but buggy

This commit is contained in:
Heng Li 2011-10-24 09:36:52 -04:00
parent b3397a1f14
commit 29c3acfb31
4 changed files with 51 additions and 35 deletions

View File

@ -15,7 +15,7 @@ typedef struct {
typedef struct {
bwtint_t k, l;
uint32_t flag:18, n_seeds:14;
uint32_t flag:18, n_seeds:13, is_rev:1;
int len, G, G2;
int beg, end;
} bsw2hit_t;
@ -38,8 +38,8 @@ extern "C" {
#endif
bsw2opt_t *bsw2_init_opt();
bwtsw2_t **bsw2_core(const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool);
void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target[2], const char *fn);
bwtsw2_t **bsw2_core(const bntseq_t *bns, const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool);
void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, const char *fn);
void bsw2_destroy(bwtsw2_t *b);
bsw2global_t *bsw2_global_init();

View File

@ -42,7 +42,7 @@ unsigned char nt_comp_table[256] = {
'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N'
};
extern int bsw2_resolve_duphits(const bwt_t *bwt, bwtsw2_t *b, int IS);
extern int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS);
extern int bsw2_resolve_query_overlaps(bwtsw2_t *b, float mask_level);
bsw2opt_t *bsw2_init_opt()
@ -253,25 +253,41 @@ static bwtsw2_t *bsw2_aln1_core(const bsw2opt_t *opt, const bntseq_t *bns, uint8
int l, uint8_t *seq[2], int is_rev, bsw2global_t *pool)
{
extern void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2]);
bwtsw2_t *b[2], **bb[2];
int k;
bwtsw2_t *b[2], **bb[2], **_b, *p;
int k, j;
bwtl_t *query;
query = bwtl_seq2bwtl(l, seq[0]);
_b = bsw2_core(bns, opt, query, target, pool);
bwtl_destroy(query);
for (k = 0; k < 2; ++k) {
bwtl_t *query = bwtl_seq2bwtl(l, seq[k]);
bb[k] = bsw2_core(opt, query, target, pool);
bwtl_destroy(query);
bb[k] = calloc(2, sizeof(void*));
bb[k][0] = calloc(1, sizeof(bwtsw2_t));
bb[k][1] = calloc(1, sizeof(bwtsw2_t));
}
fprintf(stderr, "here!\n");
for (k = 0; k < 2; ++k) { // separate _b into bb[2] based on the strand
for (j = 0; j < _b[k]->n; ++j) {
p = bb[k][_b[k]->hits[j].is_rev];
if (p->n == p->max) {
p->max = p->max? p->max<<1 : 8;
p->hits = realloc(p->hits, p->max * sizeof(bsw2hit_t));
}
p->hits[p->n++] = _b[k]->hits[j];
}
}
b[0] = bb[0][1]; b[1] = bb[1][1]; // bb[*][1] are "narrow SA hits"
bsw2_chain_filter(opt, l, b);
for (k = 0; k < 2; ++k) {
bsw2_extend_left(opt, bb[k][1], seq[k], l, pac, bns->l_pac, is_rev, pool->aln_mem);
merge_hits(bb[k], l, 0); // bb[k][1] is merged to bb[k][0] here
bsw2_resolve_duphits(0, bb[k][0], 0);
bsw2_resolve_duphits(0, 0, bb[k][0], 0);
bsw2_extend_rght(opt, bb[k][0], seq[k], l, pac, bns->l_pac, is_rev, pool->aln_mem);
b[k] = bb[k][0];
free(bb[k]);
}
merge_hits(b, l, 1); // again, b[1] is merged to b[0]
bsw2_resolve_query_overlaps(b[0], opt->mask_level);
bsw2_destroy(_b[0]); bsw2_destroy(_b[1]); free(_b);
return b[0];
}
@ -453,7 +469,7 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
/* Core routine to align reads in _seq. It is separated from
* process_seqs() to realize multi-threading */
static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const bntseq_t *bns, uint8_t *pac, bwt_t * const target[2])
static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const bntseq_t *bns, uint8_t *pac, bwt_t * const target)
{
int x;
bsw2opt_t opt = *_opt;
@ -502,11 +518,11 @@ static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const
free(seq[0]); continue;
}
// alignment
b[0] = bsw2_aln1_core(&opt, bns, pac, target[0], l, seq, 0, pool);
b[0] = bsw2_aln1_core(&opt, bns, pac, target, l, seq, 0, pool);
for (k = 0; k < b[0]->n; ++k)
if (b[0]->hits[k].n_seeds < opt.t_seeds) break;
if (k < b[0]->n) {
b[1] = bsw2_aln1_core(&opt, bns, pac, target[1], l, rseq, 1, pool);
if (0 && k < b[0]->n) {
b[1] = bsw2_aln1_core(&opt, bns, pac, target, l, rseq, 1, pool);
for (i = 0; i < b[1]->n; ++i) {
bsw2hit_t *p = b[1]->hits + i;
int x = p->beg;
@ -516,7 +532,7 @@ static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const
}
flag_fr(b);
merge_hits(b, l, 0);
bsw2_resolve_duphits(0, b[0], 0);
bsw2_resolve_duphits(0, 0, b[0], 0);
bsw2_resolve_query_overlaps(b[0], opt.mask_level);
} else b[1] = 0;
// generate CIGAR and print SAM
@ -536,7 +552,7 @@ typedef struct {
const bsw2opt_t *_opt;
const bntseq_t *bns;
uint8_t *pac;
bwt_t *target[2];
bwt_t *target;
} thread_aux_t;
/* another interface to bsw2_aln_core() to facilitate pthread_create() */
@ -550,7 +566,7 @@ static void *worker(void *data)
/* process sequences stored in _seq, generate SAM lines for these
* sequences and reset _seq afterwards. */
static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *bns, uint8_t *pac, bwt_t * const target[2])
static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *bns, uint8_t *pac, bwt_t * const target)
{
int i;
@ -569,7 +585,7 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *
for (j = 0; j < opt->n_threads; ++j) {
thread_aux_t *p = data + j;
p->tid = j; p->_seq = _seq; p->_opt = opt; p->bns = bns;
p->pac = pac; p->target[0] = target[0]; p->target[1] = target[1];
p->pac = pac; p->target = target;
pthread_create(&tid[j], &attr, worker, p);
}
for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);
@ -591,7 +607,7 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *
_seq->n = 0;
}
void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target[2], const char *fn)
void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, const char *fn)
{
gzFile fp;
kseq_t *ks;

View File

@ -266,14 +266,14 @@ static void save_narrow_hits(const bwtl_t *bwtl, bsw2entry_t *u, bwtsw2_t *b1, i
}
/* after this, "narrow SA hits" will be expanded and the coordinates
* will be obtained and stored in b->hits[*].k. */
int bsw2_resolve_duphits(const bwt_t *bwt, bwtsw2_t *b, int IS)
int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS)
{
int i, j, n;
int i, j, n, ref_id, is_rev;
if (b->n == 0) return 0;
if (bwt) { // convert to chromosomal coordinates if suitable
if (bwt && bns) { // convert to chromosomal coordinates if suitable
int old_n = b->n;
bsw2hit_t *old_hits = b->hits;
for (i = n = 0; i < b->n; ++i) {
for (i = n = 0; i < b->n; ++i) { // compute memory needed to be allocated
bsw2hit_t *p = old_hits + i;
if (p->l - p->k + 1 <= IS) n += p->l - p->k + 1;
else if (p->G > 0) ++n;
@ -282,19 +282,21 @@ int bsw2_resolve_duphits(const bwt_t *bwt, bwtsw2_t *b, int IS)
b->hits = calloc(b->max, sizeof(bsw2hit_t));
for (i = j = 0; i < old_n; ++i) {
bsw2hit_t *p = old_hits + i;
if (p->l - p->k + 1 <= IS) {
if (p->l - p->k + 1 <= IS) { // the hit is no so repetitive
bwtint_t k;
for (k = p->k; k <= p->l; ++k) {
b->hits[j] = *p;
b->hits[j].k = bwt_sa(bwt, k);
b->hits[j].k = bns_pos2refId(bns, bwt_sa(bwt, k), 1, &ref_id, &is_rev);
b->hits[j].l = 0;
b->hits[j].is_rev = is_rev;
++j;
}
} else if (p->G > 0) {
b->hits[j] = *p;
b->hits[j].k = bwt_sa(bwt, p->k);
b->hits[j].k = bns_pos2refId(bns, bwt_sa(bwt, p->k), 1, &ref_id, &is_rev);
b->hits[j].l = 0;
b->hits[j].flag |= 1;
b->hits[j].is_rev = is_rev;
++j;
}
}
@ -434,7 +436,7 @@ static void init_bwtsw2(const bwtl_t *target, const bwt_t *query, bsw2stack_t *s
stack_push0(s, u);
}
/* On return, ret[1] keeps not-so-repetitive hits (narrow SA hits); ret[0] keeps all hits (right?) */
bwtsw2_t **bsw2_core(const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool)
bwtsw2_t **bsw2_core(const bntseq_t *bns, const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool)
{
bsw2stack_t *stack = (bsw2stack_t*)pool->stack;
bwtsw2_t *b, *b1, **b_ret;
@ -591,8 +593,8 @@ bwtsw2_t **bsw2_core(const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *qu
mp_free(stack->pool, v);
} // while(top)
getrusage(0, &curr);
bsw2_resolve_duphits(query, b, opt->is);
bsw2_resolve_duphits(query, b1, opt->is);
bsw2_resolve_duphits(bns, query, b, opt->is);
bsw2_resolve_duphits(bns, query, b1, opt->is);
//fprintf(stderr, "stats: %.3lf sec; %d elems\n", time_elapse(&curr, &last), n_tot);
// free
free(heap);

View File

@ -10,7 +10,7 @@
int bwa_bwtsw2(int argc, char *argv[])
{
bsw2opt_t *opt;
bwt_t *target[2];
bwt_t *target;
char buf[1024];
bntseq_t *bns;
int c;
@ -82,16 +82,14 @@ int bwa_bwtsw2(int argc, char *argv[])
opt->t *= opt->a;
opt->coef *= opt->a;
strcpy(buf, argv[optind]); target[0] = bwt_restore_bwt(strcat(buf, ".bwt"));
strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target[0]);
strcpy(buf, argv[optind]); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt"));
strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".rsa"), target[1]);
strcpy(buf, argv[optind]); target = bwt_restore_bwt(strcat(buf, ".bwt"));
strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target);
bns = bns_restore(argv[optind]);
bsw2_aln(opt, bns, target, argv[optind+1]);
bns_destroy(bns);
bwt_destroy(target[0]); bwt_destroy(target[1]);
bwt_destroy(target);
free(opt);
return 0;