not segfault immediately; but buggy
This commit is contained in:
parent
b3397a1f14
commit
29c3acfb31
6
bwtsw2.h
6
bwtsw2.h
|
|
@ -15,7 +15,7 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
bwtint_t k, l;
|
||||
uint32_t flag:18, n_seeds:14;
|
||||
uint32_t flag:18, n_seeds:13, is_rev:1;
|
||||
int len, G, G2;
|
||||
int beg, end;
|
||||
} bsw2hit_t;
|
||||
|
|
@ -38,8 +38,8 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
bsw2opt_t *bsw2_init_opt();
|
||||
bwtsw2_t **bsw2_core(const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool);
|
||||
void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target[2], const char *fn);
|
||||
bwtsw2_t **bsw2_core(const bntseq_t *bns, const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool);
|
||||
void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, const char *fn);
|
||||
void bsw2_destroy(bwtsw2_t *b);
|
||||
|
||||
bsw2global_t *bsw2_global_init();
|
||||
|
|
|
|||
48
bwtsw2_aux.c
48
bwtsw2_aux.c
|
|
@ -42,7 +42,7 @@ unsigned char nt_comp_table[256] = {
|
|||
'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N'
|
||||
};
|
||||
|
||||
extern int bsw2_resolve_duphits(const bwt_t *bwt, bwtsw2_t *b, int IS);
|
||||
extern int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS);
|
||||
extern int bsw2_resolve_query_overlaps(bwtsw2_t *b, float mask_level);
|
||||
|
||||
bsw2opt_t *bsw2_init_opt()
|
||||
|
|
@ -253,25 +253,41 @@ static bwtsw2_t *bsw2_aln1_core(const bsw2opt_t *opt, const bntseq_t *bns, uint8
|
|||
int l, uint8_t *seq[2], int is_rev, bsw2global_t *pool)
|
||||
{
|
||||
extern void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2]);
|
||||
bwtsw2_t *b[2], **bb[2];
|
||||
int k;
|
||||
bwtsw2_t *b[2], **bb[2], **_b, *p;
|
||||
int k, j;
|
||||
bwtl_t *query;
|
||||
query = bwtl_seq2bwtl(l, seq[0]);
|
||||
_b = bsw2_core(bns, opt, query, target, pool);
|
||||
bwtl_destroy(query);
|
||||
for (k = 0; k < 2; ++k) {
|
||||
bwtl_t *query = bwtl_seq2bwtl(l, seq[k]);
|
||||
bb[k] = bsw2_core(opt, query, target, pool);
|
||||
bwtl_destroy(query);
|
||||
bb[k] = calloc(2, sizeof(void*));
|
||||
bb[k][0] = calloc(1, sizeof(bwtsw2_t));
|
||||
bb[k][1] = calloc(1, sizeof(bwtsw2_t));
|
||||
}
|
||||
fprintf(stderr, "here!\n");
|
||||
for (k = 0; k < 2; ++k) { // separate _b into bb[2] based on the strand
|
||||
for (j = 0; j < _b[k]->n; ++j) {
|
||||
p = bb[k][_b[k]->hits[j].is_rev];
|
||||
if (p->n == p->max) {
|
||||
p->max = p->max? p->max<<1 : 8;
|
||||
p->hits = realloc(p->hits, p->max * sizeof(bsw2hit_t));
|
||||
}
|
||||
p->hits[p->n++] = _b[k]->hits[j];
|
||||
}
|
||||
}
|
||||
b[0] = bb[0][1]; b[1] = bb[1][1]; // bb[*][1] are "narrow SA hits"
|
||||
bsw2_chain_filter(opt, l, b);
|
||||
for (k = 0; k < 2; ++k) {
|
||||
bsw2_extend_left(opt, bb[k][1], seq[k], l, pac, bns->l_pac, is_rev, pool->aln_mem);
|
||||
merge_hits(bb[k], l, 0); // bb[k][1] is merged to bb[k][0] here
|
||||
bsw2_resolve_duphits(0, bb[k][0], 0);
|
||||
bsw2_resolve_duphits(0, 0, bb[k][0], 0);
|
||||
bsw2_extend_rght(opt, bb[k][0], seq[k], l, pac, bns->l_pac, is_rev, pool->aln_mem);
|
||||
b[k] = bb[k][0];
|
||||
free(bb[k]);
|
||||
}
|
||||
merge_hits(b, l, 1); // again, b[1] is merged to b[0]
|
||||
bsw2_resolve_query_overlaps(b[0], opt->mask_level);
|
||||
bsw2_destroy(_b[0]); bsw2_destroy(_b[1]); free(_b);
|
||||
return b[0];
|
||||
}
|
||||
|
||||
|
|
@ -453,7 +469,7 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
|
|||
|
||||
/* Core routine to align reads in _seq. It is separated from
|
||||
* process_seqs() to realize multi-threading */
|
||||
static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const bntseq_t *bns, uint8_t *pac, bwt_t * const target[2])
|
||||
static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const bntseq_t *bns, uint8_t *pac, bwt_t * const target)
|
||||
{
|
||||
int x;
|
||||
bsw2opt_t opt = *_opt;
|
||||
|
|
@ -502,11 +518,11 @@ static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const
|
|||
free(seq[0]); continue;
|
||||
}
|
||||
// alignment
|
||||
b[0] = bsw2_aln1_core(&opt, bns, pac, target[0], l, seq, 0, pool);
|
||||
b[0] = bsw2_aln1_core(&opt, bns, pac, target, l, seq, 0, pool);
|
||||
for (k = 0; k < b[0]->n; ++k)
|
||||
if (b[0]->hits[k].n_seeds < opt.t_seeds) break;
|
||||
if (k < b[0]->n) {
|
||||
b[1] = bsw2_aln1_core(&opt, bns, pac, target[1], l, rseq, 1, pool);
|
||||
if (0 && k < b[0]->n) {
|
||||
b[1] = bsw2_aln1_core(&opt, bns, pac, target, l, rseq, 1, pool);
|
||||
for (i = 0; i < b[1]->n; ++i) {
|
||||
bsw2hit_t *p = b[1]->hits + i;
|
||||
int x = p->beg;
|
||||
|
|
@ -516,7 +532,7 @@ static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const
|
|||
}
|
||||
flag_fr(b);
|
||||
merge_hits(b, l, 0);
|
||||
bsw2_resolve_duphits(0, b[0], 0);
|
||||
bsw2_resolve_duphits(0, 0, b[0], 0);
|
||||
bsw2_resolve_query_overlaps(b[0], opt.mask_level);
|
||||
} else b[1] = 0;
|
||||
// generate CIGAR and print SAM
|
||||
|
|
@ -536,7 +552,7 @@ typedef struct {
|
|||
const bsw2opt_t *_opt;
|
||||
const bntseq_t *bns;
|
||||
uint8_t *pac;
|
||||
bwt_t *target[2];
|
||||
bwt_t *target;
|
||||
} thread_aux_t;
|
||||
|
||||
/* another interface to bsw2_aln_core() to facilitate pthread_create() */
|
||||
|
|
@ -550,7 +566,7 @@ static void *worker(void *data)
|
|||
|
||||
/* process sequences stored in _seq, generate SAM lines for these
|
||||
* sequences and reset _seq afterwards. */
|
||||
static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *bns, uint8_t *pac, bwt_t * const target[2])
|
||||
static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *bns, uint8_t *pac, bwt_t * const target)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
|
@ -569,7 +585,7 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *
|
|||
for (j = 0; j < opt->n_threads; ++j) {
|
||||
thread_aux_t *p = data + j;
|
||||
p->tid = j; p->_seq = _seq; p->_opt = opt; p->bns = bns;
|
||||
p->pac = pac; p->target[0] = target[0]; p->target[1] = target[1];
|
||||
p->pac = pac; p->target = target;
|
||||
pthread_create(&tid[j], &attr, worker, p);
|
||||
}
|
||||
for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);
|
||||
|
|
@ -591,7 +607,7 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *
|
|||
_seq->n = 0;
|
||||
}
|
||||
|
||||
void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target[2], const char *fn)
|
||||
void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, const char *fn)
|
||||
{
|
||||
gzFile fp;
|
||||
kseq_t *ks;
|
||||
|
|
|
|||
|
|
@ -266,14 +266,14 @@ static void save_narrow_hits(const bwtl_t *bwtl, bsw2entry_t *u, bwtsw2_t *b1, i
|
|||
}
|
||||
/* after this, "narrow SA hits" will be expanded and the coordinates
|
||||
* will be obtained and stored in b->hits[*].k. */
|
||||
int bsw2_resolve_duphits(const bwt_t *bwt, bwtsw2_t *b, int IS)
|
||||
int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS)
|
||||
{
|
||||
int i, j, n;
|
||||
int i, j, n, ref_id, is_rev;
|
||||
if (b->n == 0) return 0;
|
||||
if (bwt) { // convert to chromosomal coordinates if suitable
|
||||
if (bwt && bns) { // convert to chromosomal coordinates if suitable
|
||||
int old_n = b->n;
|
||||
bsw2hit_t *old_hits = b->hits;
|
||||
for (i = n = 0; i < b->n; ++i) {
|
||||
for (i = n = 0; i < b->n; ++i) { // compute memory needed to be allocated
|
||||
bsw2hit_t *p = old_hits + i;
|
||||
if (p->l - p->k + 1 <= IS) n += p->l - p->k + 1;
|
||||
else if (p->G > 0) ++n;
|
||||
|
|
@ -282,19 +282,21 @@ int bsw2_resolve_duphits(const bwt_t *bwt, bwtsw2_t *b, int IS)
|
|||
b->hits = calloc(b->max, sizeof(bsw2hit_t));
|
||||
for (i = j = 0; i < old_n; ++i) {
|
||||
bsw2hit_t *p = old_hits + i;
|
||||
if (p->l - p->k + 1 <= IS) {
|
||||
if (p->l - p->k + 1 <= IS) { // the hit is no so repetitive
|
||||
bwtint_t k;
|
||||
for (k = p->k; k <= p->l; ++k) {
|
||||
b->hits[j] = *p;
|
||||
b->hits[j].k = bwt_sa(bwt, k);
|
||||
b->hits[j].k = bns_pos2refId(bns, bwt_sa(bwt, k), 1, &ref_id, &is_rev);
|
||||
b->hits[j].l = 0;
|
||||
b->hits[j].is_rev = is_rev;
|
||||
++j;
|
||||
}
|
||||
} else if (p->G > 0) {
|
||||
b->hits[j] = *p;
|
||||
b->hits[j].k = bwt_sa(bwt, p->k);
|
||||
b->hits[j].k = bns_pos2refId(bns, bwt_sa(bwt, p->k), 1, &ref_id, &is_rev);
|
||||
b->hits[j].l = 0;
|
||||
b->hits[j].flag |= 1;
|
||||
b->hits[j].is_rev = is_rev;
|
||||
++j;
|
||||
}
|
||||
}
|
||||
|
|
@ -434,7 +436,7 @@ static void init_bwtsw2(const bwtl_t *target, const bwt_t *query, bsw2stack_t *s
|
|||
stack_push0(s, u);
|
||||
}
|
||||
/* On return, ret[1] keeps not-so-repetitive hits (narrow SA hits); ret[0] keeps all hits (right?) */
|
||||
bwtsw2_t **bsw2_core(const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool)
|
||||
bwtsw2_t **bsw2_core(const bntseq_t *bns, const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool)
|
||||
{
|
||||
bsw2stack_t *stack = (bsw2stack_t*)pool->stack;
|
||||
bwtsw2_t *b, *b1, **b_ret;
|
||||
|
|
@ -591,8 +593,8 @@ bwtsw2_t **bsw2_core(const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *qu
|
|||
mp_free(stack->pool, v);
|
||||
} // while(top)
|
||||
getrusage(0, &curr);
|
||||
bsw2_resolve_duphits(query, b, opt->is);
|
||||
bsw2_resolve_duphits(query, b1, opt->is);
|
||||
bsw2_resolve_duphits(bns, query, b, opt->is);
|
||||
bsw2_resolve_duphits(bns, query, b1, opt->is);
|
||||
//fprintf(stderr, "stats: %.3lf sec; %d elems\n", time_elapse(&curr, &last), n_tot);
|
||||
// free
|
||||
free(heap);
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
int bwa_bwtsw2(int argc, char *argv[])
|
||||
{
|
||||
bsw2opt_t *opt;
|
||||
bwt_t *target[2];
|
||||
bwt_t *target;
|
||||
char buf[1024];
|
||||
bntseq_t *bns;
|
||||
int c;
|
||||
|
|
@ -82,16 +82,14 @@ int bwa_bwtsw2(int argc, char *argv[])
|
|||
opt->t *= opt->a;
|
||||
opt->coef *= opt->a;
|
||||
|
||||
strcpy(buf, argv[optind]); target[0] = bwt_restore_bwt(strcat(buf, ".bwt"));
|
||||
strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target[0]);
|
||||
strcpy(buf, argv[optind]); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt"));
|
||||
strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".rsa"), target[1]);
|
||||
strcpy(buf, argv[optind]); target = bwt_restore_bwt(strcat(buf, ".bwt"));
|
||||
strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target);
|
||||
bns = bns_restore(argv[optind]);
|
||||
|
||||
bsw2_aln(opt, bns, target, argv[optind+1]);
|
||||
|
||||
bns_destroy(bns);
|
||||
bwt_destroy(target[0]); bwt_destroy(target[1]);
|
||||
bwt_destroy(target);
|
||||
free(opt);
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
Loading…
Reference in New Issue