r397: multi changes/bugfixes to bwa-backtrack
1. Check .sai versioning 2. Keep track of #ins and #del during backtrack 3. Use info above to get accurate aligned regions; don't call SW extension any more 4. Identify alignment crossing the for-rev boundary 5. Fixed a bug in printing the XA tag: ungapped alignments missing
This commit is contained in:
parent
bde5005f39
commit
599e840779
8
bwape.c
8
bwape.c
|
|
@ -633,7 +633,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
|
|||
gap_opt_t opt, opt0;
|
||||
khint_t iter;
|
||||
isize_info_t last_ii; // this is for the last batch of reads
|
||||
char str[1024];
|
||||
char str[1024], magic[2][4];
|
||||
bwt_t *bwt;
|
||||
uint8_t *pac;
|
||||
|
||||
|
|
@ -648,6 +648,12 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
|
|||
g_hash = kh_init(b128);
|
||||
last_ii.avg = -1.0;
|
||||
|
||||
err_fread_noeof(magic[0], 1, 4, fp_sa[0]);
|
||||
err_fread_noeof(magic[1], 1, 4, fp_sa[1]);
|
||||
if (strncmp(magic[0], SAI_MAGIC, 4) != 0 || strncmp(magic[1], SAI_MAGIC, 4) != 0) {
|
||||
fprintf(stderr, "[E::%s] Unmatched SAI magic. Please re-run `aln' with the same version of bwa.\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa[0]);
|
||||
ks[0] = bwa_open_reads(opt.mode, fn_fa[0]);
|
||||
opt0 = opt;
|
||||
|
|
|
|||
101
bwase.c
101
bwase.c
|
|
@ -4,6 +4,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include <assert.h>
|
||||
#include "bwase.h"
|
||||
#include "bwtaln.h"
|
||||
#include "bntseq.h"
|
||||
|
|
@ -36,6 +37,7 @@ void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_ma
|
|||
if (p->score > best) break;
|
||||
if (drand48() * (p->l - p->k + 1 + cnt) > (double)cnt) {
|
||||
s->n_mm = p->n_mm; s->n_gapo = p->n_gapo; s->n_gape = p->n_gape;
|
||||
s->ref_shift = (int)p->n_del - (int)p->n_ins;
|
||||
s->score = p->score;
|
||||
s->sa = p->k + (bwtint_t)((p->l - p->k + 1) * drand48());
|
||||
}
|
||||
|
|
@ -71,6 +73,7 @@ void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_ma
|
|||
for (l = q->k; l <= q->l; ++l) {
|
||||
s->multi[z].pos = l;
|
||||
s->multi[z].gap = q->n_gapo + q->n_gape;
|
||||
s->multi[z].ref_shift = (int)q->n_del - (int)q->n_ins;
|
||||
s->multi[z++].mm = q->n_mm;
|
||||
}
|
||||
rest -= q->l - q->k + 1;
|
||||
|
|
@ -81,6 +84,7 @@ void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_ma
|
|||
while (x < p) p -= p * j / (i--);
|
||||
s->multi[z].pos = q->l - i;
|
||||
s->multi[z].gap = q->n_gapo + q->n_gape;
|
||||
s->multi[z].ref_shift = (int)q->n_del - (int)q->n_ins;
|
||||
s->multi[z++].mm = q->n_mm;
|
||||
}
|
||||
rest = 0;
|
||||
|
|
@ -107,16 +111,15 @@ int bwa_approx_mapQ(const bwa_seq_t *p, int mm)
|
|||
return (23 < g_log_n[n])? 0 : 23 - g_log_n[n];
|
||||
}
|
||||
|
||||
bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand)
|
||||
bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int ref_len, int *strand)
|
||||
{
|
||||
bwtint_t pos_f;
|
||||
int is_rev;
|
||||
pos_f = bns_depos(bns, bwt_sa(bwt, sapos), &is_rev); // pos_f
|
||||
pos_f = bwt_sa(bwt, sapos); // position on the forward-reverse coordinate
|
||||
if (pos_f < bns->l_pac && bns->l_pac < pos_f + ref_len) return (bwtint_t)-1;
|
||||
pos_f = bns_depos(bns, pos_f, &is_rev); // position on the forward strand; this may be the first base or the last base
|
||||
*strand = !is_rev;
|
||||
/* NB: For gapped alignment, pacpos may not be correct, which will be fixed
|
||||
* in bwa_refine_gapped_core(). This line also determines the way "x" is
|
||||
* calculated in bwa_refine_gapped_core() when (ext < 0 && is_end == 0). */
|
||||
if (is_rev) pos_f = pos_f + 1 < len? 0 : pos_f - len + 1; // mapped to the forward strand
|
||||
if (is_rev) pos_f = pos_f + 1 < ref_len? 0 : pos_f - ref_len + 1; // position of the first base
|
||||
return pos_f; // FIXME: it is possible that pos_f < bns->anns[ref_id].offset
|
||||
}
|
||||
|
||||
|
|
@ -132,9 +135,11 @@ void bwa_cal_pac_pos_core(const bntseq_t *bns, const bwt_t *bwt, bwa_seq_t *seq,
|
|||
if (seq->type != BWA_TYPE_UNIQUE && seq->type != BWA_TYPE_REPEAT) return;
|
||||
max_diff = fnr > 0.0? bwa_cal_maxdiff(seq->len, BWA_AVG_ERR, fnr) : max_mm;
|
||||
seq->seQ = seq->mapQ = bwa_approx_mapQ(seq, max_diff);
|
||||
seq->pos = bwa_sa2pos(bns, bwt, seq->sa, seq->len, &strand);
|
||||
//fprintf(stderr, "%d\n", seq->ref_shift);
|
||||
seq->pos = bwa_sa2pos(bns, bwt, seq->sa, seq->len + seq->ref_shift, &strand);
|
||||
seq->strand = strand;
|
||||
seq->seQ = seq->mapQ = bwa_approx_mapQ(seq, max_diff);
|
||||
if (seq->pos == (bwtint_t)-1) seq->type = BWA_TYPE_NO_MATCH;
|
||||
}
|
||||
|
||||
void bwa_cal_pac_pos(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
|
||||
|
|
@ -150,9 +155,9 @@ void bwa_cal_pac_pos(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_se
|
|||
bwa_cal_pac_pos_core(bns, bwt, p, max_mm, fnr);
|
||||
for (j = n_multi = 0; j < p->n_multi; ++j) {
|
||||
bwt_multi1_t *q = p->multi + j;
|
||||
q->pos = bwa_sa2pos(bns, bwt, q->pos, p->len, &strand);
|
||||
q->pos = bwa_sa2pos(bns, bwt, q->pos, p->len + p->ref_shift, &strand);
|
||||
q->strand = strand;
|
||||
if (q->pos != p->pos)
|
||||
if (q->pos != p->pos && q->pos != (bwtint_t)-1)
|
||||
p->multi[n_multi++] = *q;
|
||||
}
|
||||
p->n_multi = n_multi;
|
||||
|
|
@ -162,64 +167,25 @@ void bwa_cal_pac_pos(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_se
|
|||
|
||||
#define SW_BW 50
|
||||
|
||||
bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, ubyte_t *seq, bwtint_t *_pos, int *n_cigar, int is_rev)
|
||||
bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, ubyte_t *seq, int ref_shift, bwtint_t rb, int *n_cigar)
|
||||
{
|
||||
bwa_cigar_t *cigar = 0;
|
||||
uint32_t *cigar32 = 0;
|
||||
ubyte_t *rseq;
|
||||
int tle, qle, gtle, gscore;
|
||||
int64_t k, rb, re, rlen;
|
||||
int64_t k, re, rlen;
|
||||
int8_t mat[25];
|
||||
|
||||
bwa_fill_scmat(1, 3, mat);
|
||||
if (!is_rev) { // forward strand, the end position is correct
|
||||
re = *_pos + len;
|
||||
if (re > l_pac) re = l_pac;
|
||||
rb = re - (len + SW_BW);
|
||||
if (rb < 0) rb = 0;
|
||||
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
|
||||
seq_reverse(len, seq, 0); // as we need to do left extension, we have to reverse both query and reference sequences
|
||||
seq_reverse(rlen, rseq, 0);
|
||||
ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, 0, -1, len<<1, &qle, &tle, >le, &gscore, 0);
|
||||
if (gscore > 0) tle = gtle, qle = len;
|
||||
rb = re - tle; rlen = tle;
|
||||
seq_reverse(len, seq, 0);
|
||||
seq_reverse(rlen, rseq, 0);
|
||||
if (rlen == 0) goto refine_gapped_err;
|
||||
ksw_global(qle, &seq[len-qle], rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32);
|
||||
if (qle < len) { // write soft clip
|
||||
cigar = realloc(cigar, (*n_cigar + 1) * 4);
|
||||
memmove(cigar + 1, cigar, *n_cigar * 4);
|
||||
cigar[0] = (len - qle)<<4 | FROM_S;
|
||||
++(*n_cigar);
|
||||
}
|
||||
} else { // reverse strand, the start position is correct
|
||||
rb = *_pos; re = rb + len + SW_BW;
|
||||
if (re > l_pac) re = l_pac;
|
||||
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
|
||||
ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, 0, -1, len<<1, &qle, &tle, >le, &gscore, 0);
|
||||
if (gscore > 0) tle = gtle, qle = len;
|
||||
re = rb + tle; rlen = tle;
|
||||
if (rlen == 0) goto refine_gapped_err;
|
||||
ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension
|
||||
if (qle < len) {
|
||||
cigar = realloc(cigar, (*n_cigar + 1) * 4);
|
||||
cigar[*n_cigar - 1] = (len - qle)<<4 | FROM_S;
|
||||
++(*n_cigar);
|
||||
}
|
||||
}
|
||||
*_pos = rb;
|
||||
|
||||
re = rb + len + ref_shift;
|
||||
assert(re <= l_pac);
|
||||
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
|
||||
assert(re - rb == rlen);
|
||||
ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension
|
||||
cigar = (bwa_cigar_t*)cigar32;
|
||||
for (k = 0; k < *n_cigar; ++k)
|
||||
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
|
||||
free(rseq);
|
||||
return cigar;
|
||||
|
||||
refine_gapped_err:
|
||||
free(rseq);
|
||||
*n_cigar = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq,
|
||||
|
|
@ -311,7 +277,7 @@ void bwa_correct_trimmed(bwa_seq_t *s)
|
|||
void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq)
|
||||
{
|
||||
ubyte_t *pacseq;
|
||||
int i, j;
|
||||
int i, j, k;
|
||||
kstring_t *str;
|
||||
|
||||
if (!_pacseq) {
|
||||
|
|
@ -322,15 +288,18 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
|
|||
for (i = 0; i != n_seqs; ++i) {
|
||||
bwa_seq_t *s = seqs + i;
|
||||
seq_reverse(s->len, s->seq, 0); // IMPORTANT: s->seq is reversed here!!!
|
||||
for (j = 0; j < s->n_multi; ++j) {
|
||||
for (j = k = 0; j < s->n_multi; ++j) {
|
||||
bwt_multi1_t *q = s->multi + j;
|
||||
int n_cigar;
|
||||
if (q->gap == 0) continue;
|
||||
q->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, q->strand? s->rseq : s->seq, &q->pos, &n_cigar, q->strand);
|
||||
q->n_cigar = n_cigar;
|
||||
if (q->gap) { // gapped alignment
|
||||
q->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, q->strand? s->rseq : s->seq, q->ref_shift, q->pos, &n_cigar);
|
||||
q->n_cigar = n_cigar;
|
||||
if (q->cigar) s->multi[k++] = *q;
|
||||
} else s->multi[k++] = *q;
|
||||
}
|
||||
s->n_multi = k; // this squeezes out gapped alignments which failed the CIGAR generation
|
||||
if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
|
||||
s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos, &s->n_cigar, s->strand);
|
||||
s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, s->ref_shift, s->pos, &s->n_cigar);
|
||||
if (s->cigar == 0) s->type = BWA_TYPE_NO_MATCH;
|
||||
}
|
||||
// generate MD tag
|
||||
|
|
@ -339,8 +308,7 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
|
|||
bwa_seq_t *s = seqs + i;
|
||||
if (s->type != BWA_TYPE_NO_MATCH) {
|
||||
int nm;
|
||||
s->md = bwa_cal_md1(s->n_cigar, s->cigar, s->len, s->pos, s->strand? s->rseq : s->seq,
|
||||
bns->l_pac, pacseq, str, &nm);
|
||||
s->md = bwa_cal_md1(s->n_cigar, s->cigar, s->len, s->pos, s->strand? s->rseq : s->seq, bns->l_pac, pacseq, str, &nm);
|
||||
s->nm = nm;
|
||||
}
|
||||
}
|
||||
|
|
@ -487,7 +455,6 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
|
|||
for (i = 0; i < p->n_multi; ++i) {
|
||||
bwt_multi1_t *q = p->multi + i;
|
||||
int k;
|
||||
if (q->cigar == 0) continue;
|
||||
j = pos_end_multi(q, p->len) - q->pos;
|
||||
nn = bns_cnt_ambi(bns, q->pos, j, &seqid);
|
||||
err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',
|
||||
|
|
@ -538,6 +505,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
|
|||
bntseq_t *bns;
|
||||
FILE *fp_sa;
|
||||
gap_opt_t opt;
|
||||
char magic[4];
|
||||
|
||||
// initialization
|
||||
bwase_initialize();
|
||||
|
|
@ -546,6 +514,11 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
|
|||
fp_sa = xopen(fn_sa, "r");
|
||||
|
||||
m_aln = 0;
|
||||
err_fread_noeof(magic, 1, 4, fp_sa);
|
||||
if (strncmp(magic, SAI_MAGIC, 4) != 0) {
|
||||
fprintf(stderr, "[E::%s] Unmatched SAI magic. Please re-run `aln' with the same version of bwa.\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa);
|
||||
bwa_print_sam_hdr(bns, rg_line);
|
||||
//bwa_print_sam_PG();
|
||||
|
|
|
|||
2
bwtaln.c
2
bwtaln.c
|
|
@ -116,6 +116,7 @@ void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs,
|
|||
for (j = 0; j < p->len; ++j) // we need to complement
|
||||
p->seq[j] = p->seq[j] > 3? 4 : 3 - p->seq[j];
|
||||
p->aln = bwt_match_gap(bwt, p->len, p->seq, w, p->len <= opt->seed_len? 0 : seed_w, &local_opt, &p->n_aln, stack);
|
||||
//fprintf(stderr, "mm=%lld,ins=%lld,del=%lld,gapo=%lld\n", p->aln->n_mm, p->aln->n_ins, p->aln->n_del, p->aln->n_gapo);
|
||||
// clean up the unused data in the record
|
||||
free(p->name); free(p->seq); free(p->rseq); free(p->qual);
|
||||
p->name = 0; p->seq = p->rseq = p->qual = 0;
|
||||
|
|
@ -173,6 +174,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
|
|||
}
|
||||
|
||||
// core loop
|
||||
err_fwrite(SAI_MAGIC, 1, 4, stdout);
|
||||
err_fwrite(opt, sizeof(gap_opt_t), 1, stdout);
|
||||
while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
|
||||
tot_seqs += n_seqs;
|
||||
|
|
|
|||
7
bwtaln.h
7
bwtaln.h
|
|
@ -33,14 +33,15 @@
|
|||
#define FROM_D 2
|
||||
#define FROM_S 3
|
||||
|
||||
#define SAI_MAGIC "SAI\1"
|
||||
|
||||
typedef struct {
|
||||
bwtint_t w;
|
||||
int bid;
|
||||
} bwt_width_t;
|
||||
|
||||
typedef struct {
|
||||
uint32_t n_mm:16, n_gapo:8, n_gape:8;
|
||||
int score;
|
||||
uint64_t n_mm:8, n_gapo:8, n_gape:8, score:20, n_ins:10, n_del:10;
|
||||
bwtint_t k, l;
|
||||
} bwt_aln1_t;
|
||||
|
||||
|
|
@ -57,6 +58,7 @@ typedef uint16_t bwa_cigar_t;
|
|||
|
||||
typedef struct {
|
||||
uint32_t n_cigar:15, gap:8, mm:8, strand:1;
|
||||
int ref_shift;
|
||||
bwtint_t pos;
|
||||
bwa_cigar_t *cigar;
|
||||
} bwt_multi1_t;
|
||||
|
|
@ -77,6 +79,7 @@ typedef struct {
|
|||
// alignment information
|
||||
bwtint_t sa, pos;
|
||||
uint64_t c1:28, c2:28, seQ:8; // number of top1 and top2 hits; single-end mapQ
|
||||
int ref_shift;
|
||||
int n_cigar;
|
||||
bwa_cigar_t *cigar;
|
||||
// for multi-threading only
|
||||
|
|
|
|||
24
bwtgap.c
24
bwtgap.c
|
|
@ -45,7 +45,7 @@ static void gap_reset_stack(gap_stack_t *stack)
|
|||
stack->n_entries = 0;
|
||||
}
|
||||
|
||||
static inline void gap_push(gap_stack_t *stack, int i, bwtint_t k, bwtint_t l, int n_mm, int n_gapo, int n_gape,
|
||||
static inline void gap_push(gap_stack_t *stack, int i, bwtint_t k, bwtint_t l, int n_mm, int n_gapo, int n_gape, int n_ins, int n_del,
|
||||
int state, int is_diff, const gap_opt_t *opt)
|
||||
{
|
||||
int score;
|
||||
|
|
@ -59,7 +59,9 @@ static inline void gap_push(gap_stack_t *stack, int i, bwtint_t k, bwtint_t l, i
|
|||
}
|
||||
p = q->stack + q->n_entries;
|
||||
p->info = (u_int32_t)score<<21 | i; p->k = k; p->l = l;
|
||||
p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape; p->state = state;
|
||||
p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape;
|
||||
p->n_ins = n_ins; p->n_del = n_del;
|
||||
p->state = state;
|
||||
p->last_diff_pos = is_diff? i : 0;
|
||||
++(q->n_entries);
|
||||
++(stack->n_entries);
|
||||
|
|
@ -106,7 +108,7 @@ static inline int int_log2(uint32_t v)
|
|||
|
||||
bwt_aln1_t *bwt_match_gap(bwt_t *const bwt, int len, const ubyte_t *seq, bwt_width_t *width,
|
||||
bwt_width_t *seed_width, const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack)
|
||||
{
|
||||
{ // $seq is the reverse complement of the input read
|
||||
int best_score = aln_score(opt->max_diff+1, opt->max_gapo+1, opt->max_gape+1, opt);
|
||||
int best_diff = opt->max_diff + 1, max_diff = opt->max_diff;
|
||||
int best_cnt = 0;
|
||||
|
|
@ -126,7 +128,7 @@ bwt_aln1_t *bwt_match_gap(bwt_t *const bwt, int len, const ubyte_t *seq, bwt_wid
|
|||
|
||||
//for (j = 0; j != len; ++j) printf("#0 %d: [%d,%u]\t[%d,%u]\n", j, w[0][j].bid, w[0][j].w, w[1][j].bid, w[1][j].w);
|
||||
gap_reset_stack(stack); // reset stack
|
||||
gap_push(stack, len, 0, bwt->seq_len, 0, 0, 0, 0, 0, opt);
|
||||
gap_push(stack, len, 0, bwt->seq_len, 0, 0, 0, 0, 0, 0, 0, opt);
|
||||
|
||||
while (stack->n_entries) {
|
||||
gap_entry_t e;
|
||||
|
|
@ -186,8 +188,10 @@ bwt_aln1_t *bwt_match_gap(bwt_t *const bwt, int len, const ubyte_t *seq, bwt_wid
|
|||
}
|
||||
p = aln + n_aln;
|
||||
p->n_mm = e.n_mm; p->n_gapo = e.n_gapo; p->n_gape = e.n_gape;
|
||||
p->n_ins = e.n_ins; p->n_del = e.n_del;
|
||||
p->k = k; p->l = l;
|
||||
p->score = score;
|
||||
//fprintf(stderr, "*** n_mm=%d,n_gapo=%d,n_gape=%d,n_ins=%d,n_del=%d\n", e.n_mm, e.n_gapo, e.n_gape, e.n_ins, e.n_del);
|
||||
++n_aln;
|
||||
}
|
||||
continue;
|
||||
|
|
@ -214,24 +218,24 @@ bwt_aln1_t *bwt_match_gap(bwt_t *const bwt, int len, const ubyte_t *seq, bwt_wid
|
|||
if (e.state == STATE_M) { // gap open
|
||||
if (e.n_gapo < opt->max_gapo) { // gap open is allowed
|
||||
// insertion
|
||||
gap_push(stack, i, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_I, 1, opt);
|
||||
gap_push(stack, i, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, e.n_ins + 1, e.n_del, STATE_I, 1, opt);
|
||||
// deletion
|
||||
for (j = 0; j != 4; ++j) {
|
||||
k = bwt->L2[j] + cnt_k[j] + 1;
|
||||
l = bwt->L2[j] + cnt_l[j];
|
||||
if (k <= l) gap_push(stack, i + 1, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_D, 1, opt);
|
||||
if (k <= l) gap_push(stack, i + 1, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, e.n_ins, e.n_del + 1, STATE_D, 1, opt);
|
||||
}
|
||||
}
|
||||
} else if (e.state == STATE_I) { // extention of an insertion
|
||||
if (e.n_gape < opt->max_gape) // gap extention is allowed
|
||||
gap_push(stack, i, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_I, 1, opt);
|
||||
gap_push(stack, i, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, e.n_ins + 1, e.n_del, STATE_I, 1, opt);
|
||||
} else if (e.state == STATE_D) { // extention of a deletion
|
||||
if (e.n_gape < opt->max_gape) { // gap extention is allowed
|
||||
if (e.n_gape + e.n_gapo < max_diff || occ < opt->max_del_occ) {
|
||||
for (j = 0; j != 4; ++j) {
|
||||
k = bwt->L2[j] + cnt_k[j] + 1;
|
||||
l = bwt->L2[j] + cnt_l[j];
|
||||
if (k <= l) gap_push(stack, i + 1, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_D, 1, opt);
|
||||
if (k <= l) gap_push(stack, i + 1, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, e.n_ins, e.n_del + 1, STATE_D, 1, opt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -244,13 +248,13 @@ bwt_aln1_t *bwt_match_gap(bwt_t *const bwt, int len, const ubyte_t *seq, bwt_wid
|
|||
int is_mm = (j != 4 || seq[i] > 3);
|
||||
k = bwt->L2[c] + cnt_k[c] + 1;
|
||||
l = bwt->L2[c] + cnt_l[c];
|
||||
if (k <= l) gap_push(stack, i, k, l, e.n_mm + is_mm, e.n_gapo, e.n_gape, STATE_M, is_mm, opt);
|
||||
if (k <= l) gap_push(stack, i, k, l, e.n_mm + is_mm, e.n_gapo, e.n_gape, e.n_ins, e.n_del, STATE_M, is_mm, opt);
|
||||
}
|
||||
} else if (seq[i] < 4) { // try exact match only
|
||||
int c = seq[i] & 3;
|
||||
k = bwt->L2[c] + cnt_k[c] + 1;
|
||||
l = bwt->L2[c] + cnt_l[c];
|
||||
if (k <= l) gap_push(stack, i, k, l, e.n_mm, e.n_gapo, e.n_gape, STATE_M, 0, opt);
|
||||
if (k <= l) gap_push(stack, i, k, l, e.n_mm, e.n_gapo, e.n_gape, e.n_ins, e.n_del, STATE_M, 0, opt);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
3
bwtgap.h
3
bwtgap.h
|
|
@ -7,8 +7,9 @@
|
|||
typedef struct { // recursion stack
|
||||
u_int32_t info; // score<<21 | i
|
||||
u_int32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6;
|
||||
bwtint_t k, l; // (k,l) is the SA region of [i,n-1]
|
||||
u_int32_t n_ins:16, n_del:16;
|
||||
int last_diff_pos;
|
||||
bwtint_t k, l; // (k,l) is the SA region of [i,n-1]
|
||||
} gap_entry_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
|||
Loading…
Reference in New Issue