r651: incorrectly treat introns as deletions

This happened when the last operation during backtracking is an intron.
This commit is contained in:
Heng Li 2018-01-07 19:42:50 -05:00
parent 248b43cc47
commit f5cfd439ee
4 changed files with 15 additions and 8 deletions

View File

@ -210,6 +210,13 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus, flag, ez);
else
ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus, flag, ez);
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
int i;
fprintf(stderr, "score=%d, cigar=", ez->score);
for (i = 0; i < ez->n_cigar; ++i)
fprintf(stderr, "%d%c", ez->cigar[i]>>4, "MIDN"[ez->cigar[i]&0xf]);
fprintf(stderr, "\n");
}
}
static inline int mm_get_hplen_back(const mm_idx_t *mi, uint32_t rid, uint32_t x)

8
ksw2.h
View File

@ -116,7 +116,7 @@ static inline uint32_t *ksw_push_cigar(void *km, int *n_cigar, int *m_cigar, uin
// bit 0-2: which type gets the max - 0 for H, 1 for E, 2 for F, 3 for \tilde{E} and 4 for \tilde{F}
// bit 3/0x08: 1 if a continuation on the E state (bit 5/0x20 for a continuation on \tilde{E})
// bit 4/0x10: 1 if a continuation on the F state (bit 6/0x40 for a continuation on \tilde{F})
static inline void ksw_backtrack(void *km, int is_rot, int is_rev, int with_N, const uint8_t *p, const int *off, const int *off_end, int n_col, int i0, int j0,
static inline void ksw_backtrack(void *km, int is_rot, int is_rev, int min_intron_len, const uint8_t *p, const int *off, const int *off_end, int n_col, int i0, int j0,
int *m_cigar_, int *n_cigar_, uint32_t **cigar_)
{ // p[] - lower 3 bits: which type gets the max; bit
int n_cigar = 0, m_cigar = *m_cigar_, i = i0, j = j0, r, state = 0;
@ -138,11 +138,11 @@ static inline void ksw_backtrack(void *km, int is_rot, int is_rev, int with_N, c
if (state == 0) state = tmp & 7; // TODO: probably this line can be merged into the "else if" line right above; not 100% sure
if (force_state >= 0) state = force_state;
if (state == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 0, 1), --i, --j; // match
else if (state == 1 || (state == 3 && !with_N)) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, 1), --i; // deletion
else if (state == 3 && with_N) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 3, 1), --i; // intron
else if (state == 1 || (state == 3 && min_intron_len <= 0)) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, 1), --i; // deletion
else if (state == 3 && min_intron_len > 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 3, 1), --i; // intron
else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, 1), --j; // insertion
}
if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, i + 1); // first deletion
if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, i >= min_intron_len? 3 : 2, i + 1); // first deletion
if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, j + 1); // first insertion
if (!is_rev)
for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR

View File

@ -367,9 +367,9 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
if (with_cigar) { // backtrack
int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR);
if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY))
ksw_backtrack(km, 1, rev_cigar, 1, (uint8_t*)p, off, off_end, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
else if (ez->max_t >= 0 && ez->max_q >= 0)
ksw_backtrack(km, 1, rev_cigar, 1, (uint8_t*)p, off, off_end, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
kfree(km, mem2); kfree(km, off);
}
}

4
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.6-r639-dirty"
#define MM_VERSION "2.6-r651-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -134,7 +134,7 @@ int main(int argc, char *argv[])
else if (c == 0 && long_idx == 6) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed
else if (c == 0 && long_idx == 7) opt.max_chain_skip = atoi(optarg); // --max-chain-skip
else if (c == 0 && long_idx == 8) opt.min_ksw_len = atoi(optarg); // --min-dp-len
else if (c == 0 && long_idx == 9) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ; // --print-aln-seq
else if (c == 0 && long_idx == 9) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
else if (c == 0 && long_idx ==10) opt.flag |= MM_F_SPLICE; // --splice
else if (c == 0 && long_idx ==12) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
else if (c == 0 && long_idx ==13) opt.flag |= MM_F_SR; // --sr