r513: merged pre- and post-cigar blen and mlen

This saves a bit memory and is cleaner.
This commit is contained in:
Heng Li 2017-10-16 10:55:18 -04:00
parent e6f525edaf
commit adf6cd7f52
5 changed files with 18 additions and 21 deletions

14
align.c
View File

@ -125,6 +125,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *qu
if (p == 0) return;
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D
r->blen = r->mlen = 0;
for (k = 0; k < p->n_cigar; ++k) {
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
if (op == 0) { // match/mismatch
@ -141,28 +142,27 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *qu
if (s < 0) s = 0;
else max = max > s? max : s;
}
p->n_ambi += n_ambi;
p->n_diff += n_diff;
toff += len, qoff += len, p->blen += len;
r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi;
p->n_diff2 += n_diff2, p->blen2 += len - n_ambi;
toff += len, qoff += len;
} else if (op == 1) { // insertion
int n_ambi = 0;
for (l = 0; l < len; ++l)
if (qseq[qoff + l] > 3) ++n_ambi;
qoff += len, p->blen += len;
p->n_ambi += n_ambi, p->n_diff += len - n_ambi;
r->blen += len - n_ambi, p->n_ambi += n_ambi;
p->n_diff2 += 1.0f, ++p->blen2;
s -= q + e * len;
if (s < 0) s = 0;
qoff += len;
} else if (op == 2) { // deletion
int n_ambi = 0;
for (l = 0; l < len; ++l)
if (tseq[toff + l] > 3) ++n_ambi;
toff += len, p->blen += len;
p->n_ambi += n_ambi, p->n_diff += len - n_ambi;
r->blen += len - n_ambi, p->n_ambi += n_ambi;
p->n_diff2 += 1.0f, ++p->blen2;
s -= q + e * len;
if (s < 0) s = 0;
toff += len;
} else if (op == 3) { // intron
toff += len;
}

View File

@ -198,7 +198,7 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
int type = r->inv? 'I' : r->id == r->parent? 'P' : 'S';
if (r->iden_flt) mm_sprintf_lite(s, "\tom:i:%d", r->mapq);
if (r->p) {
mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->blen - r->mlen + r->p->n_ambi, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
if (r->p->trans_strand == 1 || r->p->trans_strand == 2)
mm_sprintf_lite(s, "\tts:A:%c", "?+-?"[r->p->trans_strand]);
}
@ -214,8 +214,7 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m
if (mi->seq[r->rid].name) mm_sprintf_lite(s, "%s", mi->seq[r->rid].name);
else mm_sprintf_lite(s, "%d", r->rid);
mm_sprintf_lite(s, "\t%d\t%d\t%d", mi->seq[r->rid].len, r->rs, r->re);
if (r->p) mm_sprintf_lite(s, "\t%d\t%d", r->p->blen - r->p->n_ambi - r->p->n_diff, r->p->blen);
else mm_sprintf_lite(s, "\t%d\t%d", r->fuzzy_mlen, r->fuzzy_blen);
mm_sprintf_lite(s, "\t%d\t%d", r->mlen, r->blen);
mm_sprintf_lite(s, "\t%d", r->mapq);
write_tags(s, r);
if (r->p && (opt_flag & MM_F_OUT_CG)) {
@ -389,7 +388,7 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
if (l_I) mm_sprintf_lite(s, "%dI", l_I);
if (l_D) mm_sprintf_lite(s, "%dD", l_D);
if (clip3) mm_sprintf_lite(s, "%dS", clip3);
mm_sprintf_lite(s, ",%d,%d;", q->mapq, q->p->n_diff);
mm_sprintf_lite(s, ",%d,%d;", q->mapq, q->blen - q->mlen + q->p->n_ambi);
}
}
}

12
hit.c
View File

@ -8,15 +8,15 @@
static inline void mm_cal_fuzzy_len(mm_reg1_t *r, const mm128_t *a)
{
int i;
r->fuzzy_mlen = r->fuzzy_blen = 0;
r->mlen = r->blen = 0;
if (r->cnt <= 0) return;
r->fuzzy_mlen = r->fuzzy_blen = a[r->as].y>>32&0xff;
r->mlen = r->blen = a[r->as].y>>32&0xff;
for (i = r->as + 1; i < r->as + r->cnt; ++i) {
int span = a[i].y>>32&0xff;
int tl = (int32_t)a[i].x - (int32_t)a[i-1].x;
int ql = (int32_t)a[i].y - (int32_t)a[i-1].y;
r->fuzzy_blen += tl > ql? tl : ql;
r->fuzzy_mlen += tl > span && ql > span? span : tl < ql? tl : ql;
r->blen += tl > ql? tl : ql;
r->mlen += tl > span && ql > span? span : tl < ql? tl : ql;
}
}
@ -227,7 +227,7 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re
int flt = 0;
if (!r->inv && !r->seg_split && r->cnt < opt->min_cnt) flt = 1;
if (r->p) {
if (r->p->blen - r->p->n_ambi - r->p->n_diff < opt->min_chain_score) flt = 1;
if (r->mlen < opt->min_chain_score) flt = 1;
else if (r->p->dp_max < opt->min_dp_max) flt = 1;
if (flt) free(r->p);
}
@ -432,7 +432,7 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, in
pen_cm = pen_s1 < pen_cm? pen_s1 : pen_cm;
subsc = r->subsc > min_chain_sc? r->subsc : min_chain_sc;
if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0) {
float identity = (float)(r->p->blen - r->p->n_diff - r->p->n_ambi) / (r->p->blen - r->p->n_ambi);
float identity = (float)r->mlen / r->blen;
int mapq_alt = (int)(6.02f * identity * identity * (r->p->dp_max - r->p->dp_max2) / match_sc + .499f); // BWA-MEM like mapQ, mostly for short reads
mapq = (int)(identity * pen_cm * q_coef * (1. - (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score) * logf(r->score)); // more for long reads
mapq = mapq < mapq_alt? mapq : mapq_alt; // in case the long-read heuristic fails

2
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.2-r512-dirty"
#define MM_VERSION "2.2-r513-dirty"
#ifdef __linux__
#include <sys/resource.h>

View File

@ -54,8 +54,6 @@ typedef struct {
typedef struct {
uint32_t capacity; // the capacity of cigar[]
int32_t dp_score, dp_max, dp_max2; // DP score; score of the max-scoring segment; score of the best alternate mappings
uint32_t blen; // block length
uint32_t n_diff; // number of differences, including ambiguous bases
uint32_t n_ambi:30, trans_strand:2; // number of ambiguous bases; transcript strand: 0 for unknown, 1 for +, 2 for -
uint32_t n_cigar; // number of cigar operations in cigar[]
float n_diff2;
@ -71,7 +69,7 @@ typedef struct {
int32_t qs, qe, rs, re; // query start and end; reference start and end
int32_t parent, subsc; // parent==id if primary; best alternate mapping score
int32_t as; // offset in the a[] array (for internal uses only)
int32_t fuzzy_mlen, fuzzy_blen; // seeded exact match length; seeded alignment block length (approximate)
int32_t mlen, blen; // seeded exact match length; seeded alignment block length
uint32_t mapq:8, split:2, n_sub:22; // mapQ; split pattern; number of suboptimal mappings
uint32_t sam_pri:1, proper_frag:1, iden_flt:1, pe_thru:1, dummy:29;
uint32_t hash;