r513: merged pre- and post-cigar blen and mlen
This saves a bit memory and is cleaner.
This commit is contained in:
parent
e6f525edaf
commit
adf6cd7f52
14
align.c
14
align.c
|
|
@ -125,6 +125,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *qu
|
|||
if (p == 0) return;
|
||||
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
|
||||
qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D
|
||||
r->blen = r->mlen = 0;
|
||||
for (k = 0; k < p->n_cigar; ++k) {
|
||||
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
|
||||
if (op == 0) { // match/mismatch
|
||||
|
|
@ -141,28 +142,27 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *qu
|
|||
if (s < 0) s = 0;
|
||||
else max = max > s? max : s;
|
||||
}
|
||||
p->n_ambi += n_ambi;
|
||||
p->n_diff += n_diff;
|
||||
toff += len, qoff += len, p->blen += len;
|
||||
r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi;
|
||||
p->n_diff2 += n_diff2, p->blen2 += len - n_ambi;
|
||||
toff += len, qoff += len;
|
||||
} else if (op == 1) { // insertion
|
||||
int n_ambi = 0;
|
||||
for (l = 0; l < len; ++l)
|
||||
if (qseq[qoff + l] > 3) ++n_ambi;
|
||||
qoff += len, p->blen += len;
|
||||
p->n_ambi += n_ambi, p->n_diff += len - n_ambi;
|
||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||
p->n_diff2 += 1.0f, ++p->blen2;
|
||||
s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
qoff += len;
|
||||
} else if (op == 2) { // deletion
|
||||
int n_ambi = 0;
|
||||
for (l = 0; l < len; ++l)
|
||||
if (tseq[toff + l] > 3) ++n_ambi;
|
||||
toff += len, p->blen += len;
|
||||
p->n_ambi += n_ambi, p->n_diff += len - n_ambi;
|
||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||
p->n_diff2 += 1.0f, ++p->blen2;
|
||||
s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
toff += len;
|
||||
} else if (op == 3) { // intron
|
||||
toff += len;
|
||||
}
|
||||
|
|
|
|||
7
format.c
7
format.c
|
|
@ -198,7 +198,7 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
|
|||
int type = r->inv? 'I' : r->id == r->parent? 'P' : 'S';
|
||||
if (r->iden_flt) mm_sprintf_lite(s, "\tom:i:%d", r->mapq);
|
||||
if (r->p) {
|
||||
mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
|
||||
mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->blen - r->mlen + r->p->n_ambi, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
|
||||
if (r->p->trans_strand == 1 || r->p->trans_strand == 2)
|
||||
mm_sprintf_lite(s, "\tts:A:%c", "?+-?"[r->p->trans_strand]);
|
||||
}
|
||||
|
|
@ -214,8 +214,7 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m
|
|||
if (mi->seq[r->rid].name) mm_sprintf_lite(s, "%s", mi->seq[r->rid].name);
|
||||
else mm_sprintf_lite(s, "%d", r->rid);
|
||||
mm_sprintf_lite(s, "\t%d\t%d\t%d", mi->seq[r->rid].len, r->rs, r->re);
|
||||
if (r->p) mm_sprintf_lite(s, "\t%d\t%d", r->p->blen - r->p->n_ambi - r->p->n_diff, r->p->blen);
|
||||
else mm_sprintf_lite(s, "\t%d\t%d", r->fuzzy_mlen, r->fuzzy_blen);
|
||||
mm_sprintf_lite(s, "\t%d\t%d", r->mlen, r->blen);
|
||||
mm_sprintf_lite(s, "\t%d", r->mapq);
|
||||
write_tags(s, r);
|
||||
if (r->p && (opt_flag & MM_F_OUT_CG)) {
|
||||
|
|
@ -389,7 +388,7 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
|
|||
if (l_I) mm_sprintf_lite(s, "%dI", l_I);
|
||||
if (l_D) mm_sprintf_lite(s, "%dD", l_D);
|
||||
if (clip3) mm_sprintf_lite(s, "%dS", clip3);
|
||||
mm_sprintf_lite(s, ",%d,%d;", q->mapq, q->p->n_diff);
|
||||
mm_sprintf_lite(s, ",%d,%d;", q->mapq, q->blen - q->mlen + q->p->n_ambi);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
12
hit.c
12
hit.c
|
|
@ -8,15 +8,15 @@
|
|||
static inline void mm_cal_fuzzy_len(mm_reg1_t *r, const mm128_t *a)
|
||||
{
|
||||
int i;
|
||||
r->fuzzy_mlen = r->fuzzy_blen = 0;
|
||||
r->mlen = r->blen = 0;
|
||||
if (r->cnt <= 0) return;
|
||||
r->fuzzy_mlen = r->fuzzy_blen = a[r->as].y>>32&0xff;
|
||||
r->mlen = r->blen = a[r->as].y>>32&0xff;
|
||||
for (i = r->as + 1; i < r->as + r->cnt; ++i) {
|
||||
int span = a[i].y>>32&0xff;
|
||||
int tl = (int32_t)a[i].x - (int32_t)a[i-1].x;
|
||||
int ql = (int32_t)a[i].y - (int32_t)a[i-1].y;
|
||||
r->fuzzy_blen += tl > ql? tl : ql;
|
||||
r->fuzzy_mlen += tl > span && ql > span? span : tl < ql? tl : ql;
|
||||
r->blen += tl > ql? tl : ql;
|
||||
r->mlen += tl > span && ql > span? span : tl < ql? tl : ql;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -227,7 +227,7 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re
|
|||
int flt = 0;
|
||||
if (!r->inv && !r->seg_split && r->cnt < opt->min_cnt) flt = 1;
|
||||
if (r->p) {
|
||||
if (r->p->blen - r->p->n_ambi - r->p->n_diff < opt->min_chain_score) flt = 1;
|
||||
if (r->mlen < opt->min_chain_score) flt = 1;
|
||||
else if (r->p->dp_max < opt->min_dp_max) flt = 1;
|
||||
if (flt) free(r->p);
|
||||
}
|
||||
|
|
@ -432,7 +432,7 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, in
|
|||
pen_cm = pen_s1 < pen_cm? pen_s1 : pen_cm;
|
||||
subsc = r->subsc > min_chain_sc? r->subsc : min_chain_sc;
|
||||
if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0) {
|
||||
float identity = (float)(r->p->blen - r->p->n_diff - r->p->n_ambi) / (r->p->blen - r->p->n_ambi);
|
||||
float identity = (float)r->mlen / r->blen;
|
||||
int mapq_alt = (int)(6.02f * identity * identity * (r->p->dp_max - r->p->dp_max2) / match_sc + .499f); // BWA-MEM like mapQ, mostly for short reads
|
||||
mapq = (int)(identity * pen_cm * q_coef * (1. - (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score) * logf(r->score)); // more for long reads
|
||||
mapq = mapq < mapq_alt? mapq : mapq_alt; // in case the long-read heuristic fails
|
||||
|
|
|
|||
2
main.c
2
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.2-r512-dirty"
|
||||
#define MM_VERSION "2.2-r513-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
|
|||
|
|
@ -54,8 +54,6 @@ typedef struct {
|
|||
typedef struct {
|
||||
uint32_t capacity; // the capacity of cigar[]
|
||||
int32_t dp_score, dp_max, dp_max2; // DP score; score of the max-scoring segment; score of the best alternate mappings
|
||||
uint32_t blen; // block length
|
||||
uint32_t n_diff; // number of differences, including ambiguous bases
|
||||
uint32_t n_ambi:30, trans_strand:2; // number of ambiguous bases; transcript strand: 0 for unknown, 1 for +, 2 for -
|
||||
uint32_t n_cigar; // number of cigar operations in cigar[]
|
||||
float n_diff2;
|
||||
|
|
@ -71,7 +69,7 @@ typedef struct {
|
|||
int32_t qs, qe, rs, re; // query start and end; reference start and end
|
||||
int32_t parent, subsc; // parent==id if primary; best alternate mapping score
|
||||
int32_t as; // offset in the a[] array (for internal uses only)
|
||||
int32_t fuzzy_mlen, fuzzy_blen; // seeded exact match length; seeded alignment block length (approximate)
|
||||
int32_t mlen, blen; // seeded exact match length; seeded alignment block length
|
||||
uint32_t mapq:8, split:2, n_sub:22; // mapQ; split pattern; number of suboptimal mappings
|
||||
uint32_t sam_pri:1, proper_frag:1, iden_flt:1, pe_thru:1, dummy:29;
|
||||
uint32_t hash;
|
||||
|
|
|
|||
Loading…
Reference in New Issue