diff --git a/align.c b/align.c index 73890da..68cd724 100644 --- a/align.c +++ b/align.c @@ -130,27 +130,21 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *qu uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4; if (op == 0) { // match/mismatch int n_ambi = 0, n_diff = 0; - float n_diff2 = 0.0f; for (l = 0; l < len; ++l) { int cq = qseq[qoff + l], ct = tseq[toff + l]; if (ct > 3 || cq > 3) ++n_ambi; - else if (ct != cq) { - ++n_diff; - n_diff2 += qual == 0 || qual[qoff + l] >= 20? 1.0f : .05f * qual[qoff + l]; - } + else if (ct != cq) ++n_diff; s += mat[ct * 5 + cq]; if (s < 0) s = 0; else max = max > s? max : s; } r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi; - p->n_diff2 += n_diff2, p->blen2 += len - n_ambi; toff += len, qoff += len; } else if (op == 1) { // insertion int n_ambi = 0; for (l = 0; l < len; ++l) if (qseq[qoff + l] > 3) ++n_ambi; r->blen += len - n_ambi, p->n_ambi += n_ambi; - p->n_diff2 += 1.0f, ++p->blen2; s -= q + e * len; if (s < 0) s = 0; qoff += len; @@ -159,7 +153,6 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *qu for (l = 0; l < len; ++l) if (tseq[toff + l] > 3) ++n_ambi; r->blen += len - n_ambi, p->n_ambi += n_ambi; - p->n_diff2 += 1.0f, ++p->blen2; s -= q + e * len; if (s < 0) s = 0; toff += len; diff --git a/hit.c b/hit.c index c5f84cd..4f62962 100644 --- a/hit.c +++ b/hit.c @@ -264,45 +264,6 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re *n_regs = k; } -void mm_filter_by_identity(void *km, int n_regs, mm_reg1_t *regs, float min_iden, int qlen, const char *qual) // TODO: make sure it is not beyond the ends of contigs -{ - int i, j, n_aux = 0, en, blen = 0; - uint64_t *aux; - float n_diff = 0.0f; - if (n_regs <= 0) return; - for (i = 0; i < n_regs; ++i) - if (regs[i].id == regs[i].parent && regs[i].pe_thru) // sequenced through the fragment; don't filter - return; - for (i = 0; i < n_regs; ++i) - if (regs[i].id == regs[i].parent) - ++n_aux; - assert(n_aux >= 1); - aux = (uint64_t*)kmalloc(km, n_aux * 8); - for (i = 0, n_aux = 0; i < n_regs; ++i) - if (regs[i].id == regs[i].parent) - aux[n_aux++] = (uint64_t)regs[i].qs<<32 | i; - radix_sort_64(aux, aux + n_aux); - for (i = 0, en = 0; i < n_aux; ++i) { - mm_reg1_t *r = ®s[(int32_t)aux[i]]; - if (r->qs > en) { - for (j = en; j < r->qs; ++j) - n_diff += qual == 0 || qual[j] >= 53? .25f : .05f * .25f * (qual[j] - 33); - blen += r->qs - en; - } - assert(r->p); - blen += r->p->blen2; - n_diff += r->p->n_diff2; - en = en > r->qe? en : r->qe; - } - for (j = en; j < qlen; ++j) - n_diff += qual == 0 || qual[j] >= 53? .25f : .05f * .25f * (qual[j] - 33); - blen += qlen - en; - kfree(km, aux); - if (1.0f - n_diff / blen < min_iden) - for (i = 0; i < n_regs; ++i) - regs[i].iden_flt = 1; -} - int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a) { // squeeze out regions in a[] that are not referenced by regs[] int i, as = 0; diff --git a/main.c b/main.c index 9feef04..f04310b 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.3-r544-dirty" +#define MM_VERSION "2.3-r545-dirty" #ifdef __linux__ #include @@ -67,7 +67,7 @@ static inline int64_t mm_parse_num(const char *str) int main(int argc, char *argv[]) { - const char *opt_str = "2aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:i:LC:"; + const char *opt_str = "2aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:"; mm_mapopt_t opt; mm_idxopt_t ipt; int i, c, n_threads = 3, long_idx; @@ -102,7 +102,6 @@ int main(int argc, char *argv[]) else if (c == 'g') opt.max_gap = (int)mm_parse_num(optarg); else if (c == 'G') mm_mapopt_max_intron_len(&opt, (int)mm_parse_num(optarg)); else if (c == 'F') opt.max_frag_len = (int)mm_parse_num(optarg); - else if (c == 'i') opt.min_iden = atof(optarg); else if (c == 'N') opt.best_n = atoi(optarg); else if (c == 'p') opt.pri_ratio = atof(optarg); else if (c == 'M') opt.mask_level = atof(optarg); @@ -226,7 +225,6 @@ int main(int argc, char *argv[]) fprintf(fp_help, " -z INT Z-drop score [%d]\n", opt.zdrop); fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max); fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n"); - fprintf(fp_help, " -i FLOAT min identity (mapQ reduced to 0 if below) [0]\n"); fprintf(fp_help, " Input/Output:\n"); fprintf(fp_help, " -a output in the SAM format (PAF by default)\n"); fprintf(fp_help, " -Q don't output base quality in SAM\n"); diff --git a/map.c b/map.c index eea9690..086f357 100644 --- a/map.c +++ b/map.c @@ -360,9 +360,6 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** if (n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR)) mm_pair(b->km, max_chain_gap_ref, opt->pe_bonus, opt->a * 2 + opt->b, opt->a, qlens, n_regs, regs); // pairing } - if (opt->min_iden > 0.0f) - for (i = 0; i < n_segs; ++i) - mm_filter_by_identity(b->km, n_regs[i], regs[i], opt->min_iden, qlens[i], quals[i]); kfree(b->km, a); kfree(b->km, u); diff --git a/minimap.h b/minimap.h index 17e411c..d5c22fd 100644 --- a/minimap.h +++ b/minimap.h @@ -59,8 +59,6 @@ typedef struct { int32_t dp_score, dp_max, dp_max2; // DP score; score of the max-scoring segment; score of the best alternate mappings uint32_t n_ambi:30, trans_strand:2; // number of ambiguous bases; transcript strand: 0 for unknown, 1 for +, 2 for - uint32_t n_cigar; // number of cigar operations in cigar[] - float n_diff2; - uint32_t blen2; uint32_t cigar[]; } mm_extra_t; @@ -101,7 +99,6 @@ typedef struct { float mask_level; float pri_ratio; int best_n; // top best_n chains are subjected to DP alignment - float min_iden; int max_join_long, max_join_short; int min_join_flank_sc; diff --git a/mmpriv.h b/mmpriv.h index bbf54e5..e0b06c9 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -75,7 +75,6 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r); void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r); void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs); -void mm_filter_by_identity(void *km, int n_regs, mm_reg1_t *regs, float min_iden, int qlen, const char *qual); void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a); void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r); void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);