From 79c9478f46eef8fd9968104e74ac9e5ba5ce64de Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sun, 9 Apr 2017 14:59:39 -0400 Subject: [PATCH] backup --- main.c | 4 ++-- map.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/main.c b/main.c index 29762e8..3dfad8e 100644 --- a/main.c +++ b/main.c @@ -83,11 +83,11 @@ int main(int argc, char *argv[]) fprintf(stderr, "Usage: minimap2 [options] [query.fa] [...]\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " Indexing:\n"); + fprintf(stderr, " -H use homopolymer-compressed k-mer\n"); fprintf(stderr, " -k INT k-mer size [%d]\n", k); fprintf(stderr, " -w INT minizer window size [{-k}*2/3]\n"); fprintf(stderr, " -I NUM split index for every ~NUM input bases [4G]\n"); fprintf(stderr, " -d FILE dump index to FILE []\n"); - fprintf(stderr, " -H use homopolymer-compressed k-mer\n"); fprintf(stderr, " -l the 1st argument is a index file (overriding -k, -w and -I)\n"); // fprintf(stderr, " -b INT bucket bits [%d]\n", b); // most users wouldn't care about this fprintf(stderr, " Mapping:\n"); @@ -107,7 +107,7 @@ int main(int argc, char *argv[]) fprintf(stderr, " ava10k: -Sw5 -L100 -m0 (PacBio/ONT all-vs-all read mapping)\n"); fprintf(stderr, " Input/Output:\n"); fprintf(stderr, " -t INT number of threads [%d]\n", n_threads); -// fprintf(stderr, " -B NUM process ~NUM bp in each batch [100M]\n"); +// fprintf(stderr, " -B NUM process ~NUM bp in each mini-batch [100M]\n"); // fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose); // fprintf(stderr, " -N use integer as target names\n"); fprintf(stderr, " -V show version number\n"); diff --git a/map.c b/map.c index b0b86eb..e98d758 100644 --- a/map.c +++ b/map.c @@ -1,3 +1,7 @@ +#include +#include "kvec.h" +#include "kalloc.h" +#include "sdust.h" #include "minimap.h" void mm_mapopt_init(mm_mapopt_t *opt) @@ -10,3 +14,56 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->flag = MM_F_WITH_REP; opt->merge_frac = .5; } + +struct mm_tbuf_s { + sdust_buf_t *sdb; + mm128_v mini; + kvec_t(mm_reg1_t) reg; + void *km; +}; + +mm_tbuf_t *mm_tbuf_init(void) +{ + mm_tbuf_t *b; + b = (mm_tbuf_t*)calloc(1, sizeof(mm_tbuf_t)); + b->km = km_init(); + b->sdb = sdust_buf_init(b->km); + return b; +} + +void mm_tbuf_destroy(mm_tbuf_t *b) +{ + if (b == 0) return; + km_destroy(b->km); + free(b); +} + +const mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name) +{ + int j; + + b->mini.n = 0; + mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini); + if (opt->sdust_thres > 0) { // squeeze out minimizers that significantly overlap with LCRs + int n_dreg, k, u = 0; + const uint64_t *dreg; + dreg = sdust_core((const uint8_t*)seq, l_seq, opt->sdust_thres, 64, &n_dreg, b->sdb); + for (j = k = 0; j < b->mini.n; ++j) { + int32_t qpos = (uint32_t)b->mini.a[j].y>>1; + int32_t s = qpos - (mi->k - 1), e = s + mi->k; + while (u < n_dreg && (uint32_t)dreg[u] <= s) ++u; + if (u < n_dreg && dreg[u]>>32 < e) { + int v, l = 0; + for (v = u; v < n_dreg && dreg[v]>>32 < e; ++v) { // iterate over LCRs overlapping this minimizer + int ss = s > dreg[v]>>32? s : dreg[v]>>32; + int ee = e < (uint32_t)dreg[v]? e : (uint32_t)dreg[v]; + l += ee - ss; + } + if (l <= mi->k>>1) b->mini.a[k++] = b->mini.a[j]; + } + } + b->mini.n = k; + } + *n_regs = b->reg.n; + return b->reg.a; +}