This commit is contained in:
Heng Li 2017-04-09 14:59:39 -04:00
parent 49b8d23695
commit 79c9478f46
2 changed files with 59 additions and 2 deletions

4
main.c
View File

@ -83,11 +83,11 @@ int main(int argc, char *argv[])
fprintf(stderr, "Usage: minimap2 [options] <target.fa> [query.fa] [...]\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " Indexing:\n");
fprintf(stderr, " -H use homopolymer-compressed k-mer\n");
fprintf(stderr, " -k INT k-mer size [%d]\n", k);
fprintf(stderr, " -w INT minizer window size [{-k}*2/3]\n");
fprintf(stderr, " -I NUM split index for every ~NUM input bases [4G]\n");
fprintf(stderr, " -d FILE dump index to FILE []\n");
fprintf(stderr, " -H use homopolymer-compressed k-mer\n");
fprintf(stderr, " -l the 1st argument is a index file (overriding -k, -w and -I)\n");
// fprintf(stderr, " -b INT bucket bits [%d]\n", b); // most users wouldn't care about this
fprintf(stderr, " Mapping:\n");
@ -107,7 +107,7 @@ int main(int argc, char *argv[])
fprintf(stderr, " ava10k: -Sw5 -L100 -m0 (PacBio/ONT all-vs-all read mapping)\n");
fprintf(stderr, " Input/Output:\n");
fprintf(stderr, " -t INT number of threads [%d]\n", n_threads);
// fprintf(stderr, " -B NUM process ~NUM bp in each batch [100M]\n");
// fprintf(stderr, " -B NUM process ~NUM bp in each mini-batch [100M]\n");
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
// fprintf(stderr, " -N use integer as target names\n");
fprintf(stderr, " -V show version number\n");

57
map.c
View File

@ -1,3 +1,7 @@
#include <stdlib.h>
#include "kvec.h"
#include "kalloc.h"
#include "sdust.h"
#include "minimap.h"
void mm_mapopt_init(mm_mapopt_t *opt)
@ -10,3 +14,56 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->flag = MM_F_WITH_REP;
opt->merge_frac = .5;
}
struct mm_tbuf_s {
sdust_buf_t *sdb;
mm128_v mini;
kvec_t(mm_reg1_t) reg;
void *km;
};
mm_tbuf_t *mm_tbuf_init(void)
{
mm_tbuf_t *b;
b = (mm_tbuf_t*)calloc(1, sizeof(mm_tbuf_t));
b->km = km_init();
b->sdb = sdust_buf_init(b->km);
return b;
}
void mm_tbuf_destroy(mm_tbuf_t *b)
{
if (b == 0) return;
km_destroy(b->km);
free(b);
}
const mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name)
{
int j;
b->mini.n = 0;
mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini);
if (opt->sdust_thres > 0) { // squeeze out minimizers that significantly overlap with LCRs
int n_dreg, k, u = 0;
const uint64_t *dreg;
dreg = sdust_core((const uint8_t*)seq, l_seq, opt->sdust_thres, 64, &n_dreg, b->sdb);
for (j = k = 0; j < b->mini.n; ++j) {
int32_t qpos = (uint32_t)b->mini.a[j].y>>1;
int32_t s = qpos - (mi->k - 1), e = s + mi->k;
while (u < n_dreg && (uint32_t)dreg[u] <= s) ++u;
if (u < n_dreg && dreg[u]>>32 < e) {
int v, l = 0;
for (v = u; v < n_dreg && dreg[v]>>32 < e; ++v) { // iterate over LCRs overlapping this minimizer
int ss = s > dreg[v]>>32? s : dreg[v]>>32;
int ee = e < (uint32_t)dreg[v]? e : (uint32_t)dreg[v];
l += ee - ss;
}
if (l <= mi->k>>1) b->mini.a[k++] = b->mini.a[j];
}
}
b->mini.n = k;
}
*n_regs = b->reg.n;
return b->reg.a;
}