r629: API to construct index from strings

This commit is contained in:
Heng Li 2017-12-18 22:29:46 -05:00
parent 23a846c594
commit e420b17496
3 changed files with 60 additions and 2 deletions

45
index.c
View File

@ -329,7 +329,7 @@ mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int flag, int mini
return pl.mi;
}
mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads) // a simpler interface
mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads) // a simpler interface; deprecated
{
mm_bseq_file_t *fp;
mm_idx_t *mi;
@ -340,6 +340,49 @@ mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads) //
return mi;
}
mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const char **seq, const char **name)
{
uint64_t sum_len = 0;
mm128_v a = {0,0,0};
mm_idx_t *mi;
int i, flag = 0;
if (n <= 0) return 0;
for (i = 0; i < n; ++i) // get the total length
sum_len += strlen(seq[i]);
if (is_hpc) flag |= MM_I_HPC;
if (name == 0) flag |= MM_I_NO_NAME;
if (bucket_bits < 0) bucket_bits = 14;
mi = mm_idx_init(w, k, bucket_bits, flag);
mi->n_seq = n;
mi->seq = (mm_idx_seq_t*)kcalloc(mi->km, n, sizeof(mm_idx_seq_t)); // ->seq is allocated from km
mi->S = (uint32_t*)calloc((sum_len + 7) / 8, 4);
for (i = 0, sum_len = 0; i < n; ++i) {
const char *s = seq[i];
mm_idx_seq_t *p = &mi->seq[i];
uint32_t j;
if (name && name[i]) {
p->name = (char*)kmalloc(mi->km, strlen(name[i]) + 1);
strcpy(p->name, name[i]);
}
p->offset = sum_len;
p->len = strlen(s);
for (j = 0; j < p->len; ++j) {
int c = seq_nt4_table[(uint8_t)s[j]];
uint64_t o = sum_len + j;
mm_seq4_set(mi->S, o, c);
}
sum_len += p->len;
if (p->len > 0) {
a.n = 0;
mm_sketch(0, s, p->len, w, k, i, is_hpc, &a);
mm_idx_add(mi, a.n, a.a);
}
}
free(a.a);
mm_idx_post(mi, 1);
return mi;
}
/*************
* index I/O *
*************/

2
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.6-r626-dirty"
#define MM_VERSION "2.6-r629-dirty"
#ifdef __linux__
#include <sys/resource.h>

View File

@ -208,6 +208,21 @@ void mm_idx_reader_close(mm_idx_reader_t *r);
int mm_idx_reader_eof(const mm_idx_reader_t *r);
/**
* Create an index from strings in memory
*
* @param w minimizer window size
* @param k minimizer k-mer size
* @param is_hpc use HPC k-mer if true
* @param bucket_bits number of bits for the first level of the hash table
* @param n number of sequences
* @param seq sequences in A/C/G/T
* @param name sequence names; could be NULL
*
* @return minimap2 index
*/
mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const char **seq, const char **name);
/**
* Print index statistics to stderr
*