working with toy examples
This commit is contained in:
parent
951c0d1d35
commit
4b707aac92
4
align.c
4
align.c
|
|
@ -199,7 +199,7 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
|
|||
mm_extra_t *p;
|
||||
if (n_cigar == 0) return;
|
||||
if (r->p == 0) {
|
||||
uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4; // TODO: should this be "n_cigar + sizeof(mm_extra_t)/4" instead?
|
||||
uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4;
|
||||
kroundup32(capacity);
|
||||
r->p = (mm_extra_t*)calloc(capacity, 4);
|
||||
r->p->capacity = capacity;
|
||||
|
|
@ -878,6 +878,6 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
|
|||
kfree(km, qseq0[0]);
|
||||
kfree(km, ez.cigar);
|
||||
mm_filter_regs(opt, qlen, n_regs_, regs);
|
||||
mm_hit_sort_by_dp(km, n_regs_, regs);
|
||||
mm_hit_sort(km, n_regs_, regs);
|
||||
return regs;
|
||||
}
|
||||
|
|
|
|||
14
hit.c
14
hit.c
|
|
@ -164,9 +164,9 @@ set_parent_test:
|
|||
kfree(km, w);
|
||||
}
|
||||
|
||||
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r)
|
||||
void mm_hit_sort(void *km, int *n_regs, mm_reg1_t *r)
|
||||
{
|
||||
int32_t i, n_aux, n = *n_regs;
|
||||
int32_t i, n_aux, n = *n_regs, has_cigar = 0, no_cigar = 0;
|
||||
mm128_t *aux;
|
||||
mm_reg1_t *t;
|
||||
|
||||
|
|
@ -175,14 +175,20 @@ void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r)
|
|||
t = (mm_reg1_t*)kmalloc(km, n * sizeof(mm_reg1_t));
|
||||
for (i = n_aux = 0; i < n; ++i) {
|
||||
if (r[i].inv || r[i].cnt > 0) { // squeeze out elements with cnt==0 (soft deleted)
|
||||
assert(r[i].p);
|
||||
aux[n_aux].x = (uint64_t)r[i].p->dp_max << 32 | r[i].hash;
|
||||
if (r[i].p) {
|
||||
aux[n_aux].x = (uint64_t)r[i].p->dp_max << 32 | r[i].hash;
|
||||
has_cigar = 1;
|
||||
} else {
|
||||
aux[n_aux].x = (uint64_t)r[i].score << 32 | r[i].hash;
|
||||
no_cigar = 1;
|
||||
}
|
||||
aux[n_aux++].y = i;
|
||||
} else if (r[i].p) {
|
||||
free(r[i].p);
|
||||
r[i].p = 0;
|
||||
}
|
||||
}
|
||||
assert(has_cigar + no_cigar == 1);
|
||||
radix_sort_128x(aux, aux + n_aux);
|
||||
for (i = n_aux - 1; i >= 0; --i)
|
||||
t[n_aux - 1 - i] = r[aux[i].y];
|
||||
|
|
|
|||
24
map.c
24
map.c
|
|
@ -399,6 +399,7 @@ typedef struct {
|
|||
kstring_t str;
|
||||
|
||||
int n_parts;
|
||||
uint32_t *rid_shift;
|
||||
FILE *fp_split, **fp_parts;
|
||||
} pipeline_t;
|
||||
|
||||
|
|
@ -486,6 +487,7 @@ static void merge_hits(step_t *s)
|
|||
mm_reg1_t *r = &s->reg[k][l];
|
||||
uint32_t capacity;
|
||||
mm_err_fread(r, sizeof(mm_reg1_t), 1, fp[j]);
|
||||
r->rid += s->p->rid_shift[j];
|
||||
if (opt->flag & MM_F_CIGAR) {
|
||||
mm_err_fread(&capacity, 4, 1, fp[j]);
|
||||
r->p = (mm_extra_t*)calloc(capacity, 4);
|
||||
|
|
@ -494,6 +496,7 @@ static void merge_hits(step_t *s)
|
|||
}
|
||||
}
|
||||
}
|
||||
mm_hit_sort(km, &s->n_reg[k], s->reg[k]);
|
||||
mm_set_parent(km, opt->mask_level, s->n_reg[k], s->reg[k], opt->a * 2 + opt->b);
|
||||
if (!(opt->flag & MM_F_ALL_CHAINS)) {
|
||||
mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, &s->n_reg[k], s->reg[k]);
|
||||
|
|
@ -663,13 +666,26 @@ int mm_split_merge(int n_segs, const char **fn, const mm_mapopt_t *opt, int n_sp
|
|||
pl.fp = open_bseqs(pl.n_fp, fn);
|
||||
if (pl.fp == 0) return -1;
|
||||
pl.opt = opt;
|
||||
pl.n_parts = n_split_idx;
|
||||
pl.fp_parts = mm_split_merge_prep(opt->split_prefix, n_split_idx, &mi);
|
||||
if (pl.fp_parts == 0) return -1;
|
||||
pl.mi = mi;
|
||||
pl.mini_batch_size = opt->mini_batch_size;
|
||||
|
||||
pl.n_parts = n_split_idx;
|
||||
pl.fp_parts = CALLOC(FILE*, pl.n_parts);
|
||||
pl.rid_shift = CALLOC(uint32_t, pl.n_parts);
|
||||
pl.mi = mm_split_merge_prep(opt->split_prefix, n_split_idx, pl.fp_parts, pl.rid_shift);
|
||||
if (pl.mi == 0) {
|
||||
free(pl.fp_parts);
|
||||
free(pl.rid_shift);
|
||||
return -1;
|
||||
}
|
||||
for (i = n_split_idx - 1; i > 0; --i)
|
||||
pl.rid_shift[i] = pl.rid_shift[i - 1];
|
||||
for (pl.rid_shift[0] = 0, i = 1; i < n_split_idx; ++i)
|
||||
pl.rid_shift[i] += pl.rid_shift[i - 1];
|
||||
|
||||
kt_pipeline(2, worker_pipeline, &pl, 3);
|
||||
|
||||
free(pl.str.s);
|
||||
free(pl.rid_shift);
|
||||
for (i = 0; i < n_split_idx; ++i)
|
||||
fclose(pl.fp_parts[i]);
|
||||
free(pl.fp_parts);
|
||||
|
|
|
|||
4
mmpriv.h
4
mmpriv.h
|
|
@ -80,7 +80,7 @@ void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_,
|
|||
void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r);
|
||||
void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
|
||||
void mm_hit_sort(void *km, int *n_regs, mm_reg1_t *r);
|
||||
void mm_set_mapq(void *km, int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);
|
||||
|
||||
void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos);
|
||||
|
|
@ -90,7 +90,7 @@ void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);
|
|||
void mm_pair(void *km, int max_gap_ref, int dp_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs);
|
||||
|
||||
FILE *mm_split_init(const char *prefix, const mm_idx_t *mi);
|
||||
FILE **mm_split_merge_prep(const char *prefix, int n_splits, mm_idx_t **mi_);
|
||||
mm_idx_t *mm_split_merge_prep(const char *prefix, int n_splits, FILE **fp, uint32_t *n_seq_part);
|
||||
int mm_split_merge(int n_segs, const char **fn, const mm_mapopt_t *opt, int n_split_idx);
|
||||
|
||||
void mm_err_puts(const char *str);
|
||||
|
|
|
|||
37
splitidx.c
37
splitidx.c
|
|
@ -26,17 +26,13 @@ FILE *mm_split_init(const char *prefix, const mm_idx_t *mi)
|
|||
return fp;
|
||||
}
|
||||
|
||||
FILE **mm_split_merge_prep(const char *prefix, int n_splits, mm_idx_t **mi_)
|
||||
mm_idx_t *mm_split_merge_prep(const char *prefix, int n_splits, FILE **fp, uint32_t *n_seq_part)
|
||||
{
|
||||
mm_idx_t *mi = 0;
|
||||
FILE **fp;
|
||||
char *fn;
|
||||
int i, j;
|
||||
uint32_t m_seq = 0;
|
||||
|
||||
if (n_splits < 1) return 0;
|
||||
*mi_ = 0;
|
||||
fp = (FILE**)calloc(n_splits, sizeof(FILE*));
|
||||
fn = (char*)calloc(strlen(prefix) + 10, 1);
|
||||
for (i = 0; i < n_splits; ++i) {
|
||||
sprintf(fn, "%s.%.4d.tmp", prefix, i);
|
||||
|
|
@ -45,31 +41,28 @@ FILE **mm_split_merge_prep(const char *prefix, int n_splits, mm_idx_t **mi_)
|
|||
fprintf(stderr, "ERROR: failed to open temporary file '%s'\n", fn);
|
||||
for (j = 0; j < i; ++j)
|
||||
fclose(fp[j]);
|
||||
free(fp);
|
||||
free(fn);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
free(fn);
|
||||
|
||||
mi = (mm_idx_t*)calloc(1, sizeof(mm_idx_t));
|
||||
for (i = 0; i < n_splits; ++i) {
|
||||
uint32_t k, n;
|
||||
mm_err_fread(&k, 4, 1, fp[i]);
|
||||
mm_err_fread(&n, 4, 1, fp[i]);
|
||||
mi->k = k;
|
||||
if (mi->n_seq + n > m_seq) {
|
||||
m_seq = mi->n_seq + n;
|
||||
kroundup32(m_seq);
|
||||
mi->seq = (mm_idx_seq_t*)realloc(mi->seq, sizeof(mm_idx_seq_t) * m_seq);
|
||||
}
|
||||
for (k = 0; k < n; ++i) {
|
||||
mm_err_fread(&mi->k, 4, 1, fp[i]); // TODO: check if k is all the same
|
||||
mm_err_fread(&n_seq_part[i], 4, 1, fp[i]);
|
||||
mi->n_seq += n_seq_part[i];
|
||||
}
|
||||
mi->seq = CALLOC(mm_idx_seq_t, mi->n_seq);
|
||||
for (i = j = 0; i < n_splits; ++i) {
|
||||
uint32_t k;
|
||||
for (k = 0; k < n_seq_part[i]; ++k, ++j) {
|
||||
uint8_t l;
|
||||
mm_err_fread(&l, 1, 1, fp[i]);
|
||||
mi->seq[mi->n_seq].name = (char*)calloc(l + 1, 1);
|
||||
mm_err_fread(mi->seq[mi->n_seq].name, 1, l, fp[i]);
|
||||
mm_err_fread(&mi->seq[mi->n_seq].len, 4, 1, fp[i]);
|
||||
++mi->n_seq;
|
||||
mi->seq[j].name = (char*)calloc(l + 1, 1);
|
||||
mm_err_fread(mi->seq[j].name, 1, l, fp[i]);
|
||||
mm_err_fread(&mi->seq[j].len, 4, 1, fp[i]);
|
||||
}
|
||||
}
|
||||
*mi_ = mi;
|
||||
return fp;
|
||||
return mi;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue