code backup
This commit is contained in:
parent
8220008564
commit
bcf6d1b355
90
layout.c
90
layout.c
|
|
@ -4,6 +4,8 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
#include "khash.h"
|
#include "khash.h"
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
|
|
@ -24,20 +26,20 @@ typedef struct {
|
||||||
float min_aln_ratio;
|
float min_aln_ratio;
|
||||||
} lo_opt_t;
|
} lo_opt_t;
|
||||||
|
|
||||||
KHASH_SET_INIT_INT(int)
|
KHASH_MAP_INIT_INT(int, int)
|
||||||
typedef khash_t(int) inthash_t;
|
typedef khash_t(int) inthash_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int id;
|
int id;
|
||||||
int contained;
|
int contained;
|
||||||
char *name;
|
char *name;
|
||||||
inthash_t *nei;
|
inthash_t *nei[2];
|
||||||
} vertex_t;
|
} vertex_t;
|
||||||
|
|
||||||
typedef kvec_t(vertex_t) vertex_v;
|
typedef kvec_t(vertex_t) vertex_v;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int type, score, l[2], s[2], e[2]; // length, start and end
|
int type, score, l[2], s[2], e[2], d[2]; // length, start and end
|
||||||
} edgeinfo_t;
|
} edgeinfo_t;
|
||||||
|
|
||||||
KHASH_MAP_INIT_INT64(edge, edgeinfo_t)
|
KHASH_MAP_INIT_INT64(edge, edgeinfo_t)
|
||||||
|
|
@ -71,7 +73,7 @@ ograph_t *lo_graph_init()
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
int lo_infer_edge_type(const lo_opt_t *opt, int l[2], int s[2], int e[2])
|
int lo_infer_edge_type(const lo_opt_t *opt, int l[2], int s[2], int e[2], int d[2])
|
||||||
{
|
{
|
||||||
int el, x[2], a[2], r[2]; // x: eXtended length, a: Aligned length; r: Remaining length
|
int el, x[2], a[2], r[2]; // x: eXtended length, a: Aligned length; r: Remaining length
|
||||||
int t[2][2], type;
|
int t[2][2], type;
|
||||||
|
|
@ -93,6 +95,7 @@ int lo_infer_edge_type(const lo_opt_t *opt, int l[2], int s[2], int e[2])
|
||||||
if (r[0] < 0) type ^= 3; // reverse the direction
|
if (r[0] < 0) type ^= 3; // reverse the direction
|
||||||
} else type = x[0] / l[0] > x[1] / l[1]? LO_T_C1 : LO_T_C2;
|
} else type = x[0] / l[0] > x[1] / l[1]? LO_T_C1 : LO_T_C2;
|
||||||
} else type = LO_T_I; // internal local match; not a suffix-prefix match
|
} else type = LO_T_I; // internal local match; not a suffix-prefix match
|
||||||
|
d[0] = abs(r[0]); d[1] = abs(r[1]);
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -119,8 +122,9 @@ ograph_t *lo_graph_parse(const lo_opt_t *opt, kstream_t *ks)
|
||||||
z->id = kh_size(g->n);
|
z->id = kh_size(g->n);
|
||||||
z->name = strdup(p);
|
z->name = strdup(p);
|
||||||
z->contained = 0;
|
z->contained = 0;
|
||||||
z->nei = 0; // don't initialize the hash table right now
|
z->nei[0] = z->nei[1] = 0; // don't initialize the hash table right now
|
||||||
k = kh_put(name, g->n, z->name, &absent);
|
k = kh_put(name, g->n, z->name, &absent);
|
||||||
|
assert(absent);
|
||||||
kh_val(g->n, k) = z->id;
|
kh_val(g->n, k) = z->id;
|
||||||
}
|
}
|
||||||
id[(i==4)] = kh_val(g->n, k);
|
id[(i==4)] = kh_val(g->n, k);
|
||||||
|
|
@ -138,7 +142,7 @@ ograph_t *lo_graph_parse(const lo_opt_t *opt, kstream_t *ks)
|
||||||
if (*q == 0) break;
|
if (*q == 0) break;
|
||||||
}
|
}
|
||||||
if (i < 9) continue; // not enough fields
|
if (i < 9) continue; // not enough fields
|
||||||
e.type = lo_infer_edge_type(opt, e.l, e.s, e.e);
|
e.type = lo_infer_edge_type(opt, e.l, e.s, e.e, e.d);
|
||||||
if (e.type == LO_T_C1) {
|
if (e.type == LO_T_C1) {
|
||||||
g->v.a[id[0]].contained = 1;
|
g->v.a[id[0]].contained = 1;
|
||||||
} else if (e.type == LO_T_C2) {
|
} else if (e.type == LO_T_C2) {
|
||||||
|
|
@ -152,7 +156,8 @@ ograph_t *lo_graph_parse(const lo_opt_t *opt, kstream_t *ks)
|
||||||
// printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id[0], e.l[0], e.s[0], e.e[0], id[1], e.l[1], e.s[1], e.e[1]);
|
// printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id[0], e.l[0], e.s[0], e.e[0], id[1], e.l[1], e.s[1], e.e[1]);
|
||||||
}
|
}
|
||||||
free(str.s);
|
free(str.s);
|
||||||
if (lo_verbose >= 3) fprintf(stderr, "[M::%s] read %d edges\n", __func__, kh_size(g->e));
|
if (lo_verbose >= 3)
|
||||||
|
fprintf(stderr, "[M::%s] read %d edges\n", __func__, kh_size(g->e));
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -160,7 +165,8 @@ void lo_graph_destroy(ograph_t *g)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < g->v.n; ++i) {
|
for (i = 0; i < g->v.n; ++i) {
|
||||||
if (g->v.a[i].nei) kh_destroy(int, g->v.a[i].nei);
|
if (g->v.a[i].nei[0]) kh_destroy(int, g->v.a[i].nei[0]);
|
||||||
|
if (g->v.a[i].nei[1]) kh_destroy(int, g->v.a[i].nei[1]);
|
||||||
free(g->v.a[i].name);
|
free(g->v.a[i].name);
|
||||||
}
|
}
|
||||||
free(g->v.a);
|
free(g->v.a);
|
||||||
|
|
@ -173,18 +179,77 @@ void lo_graph_destroy(ograph_t *g)
|
||||||
* Graph routines *
|
* Graph routines *
|
||||||
******************/
|
******************/
|
||||||
|
|
||||||
|
#define lo_swap(tmp, a, b) ((tmp) = (a), (a) = (b), (b) = (tmp))
|
||||||
|
|
||||||
|
static inline void lo_flip_edge(edgeinfo_t *e)
|
||||||
|
{
|
||||||
|
int tmp;
|
||||||
|
lo_swap(tmp, e->l[0], e->l[1]);
|
||||||
|
lo_swap(tmp, e->s[0], e->s[1]);
|
||||||
|
lo_swap(tmp, e->e[0], e->e[1]);
|
||||||
|
e->type = (e->type|1)<<1 | (e->type|2)>>1;
|
||||||
|
}
|
||||||
|
|
||||||
void lo_rm_contained(ograph_t *g)
|
void lo_rm_contained(ograph_t *g)
|
||||||
{
|
{
|
||||||
khint_t k;
|
khint_t k, l;
|
||||||
|
int n_del = 0, n_add = 0, absent;
|
||||||
|
ehash_t *tmp;
|
||||||
|
tmp = kh_init(edge);
|
||||||
for (k = 0; k != kh_end(g->e); ++k) {
|
for (k = 0; k != kh_end(g->e); ++k) {
|
||||||
int id[2];
|
int id[2];
|
||||||
if (!kh_exist(g->e, k)) continue;
|
if (!kh_exist(g->e, k)) continue;
|
||||||
id[0] = kh_key(g->e, k)>>32;
|
id[0] = kh_key(g->e, k)>>32;
|
||||||
id[1] = (uint32_t)kh_key(g->e, k);
|
id[1] = (uint32_t)kh_key(g->e, k);
|
||||||
if (g->v.a[id[0]].contained || g->v.a[id[1]].contained)
|
if (g->v.a[id[0]].contained || g->v.a[id[1]].contained) {
|
||||||
kh_del(edge, g->e, k);
|
++n_del;
|
||||||
|
kh_del(edge, g->e, k); // kh_del() will not trigger rehash
|
||||||
|
} else {
|
||||||
|
uint64_t key2 = (uint64_t)id[1]<<32|id[0];
|
||||||
|
l = kh_get(edge, g->e, key2);
|
||||||
|
if (l == kh_end(g->e)) {
|
||||||
|
l = kh_put(edge, tmp, key2, &absent);
|
||||||
|
kh_val(tmp, l) = kh_val(g->e, k);
|
||||||
|
lo_flip_edge(&kh_val(tmp, l));
|
||||||
|
++n_add;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (k = 0; k != kh_end(tmp); ++k) {
|
||||||
|
if (!kh_exist(tmp, k)) continue;
|
||||||
|
l = kh_put(edge, g->e, kh_key(tmp, k), &absent);
|
||||||
|
assert(absent);
|
||||||
|
kh_val(g->e, l) = kh_val(tmp, k);
|
||||||
|
}
|
||||||
|
if (lo_verbose >= 3)
|
||||||
|
fprintf(stderr, "[M::%s] removed %d and added %d; %d edges remain\n", __func__, n_del, kh_size(tmp), kh_size(g->e));
|
||||||
|
kh_destroy(edge, tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
void lo_populate_nei(ograph_t *g)
|
||||||
|
{
|
||||||
|
int i, absent;
|
||||||
|
khint_t k, l;
|
||||||
|
for (i = 0; i < g->v.n; ++i) {
|
||||||
|
if (g->v.a[i].contained) continue;
|
||||||
|
g->v.a[i].nei[0] = kh_init(int);
|
||||||
|
g->v.a[i].nei[1] = kh_init(int);
|
||||||
|
}
|
||||||
|
for (k = 0; k != kh_end(g->e); ++k) {
|
||||||
|
int id[2];
|
||||||
|
edgeinfo_t *e;
|
||||||
|
vertex_t *v;
|
||||||
|
if (!kh_exist(g->e, k)) continue;
|
||||||
|
id[0] = kh_key(g->e, k)>>32;
|
||||||
|
id[1] = (uint32_t)kh_key(g->e, k);
|
||||||
|
e = &kh_val(g->e, k);
|
||||||
|
v = &g->v.a[id[0]];
|
||||||
|
l = kh_put(int, v->nei[!(e->type>>1)], id[1]<<1|(e->type&1), &absent);
|
||||||
|
kh_val(v->nei[!(e->type>>1)], l) = e->d[0];
|
||||||
|
v = &g->v.a[id[1]];
|
||||||
|
l = kh_put(int, v->nei[e->type&1], id[0]<<1|(e->type>>1^1), &absent);
|
||||||
|
kh_val(v->nei[e->type&1], l) = e->d[1];
|
||||||
}
|
}
|
||||||
if (lo_verbose >= 3) fprintf(stderr, "[M::%s] %d edges remain after removing contained vertices\n", __func__, kh_size(g->e));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*****************
|
/*****************
|
||||||
|
|
@ -210,6 +275,7 @@ int main_layout(int argc, char *argv[])
|
||||||
ks = ks_init(fp);
|
ks = ks_init(fp);
|
||||||
g = lo_graph_parse(&opt, ks);
|
g = lo_graph_parse(&opt, ks);
|
||||||
lo_rm_contained(g);
|
lo_rm_contained(g);
|
||||||
|
lo_populate_nei(g);
|
||||||
lo_graph_destroy(g);
|
lo_graph_destroy(g);
|
||||||
ks_destroy(ks);
|
ks_destroy(ks);
|
||||||
gzclose(fp);
|
gzclose(fp);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue