From 11081c6c272032187b9c5dcf97d6e580e3d1f90d Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 18 Sep 2017 19:49:15 -0400 Subject: [PATCH] r411: refactored kalloc for clarity The new version is closer to K&R's original implementation. --- align.c | 1 + hit.c | 1 + kalloc.c | 246 +++++++++++++++++++++++++------------------------------ kalloc.h | 11 +-- main.c | 2 +- 5 files changed, 122 insertions(+), 139 deletions(-) diff --git a/align.c b/align.c index 03f97d6..1e97616 100644 --- a/align.c +++ b/align.c @@ -1,5 +1,6 @@ #include #include +#include #include "minimap.h" #include "mmpriv.h" #include "ksw2.h" diff --git a/hit.c b/hit.c index fa3e068..d6e22a3 100644 --- a/hit.c +++ b/hit.c @@ -1,4 +1,5 @@ #include +#include #include #include "mmpriv.h" #include "kalloc.h" diff --git a/kalloc.c b/kalloc.c index fa9bc88..fe55681 100644 --- a/kalloc.c +++ b/kalloc.c @@ -1,175 +1,144 @@ #include #include #include -#include #include "kalloc.h" -/* The whole thing is: ("@" for the kheader_t of the block, "-" for free - * memory, and "+" for allocated memory. One char for one unit.) - * - * This region is core 1. This region is core 2. +/* In kalloc, a *core* is a large chunk of contiguous memory. Each core is + * associated with a master header, which keeps the size of the current core + * and the pointer to next core. Kalloc allocates small *blocks* of memory from + * the cores and organizes free memory blocks in a circular single-linked list. * - * @-------@++++++@++++++++++++@------------ @----------@++++++++++++@+++++++@------------ - * | | | | - * p=p->ptr->ptr->ptr->ptr p->ptr p->ptr->ptr p->ptr->ptr->ptr + * In the following diagram, "@" stands for the header of a free block (of type + * header_t), "#" for the header of an allocated block (of type size_t), "-" + * for free memory, and "+" for allocated memory. + * + * master This region is core 1. master This region is core 2. 
+ * | | + * *@-------#++++++#++++++++++++@-------- *@----------#++++++++++++#+++++++@------------ + * | | | | + * p=p->ptr->ptr->ptr->ptr p->ptr p->ptr->ptr p->ptr->ptr->ptr */ -#define PTR(p) ((size_t*)((size_t*)p)[1]) +#define MIN_CORE_SIZE 0x80000 -typedef struct _allocated_t { - struct _allocated_t *next; - size_t *ptr; -} allocated_t; +typedef struct header_t { + size_t size; + struct header_t *ptr; +} header_t; typedef struct { - size_t base[2], *loop_head; - allocated_t list_head, *list_tail; - size_t total_allocated; + header_t base, *loop_head, *core_head; /* base is a zero-sized block always kept in the loop */ } kmem_t; -void *km_init() -{ - return calloc(1, sizeof(kmem_t)); -} - -static void kerror(const char *s) +static void panic(const char *s) { fprintf(stderr, "%s\n", s); - exit(1); + abort(); } -static size_t *morecore(kmem_t *km, size_t nu) +void *km_init(void) { - size_t rnu, *up; - - rnu = (nu + 0xfffff) & (~(size_t)0xfffff); - up = (size_t*)malloc(rnu * sizeof(size_t)); - if (!up) { /* fail to allocate memory */ - km_stat(km); - fprintf(stderr, "[morecore] %lu bytes requested but not available.\n", (unsigned long)rnu * sizeof(size_t)); - exit(1); - } - /* put the pointer in km->list_head */ - if (km->list_tail == 0) km->list_tail = &km->list_head; - km->list_tail->ptr = up; - km->list_tail->next = (allocated_t*)calloc(1, sizeof(allocated_t)); - km->list_tail = km->list_tail->next; - - km->total_allocated += rnu * sizeof(size_t); - *up = rnu; /* the size of the current block, and in this case the block is the same as the new core */ - kfree(km, up + 1); /* initialize the new "core" */ - return km->loop_head; + return calloc(1, sizeof(kmem_t)); } void km_destroy(void *_km) { kmem_t *km = (kmem_t*)_km; - allocated_t *p, *q; - if (km == 0) return; - p = &km->list_head; - do { - q = p->next; - free(p->ptr); - if (p != &km->list_head) free(p); + header_t *p, *q; + if (km == NULL) return; + for (p = km->core_head; p != NULL;) { + q = p->ptr; + 
free(p); p = q; - } while (p && p->next); - if (p != &km->list_head) free(p); + } free(km); } -void kfree(void *_km, void *ap) +static header_t *morecore(kmem_t *km, size_t nu) { - size_t *p, *q; + header_t *q; + size_t bytes, *p; + nu = (nu + 1 + (MIN_CORE_SIZE - 1)) / MIN_CORE_SIZE * MIN_CORE_SIZE; /* the first +1 for core header */ + bytes = nu * sizeof(header_t); + q = (header_t*)malloc(bytes); + if (!q) panic("[morecore] insufficient memory"); + q->ptr = km->core_head, q->size = nu, km->core_head = q; + p = (size_t*)(q + 1); + *p = nu - 1; /* the size of the free block; -1 because the first unit is used for the core header */ + kfree(km, p + 1); /* initialize the new "core"; NB: the core header is not looped. */ + return km->loop_head; +} + +void kfree(void *_km, void *ap) /* kfree() also adds a new core to the circular list */ +{ + header_t *p, *q; kmem_t *km = (kmem_t*)_km; if (!ap) return; - if (km == 0) { + if (km == NULL) { free(ap); return; } - p = (size_t*)ap - 1; /* *p is the size of the current block */ + p = (header_t*)((size_t*)ap - 1); + p->size = *((size_t*)ap - 1); /* Find the pointer that points to the block to be freed. 
The following loop can stop on two conditions: * - * a) "p>q && p<q->ptr": @------@++++++++@+++++++@------- @---------------@+++++++@------- + * a) "p>q && p<q->ptr": @------#++++++++#+++++++@------- @---------------#+++++++@------- * (can also be in | | | -> | | * two cores) q p q->ptr q q->ptr * - * @-------- @+++++++++@-------- @-------- @------------------ + * @-------- #+++++++++@-------- @-------- @------------------ * | | | -> | | * q p q->ptr q q->ptr * - * b) "q>=q->ptr && (p>q || p<q->ptr)": @-------@+++++ @--------@+++++++ @-------@+++++ @---------------- + * b) "q>=q->ptr && (p>q || p<q->ptr)": @-------#+++++ @--------#+++++++ @-------#+++++ @---------------- * | | | -> | | * q->ptr q p q->ptr q * - * @+++++++@----- @++++++++@------- @------------- @++++++++@------- + * #+++++++@----- #++++++++@------- @------------- #++++++++@------- * | | | -> | | * p q->ptr q q->ptr q */ - for (q = km->loop_head; !(p > q && p < PTR(q)); q = PTR(q)) - if (q >= PTR(q) && (p > q || p < PTR(q))) break; - if (p + (*p) == PTR(q)) { /* two adjacent blocks, merge p and q->ptr (the 2nd and 4th cases) */ - *p += *PTR(q); /* this is the new q->ptr size */ - p[1] = (size_t)PTR(PTR(q)); /* this is the new q->ptr->ptr */ - /* p is actually the new q->ptr. The actual change happens a few lines below. 
*/ - } else if (p + (*p) > PTR(q) && PTR(q) >= p) { /* the end of the allocated block is in the next free block */ - kerror("[kfree] The end of the allocated block enters a free block."); - } else p[1] = (size_t)PTR(q); /* backup q->ptr */ + for (q = km->loop_head; !(p > q && p < q->ptr); q = q->ptr) + if (q >= q->ptr && (p > q || p < q->ptr)) break; + if (p + p->size == q->ptr) { /* two adjacent blocks, merge p and q->ptr (the 2nd and 4th cases) */ + p->size += q->ptr->size; + p->ptr = q->ptr->ptr; + } else if (p + p->size > q->ptr && q->ptr >= p) { + panic("[kfree] The end of the allocated block enters a free block."); + } else p->ptr = q->ptr; /* backup q->ptr */ - if (q + (*q) == p) { /* two adjacent blocks, merge q and p (the other two cases) */ - *q += *p; - q[1] = (size_t)PTR(p); + if (q + q->size == p) { /* two adjacent blocks, merge q and p (the other two cases) */ + q->size += p->size; + q->ptr = p->ptr; km->loop_head = q; - } else if (q + (*q) > p && p >= q) { /* the end of a free block in the allocated block */ - kerror("[kfree] The end of a free block enters the allocated block."); - } else km->loop_head = p, q[1] = (size_t)p; /* in two cores, cannot be merged */ -} - -void *krealloc(void *_km, void *ap, size_t n_bytes) -{ - kmem_t *km = (kmem_t*)_km; - size_t n_units, *p, *q; - - if (n_bytes == 0) { - kfree(km, ap); return 0; - } - if (km == 0) return realloc(ap, n_bytes); - if (!ap) return kmalloc(km, n_bytes); - n_units = 1 + (n_bytes + sizeof(size_t) - 1) / sizeof(size_t); - p = (size_t*)ap - 1; - if (*p >= n_units) return ap; /* TODO: this prevents shrinking */ - q = (size_t*)kmalloc(km, n_bytes); - memcpy(q, ap, (*p - 1) * sizeof(size_t)); - kfree(km, ap); - return q; + } else if (q + q->size > p && p >= q) { + panic("[kfree] The end of a free block enters the allocated block."); + } else km->loop_head = p, q->ptr = p; /* in two cores, cannot be merged; create a new block in the list */ } void *kmalloc(void *_km, size_t n_bytes) { kmem_t *km = 
(kmem_t*)_km; - size_t n_units, *p, *q; + size_t n_units; + header_t *p, *q; if (n_bytes == 0) return 0; - if (km == 0) return malloc(n_bytes); - /* "n_units" means the number of units. The size of one unit equals to sizeof(kheader_t). - * "1" is the kheader_t of a block, which is always required. */ - n_units = 1 + (n_bytes + sizeof(size_t) - 1) / sizeof(size_t); - if (n_units&1) ++n_units; /* make n_units an even number, or it will segfault if only one unit remains */ + if (km == NULL) return malloc(n_bytes); + n_units = (n_bytes + sizeof(size_t) + sizeof(header_t) - 1) / sizeof(header_t) + 1; - if (!(q = km->loop_head)) { /* the first time when kmalloc() is called, intialization */ - km->base[1] = (size_t)(km->loop_head = q = km->base); *q = 0; - } - for (p = PTR(q);; q = p, p = PTR(p)) { /* search for a suitable block */ - if (*p >= n_units) { /* p->size if the size of current block. This line means the current block is large enough. */ - if (*p == n_units) q[1] = (size_t)PTR(p); /* no need to split the block */ - else { /* split the block */ - /* memory is allocated at the end of the block */ - *p -= n_units; /* reduce the size of the free block */ - p += *p; /* skip to the kheader_t of the allocated block */ - *p = n_units; /* set the size */ + if (!(q = km->loop_head)) /* the first time when kmalloc() is called, intialize it */ + q = km->loop_head = km->base.ptr = &km->base; + for (p = q->ptr;; q = p, p = p->ptr) { /* search for a suitable block */ + if (p->size >= n_units) { /* p->size if the size of current block. This line means the current block is large enough. */ + if (p->size == n_units) q->ptr = p->ptr; /* no need to split the block */ + else { /* split the block. NB: memory is allocated at the end of the block! 
*/ + p->size -= n_units; /* reduce the size of the free block */ + p += p->size; /* p points to the allocated block */ + *(size_t*)p = n_units; /* set the size */ } km->loop_head = q; /* set the end of chain */ - return p + 1; /* skip the kheader_t */ + return (size_t*)p + 1; } if (p == km->loop_head) { /* then ask for more "cores" */ if ((p = morecore(km, n_units)) == 0) return 0; @@ -182,33 +151,44 @@ void *kcalloc(void *_km, size_t count, size_t size) kmem_t *km = (kmem_t*)_km; void *p; if (size == 0 || count == 0) return 0; - if (km == 0) return calloc(count, size); + if (km == NULL) return calloc(count, size); p = kmalloc(km, count * size); memset(p, 0, count * size); return p; } -void km_stat(const void *_km) +void *krealloc(void *_km, void *ap, size_t n_bytes) // TODO: this can be made more efficient in principle { kmem_t *km = (kmem_t*)_km; - unsigned n_blocks, n_units; - size_t max_block = 0, *p, *q; - float frag; + size_t n_units, *p, *q; - if (km == 0 || !(p = km->loop_head)) return; - n_blocks = n_units = 0; - do { - q = PTR(p); - if (*p > max_block) max_block = *p; - n_units += *p; - if (p + (*p) > q && q > p) - kerror("[kr_stat] The end of a free block enters another free block."); - p = q; - ++n_blocks; - } while (p != km->loop_head); - - --n_blocks; - frag = 1.0/1024.0 * n_units * sizeof(size_t) / n_blocks; - fprintf(stderr, "[kr_stat] tot=%lu, free=%lu, n_block=%u, max_block=%lu, frag_len=%.3fK\n", - (unsigned long)km->total_allocated, (unsigned long)n_units * sizeof(size_t), n_blocks, (unsigned long)max_block * sizeof(size_t), frag); + if (n_bytes == 0) { + kfree(km, ap); return 0; + } + if (km == NULL) return realloc(ap, n_bytes); + if (ap == NULL) return kmalloc(km, n_bytes); + n_units = (n_bytes + sizeof(size_t) + sizeof(header_t) - 1) / sizeof(header_t); + p = (size_t*)ap - 1; + if (*p >= n_units) return ap; /* TODO: this prevents shrinking */ + q = (size_t*)kmalloc(km, n_bytes); + memcpy(q, ap, (*p - 1) * sizeof(header_t)); + kfree(km, ap); + 
return q; +} + +void km_stat(const void *_km, km_stat_t *s) +{ + kmem_t *km = (kmem_t*)_km; + header_t *p; + memset(s, 0, sizeof(km_stat_t)); + if (km == NULL || km->loop_head == NULL) return; + for (p = km->loop_head;; p = p->ptr) { + s->available += p->size * sizeof(header_t); + if (p->size != 0) ++s->n_blocks; /* &kmem_t::base is always one of the cores. It is zero-sized. */ + if (p->ptr > p && p + p->size > p->ptr) + panic("[km_stat] The end of a free block enters another free block."); + if (p->ptr == km->loop_head) break; + } + for (p = km->core_head; p != NULL; p = p->ptr) + ++s->n_cores, s->capacity += p->size * sizeof(header_t); } diff --git a/kalloc.h b/kalloc.h index ec683d7..85b54a7 100644 --- a/kalloc.h +++ b/kalloc.h @@ -1,14 +1,16 @@ #ifndef _KALLOC_H_ #define _KALLOC_H_ -#include - -#define km_size(x) (*(((size_t*)(x))-1) * sizeof(size_t)) +#include /* for size_t */ #ifdef __cplusplus extern "C" { #endif +typedef struct { + size_t capacity, available, n_blocks, n_cores; +} km_stat_t; + void *kmalloc(void *km, size_t size); void *krealloc(void *km, void *ptr, size_t size); void *kcalloc(void *km, size_t count, size_t size); @@ -16,8 +18,7 @@ void kfree(void *km, void *ptr); void *km_init(void); void km_destroy(void *km); - -void km_stat(const void *km); // TODO: return numbers instead of print to stderr +void km_stat(const void *_km, km_stat_t *s); #ifdef __cplusplus } diff --git a/main.c b/main.c index 49f2a22..2192ed0 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.2-r409" +#define MM_VERSION "2.2-r411-dirty" #ifdef __linux__ #include