0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2025-01-24 18:01:54 +00:00

Page allocator moved from pools to IO loops.

The resource pool system is highly hierarchical and keeping spare pages
in pools leads to unnecessarily complex memory management.

Loops have a flat hierarchy, at least for now, and it is therefore much
easier to take care of pages, especially in cases of excessive virtual memory
fragmentation.
This commit is contained in:
Maria Matejka 2021-11-30 23:57:14 +01:00
parent 385b3ea395
commit bb63e99d78
12 changed files with 206 additions and 212 deletions

View File

@ -130,7 +130,7 @@ lp_alloc(linpool *m, uint size)
{ {
/* Need to allocate a new chunk */ /* Need to allocate a new chunk */
if (m->use_pages) if (m->use_pages)
c = alloc_page(m->p); c = alloc_page();
else else
c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size); c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size);
@ -271,7 +271,7 @@ lp_free(resource *r)
{ {
c = d->next; c = d->next;
if (m->use_pages) if (m->use_pages)
free_page(m->p, d); free_page(d);
else else
xfree(d); xfree(d);
} }

View File

@ -30,14 +30,6 @@
* is freed upon shutdown of the module. * is freed upon shutdown of the module.
*/ */
struct pool_pages {
uint free;
uint used;
void *ptr[0];
};
#define POOL_PAGES_MAX ((page_size - sizeof(struct pool_pages)) / sizeof (void *))
static void pool_dump(resource *); static void pool_dump(resource *);
static void pool_free(resource *); static void pool_free(resource *);
static resource *pool_lookup(resource *, unsigned long); static resource *pool_lookup(resource *, unsigned long);
@ -54,9 +46,6 @@ static struct resclass pool_class = {
pool root_pool; pool root_pool;
void *alloc_sys_page(void);
int free_sys_page(void *);
static int indent; static int indent;
/** /**
@ -103,16 +92,6 @@ pool_free(resource *P)
r = rr; r = rr;
} }
if (p->pages)
{
ASSERT_DIE(!p->pages->used);
for (uint i = 0; i < p->pages->free; i++)
free_sys_page(p->pages->ptr[i]);
free_sys_page(p->pages);
}
pool_parent = parent; pool_parent = parent;
} }
@ -185,9 +164,6 @@ pool_memsize_locked(pool *p)
WALK_LIST(r, p->inside) WALK_LIST(r, p->inside)
sum += rmemsize(r); sum += rmemsize(r);
if (p->pages)
sum += page_size * (p->pages->used + p->pages->free + 1);
return sum; return sum;
} }
@ -551,49 +527,6 @@ mb_free(void *m)
rfree(b); rfree(b);
} }
void *
alloc_page(pool *p)
{
if (!p->pages)
{
p->pages = alloc_sys_page();
p->pages->free = 0;
p->pages->used = 1;
}
else
p->pages->used++;
if (p->pages->free)
{
void *ptr = p->pages->ptr[--p->pages->free];
bzero(ptr, page_size);
return ptr;
}
else
return alloc_sys_page();
}
void
free_page(pool *p, void *ptr)
{
ASSERT_DIE(p->pages);
p->pages->used--;
ASSERT_DIE(p->pages->free <= POOL_PAGES_MAX);
if (p->pages->free == POOL_PAGES_MAX)
{
const unsigned long keep = POOL_PAGES_MAX / 4;
for (uint i = keep; i < p->pages->free; i++)
free_sys_page(p->pages->ptr[i]);
p->pages->free = keep;
}
p->pages->ptr[p->pages->free++] = ptr;
}
#define STEP_UP(x) ((x) + (x)/2 + 4) #define STEP_UP(x) ((x) + (x)/2 + 4)

View File

@ -108,8 +108,8 @@ void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_siz
extern long page_size; extern long page_size;
/* Allocator of whole pages; for use in slabs and other high-level allocators. */ /* Allocator of whole pages; for use in slabs and other high-level allocators. */
void *alloc_page(pool *); void *alloc_page(void);
void free_page(pool *, void *); void free_page(void *);
#define PAGE_HEAD(x) ((void *) (((intptr_t) (x)) & ~(page_size-1))) #define PAGE_HEAD(x) ((void *) (((intptr_t) (x)) & ~(page_size-1)))
#ifdef HAVE_LIBDMALLOC #ifdef HAVE_LIBDMALLOC

View File

@ -269,7 +269,7 @@ no_partial:
s->num_empty_heads--; s->num_empty_heads--;
goto okay; goto okay;
} }
h = alloc_page(s->p); h = alloc_page();
#ifdef POISON #ifdef POISON
memset(h, 0xba, page_size); memset(h, 0xba, page_size);
#endif #endif
@ -332,7 +332,7 @@ sl_free(slab *s, void *oo)
#ifdef POISON #ifdef POISON
memset(h, 0xde, page_size); memset(h, 0xde, page_size);
#endif #endif
free_page(s->p, h); free_page(h);
} }
else else
{ {
@ -349,11 +349,11 @@ slab_free(resource *r)
struct sl_head *h, *g; struct sl_head *h, *g;
WALK_LIST_DELSAFE(h, g, s->empty_heads) WALK_LIST_DELSAFE(h, g, s->empty_heads)
free_page(s->p, h); free_page(h);
WALK_LIST_DELSAFE(h, g, s->partial_heads) WALK_LIST_DELSAFE(h, g, s->partial_heads)
free_page(s->p, h); free_page(h);
WALK_LIST_DELSAFE(h, g, s->full_heads) WALK_LIST_DELSAFE(h, g, s->full_heads)
free_page(s->p, h); free_page(h);
} }
static void static void
@ -386,8 +386,7 @@ slab_memsize(resource *r)
WALK_LIST(h, s->full_heads) WALK_LIST(h, s->full_heads)
heads++; heads++;
// return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + page_size); return ALLOC_OVERHEAD + sizeof(struct slab) + heads * page_size;
return ALLOC_OVERHEAD + sizeof(struct slab); /* The page sizes are accounted for in the pool */
} }
static resource * static resource *

View File

@ -198,6 +198,7 @@ t_as_path_converting(void)
#endif #endif
void resource_sys_init(void); void resource_sys_init(void);
void io_init(void);
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
@ -207,6 +208,7 @@ main(int argc, char *argv[])
resource_init(); resource_init();
the_bird_lock(); the_bird_lock();
birdloop_init(); birdloop_init();
io_init();
bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities."); bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities.");
bt_test_suite(t_path_format, "Testing formating as path into byte buffer"); bt_test_suite(t_path_format, "Testing formating as path into byte buffer");

View File

@ -1057,7 +1057,7 @@ rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_
if (!rpeb) if (!rpeb)
{ {
rpeb = alloc_page(tab->rp); rpeb = alloc_page();
*rpeb = (struct rt_export_block) {}; *rpeb = (struct rt_export_block) {};
add_tail(&tab->pending_exports, &rpeb->n); add_tail(&tab->pending_exports, &rpeb->n);
} }
@ -2157,7 +2157,7 @@ rt_free(resource *_r)
static void static void
rt_res_dump(resource *_r) rt_res_dump(resource *_r)
{ {
RT_LOCKED((rtable *) _r, r) rtable_private *r = RT_PRIV((rtable *) _r);
debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n", debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n",
r->name, net_label[r->addr_type], r->rt_count, r->use_count); r->name, net_label[r->addr_type], r->rt_count, r->use_count);
} }
@ -2484,7 +2484,7 @@ rt_export_cleanup(void *data)
memset(reb, 0xbe, page_size); memset(reb, 0xbe, page_size);
#endif #endif
free_page(tab->rp, reb); free_page(reb);
if (EMPTY_LIST(tab->pending_exports)) if (EMPTY_LIST(tab->pending_exports))
{ {

View File

@ -9,6 +9,8 @@
#include "nest/bird.h" #include "nest/bird.h"
#include "lib/resource.h" #include "lib/resource.h"
#include "sysdep/unix/io-loop.h"
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h> #include <unistd.h>
#include <stdatomic.h> #include <stdatomic.h>
@ -19,86 +21,47 @@
#endif #endif
long page_size = 0; long page_size = 0;
_Bool alloc_multipage = 0;
static _Atomic int global_page_list_not_empty;
static list global_page_list;
static _Atomic int global_page_spinlock;
#define GLOBAL_PAGE_SPIN_LOCK for (int v = 0; !atomic_compare_exchange_weak_explicit(&global_page_spinlock, &v, 1, memory_order_acq_rel, memory_order_acquire); v = 0)
#define GLOBAL_PAGE_SPIN_UNLOCK do { int v = 1; ASSERT_DIE(atomic_compare_exchange_strong_explicit(&global_page_spinlock, &v, 0, memory_order_acq_rel, memory_order_acquire)); } while (0)
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
#if DEBUGGING
#define FP_NODE_OFFSET 42
#else
#define FP_NODE_OFFSET 1
#endif
static _Bool use_fake = 0; static _Bool use_fake = 0;
#else #else
static _Bool use_fake = 1; static _Bool use_fake = 1;
#endif #endif
void resource_sys_init(void) static void *
alloc_sys_page(void)
{ {
#ifdef HAVE_MMAP void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
init_list(&global_page_list);
if (!(page_size = sysconf(_SC_PAGESIZE))) if (ptr == MAP_FAILED)
die("System page size must be non-zero"); bug("mmap(%lu) failed: %m", page_size);
if ((u64_popcount(page_size) > 1) || (page_size > 16384)) return ptr;
#endif
{
/* Too big or strange page, use the aligned allocator instead */
page_size = 4096;
use_fake = 1;
}
} }
void * void *
alloc_sys_page(void) alloc_page(void)
{ {
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
if (!use_fake) if (!use_fake)
{ {
if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed)) struct free_pages *fp = &birdloop_current->pages;
{ if (!fp->cnt)
GLOBAL_PAGE_SPIN_LOCK; return alloc_sys_page();
if (!EMPTY_LIST(global_page_list))
{
node *ret = HEAD(global_page_list);
rem_node(ret);
if (EMPTY_LIST(global_page_list))
atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed);
GLOBAL_PAGE_SPIN_UNLOCK;
memset(ret, 0, sizeof(node));
return (void *) ret;
}
GLOBAL_PAGE_SPIN_UNLOCK;
}
if (alloc_multipage) node *n = HEAD(fp->list);
{ rem_node(n);
void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (--fp->cnt < fp->min)
if (big == MAP_FAILED) ev_send(&global_work_list, fp->cleanup);
bug("mmap(%lu) failed: %m", page_size);
uintptr_t offset = ((uintptr_t) big) % page_size; void *ptr = n - FP_NODE_OFFSET;
if (offset) memset(ptr, 0, page_size);
{ return ptr;
void *ret = big + page_size - offset;
munmap(big, page_size - offset);
munmap(ret + page_size, offset);
return ret;
}
else
{
munmap(big + page_size, page_size);
return big;
}
}
void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ret == MAP_FAILED)
bug("mmap(%lu) failed: %m", page_size);
return ret;
} }
else else
#endif #endif
@ -111,56 +74,156 @@ alloc_sys_page(void)
} }
void void
free_sys_page(void *ptr) free_page(void *ptr)
{ {
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
if (!use_fake) if (!use_fake)
{ {
if (munmap(ptr, page_size) < 0) struct free_pages *fp = &birdloop_current->pages;
#ifdef ENOMEM struct node *n = ptr;
if (errno == ENOMEM) n += FP_NODE_OFFSET;
{
memset(ptr, 0, page_size);
GLOBAL_PAGE_SPIN_LOCK; memset(n, 0, sizeof(node));
add_tail(&global_page_list, (node *) ptr); add_tail(&fp->list, n);
atomic_store_explicit(&global_page_list_not_empty, 1, memory_order_relaxed); if (++fp->cnt > fp->max)
GLOBAL_PAGE_SPIN_UNLOCK; ev_send(&global_work_list, fp->cleanup);
}
else
#endif
bug("munmap(%p) failed: %m", ptr);
} }
else else
#endif #endif
free(ptr); free(ptr);
} }
#ifdef HAVE_MMAP
#define GFP (&main_birdloop.pages)
void void
check_stored_pages(void) flush_pages(struct birdloop *loop)
{ {
#ifdef ENOMEM ASSERT_DIE(birdloop_inside(&main_birdloop));
if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed) == 0)
return;
for (uint limit = 0; limit < 256; limit++) add_tail_list(&GFP->list, &loop->pages.list);
{ GFP->cnt += loop->pages.cnt;
GLOBAL_PAGE_SPIN_LOCK;
void *ptr = HEAD(global_page_list); loop->pages.cnt = 0;
if (!NODE_VALID(ptr)) loop->pages.list = (list) {};
{ loop->pages.min = 0;
atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed); loop->pages.max = 0;
GLOBAL_PAGE_SPIN_UNLOCK;
return; rfree(loop->pages.cleanup);
loop->pages.cleanup = NULL;
} }
rem_node(ptr); static void
if (munmap(ptr, page_size) < 0) cleanup_pages(void *data)
if (errno == ENOMEM) {
add_tail(&global_page_list, ptr); struct birdloop *loop = data;
birdloop_enter(loop);
struct free_pages *fp = &birdloop_current->pages;
while ((fp->cnt < fp->min) && (GFP->cnt > GFP->min))
{
node *n = HEAD(GFP->list);
rem_node(n);
add_tail(&fp->list, n);
fp->cnt++;
GFP->cnt--;
}
while (fp->cnt < fp->min)
{
node *n = alloc_sys_page();
add_tail(&fp->list, n + FP_NODE_OFFSET);
fp->cnt++;
}
while (fp->cnt > fp->max)
{
node *n = HEAD(fp->list);
rem_node(n);
add_tail(&GFP->list, n);
fp->cnt--;
GFP->cnt++;
}
birdloop_leave(loop);
if (GFP->cnt > GFP->max)
ev_send(&global_work_list, GFP->cleanup);
}
static void
cleanup_global_pages(void *data UNUSED)
{
while (GFP->cnt < GFP->max)
{
node *n = alloc_sys_page();
add_tail(&GFP->list, n + FP_NODE_OFFSET);
GFP->cnt++;
}
for (uint limit = GFP->cnt; (limit > 0) && (GFP->cnt > GFP->max); limit--)
{
node *n = TAIL(GFP->list);
rem_node(n);
if (munmap(n - FP_NODE_OFFSET, page_size) == 0)
GFP->cnt--;
else if (errno == ENOMEM)
add_head(&GFP->list, n);
else else
bug("munmap(%p) failed: %m", ptr); bug("munmap(%p) failed: %m", n - FP_NODE_OFFSET);
GLOBAL_PAGE_SPIN_UNLOCK;
} }
}
void
init_pages(struct birdloop *loop)
{
struct free_pages *fp = &loop->pages;
init_list(&fp->list);
fp->cleanup = ev_new_init(&root_pool, cleanup_pages, loop);
fp->min = 4;
fp->max = 16;
for (fp->cnt = 0; fp->cnt < fp->min; fp->cnt++)
{
node *n = alloc_sys_page();
add_tail(&fp->list, n + FP_NODE_OFFSET);
}
}
static event global_free_pages_cleanup_event = { .hook = cleanup_global_pages };
void resource_sys_init(void)
{
if (!(page_size = sysconf(_SC_PAGESIZE)))
die("System page size must be non-zero");
if (u64_popcount(page_size) == 1)
{
init_list(&GFP->list);
GFP->cleanup = &global_free_pages_cleanup_event;
GFP->min = 0;
GFP->max = 256;
return;
}
log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size);
/* Too big or strange page, use the aligned allocator instead */
page_size = 4096;
use_fake = 1;
}
#else
void
resource_sys_init(void)
{
page_size = 4096;
use_fake = 1;
}
#endif #endif
}

View File

@ -145,10 +145,13 @@ static void coro_free(resource *r)
coro_cleaned_up = 1; coro_cleaned_up = 1;
} }
static void coro_dump(resource *r UNUSED) { }
static struct resclass coro_class = { static struct resclass coro_class = {
.name = "Coroutine", .name = "Coroutine",
.size = sizeof(struct coroutine), .size = sizeof(struct coroutine),
.free = coro_free, .free = coro_free,
.dump = coro_dump,
}; };
_Thread_local struct coroutine *this_coro = NULL; _Thread_local struct coroutine *this_coro = NULL;

View File

@ -32,7 +32,7 @@
* Current thread context * Current thread context
*/ */
_Thread_local struct birdloop *birdloop_current; _Thread_local struct birdloop *birdloop_current = NULL;
static _Thread_local struct birdloop *birdloop_wakeup_masked; static _Thread_local struct birdloop *birdloop_wakeup_masked;
static _Thread_local uint birdloop_wakeup_masked_count; static _Thread_local uint birdloop_wakeup_masked_count;
@ -391,6 +391,8 @@ birdloop_new(pool *pp, uint order, const char *name)
timers_init(&loop->time, loop->pool); timers_init(&loop->time, loop->pool);
sockets_init(loop); sockets_init(loop);
init_pages(loop);
loop->time.coro = coro_run(loop->pool, birdloop_main, loop); loop->time.coro = coro_run(loop->pool, birdloop_main, loop);
birdloop_leave(loop); birdloop_leave(loop);
@ -571,6 +573,7 @@ birdloop_main(void *arg)
/* Free the pool and loop */ /* Free the pool and loop */
birdloop_enter(loop); birdloop_enter(loop);
rp_free(loop->pool, parent); rp_free(loop->pool, parent);
flush_pages(loop);
birdloop_leave(loop); birdloop_leave(loop);
rfree(&loop->r); rfree(&loop->r);

View File

@ -7,6 +7,20 @@
#ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_ #ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_
#define _BIRD_SYSDEP_UNIX_IO_LOOP_H_ #define _BIRD_SYSDEP_UNIX_IO_LOOP_H_
#include "nest/bird.h"
#include "lib/lists.h"
#include "lib/event.h"
#include "lib/timer.h"
struct free_pages
{
list list; /* List of empty pages */
event *cleanup; /* Event to call when number of pages is outside bounds */
u16 min, max; /* Minimal and maximal number of free pages kept */
uint cnt; /* Number of empty pages */
};
struct birdloop struct birdloop
{ {
resource r; resource r;
@ -29,10 +43,17 @@ struct birdloop
uint links; uint links;
struct free_pages pages;
void (*stopped)(void *data); void (*stopped)(void *data);
void *stop_data; void *stop_data;
struct birdloop *prev_loop; struct birdloop *prev_loop;
}; };
extern _Thread_local struct birdloop *birdloop_current;
void init_pages(struct birdloop *loop);
void flush_pages(struct birdloop *loop);
#endif #endif

View File

@ -2216,9 +2216,6 @@ io_loop(void)
timers_fire(&main_birdloop.time, 1); timers_fire(&main_birdloop.time, 1);
io_close_event(); io_close_event();
/* Try to release some memory if possible */
check_stored_pages();
// FIXME // FIXME
poll_tout = (events ? 0 : 3000); /* Time in milliseconds */ poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
if (t = timers_first(&main_birdloop.time)) if (t = timers_first(&main_birdloop.time))

View File

@ -683,7 +683,7 @@ signal_init(void)
* Parsing of command-line arguments * Parsing of command-line arguments
*/ */
static char *opt_list = "B:c:dD:ps:P:u:g:flRh"; static char *opt_list = "c:dD:ps:P:u:g:flRh";
int parse_and_exit; int parse_and_exit;
char *bird_name; char *bird_name;
static char *use_user; static char *use_user;
@ -704,7 +704,6 @@ display_help(void)
fprintf(stderr, fprintf(stderr,
"\n" "\n"
"Options: \n" "Options: \n"
" -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n"
" -c <config-file> Use given configuration file instead of\n" " -c <config-file> Use given configuration file instead of\n"
" " PATH_CONFIG_FILE "\n" " " PATH_CONFIG_FILE "\n"
" -d Enable debug messages and run bird in foreground\n" " -d Enable debug messages and run bird in foreground\n"
@ -791,15 +790,12 @@ get_gid(const char *s)
return gr->gr_gid; return gr->gr_gid;
} }
extern _Bool alloc_multipage;
static void static void
parse_args(int argc, char **argv) parse_args(int argc, char **argv)
{ {
int config_changed = 0; int config_changed = 0;
int socket_changed = 0; int socket_changed = 0;
int c; int c;
int bp;
bird_name = get_bird_name(argv[0], "bird"); bird_name = get_bird_name(argv[0], "bird");
if (argc == 2) if (argc == 2)
@ -812,29 +808,6 @@ parse_args(int argc, char **argv)
while ((c = getopt(argc, argv, opt_list)) >= 0) while ((c = getopt(argc, argv, opt_list)) >= 0)
switch (c) switch (c)
{ {
case 'B':
bp = atoi(optarg);
if (bp < 1)
{
fprintf(stderr, "Strange block size power %d\n\n", bp);
display_usage();
exit(1);
}
if ((1 << bp) < page_size)
{
fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size);
display_usage();
exit(1);
}
if ((1L << bp) > page_size)
{
alloc_multipage = 1;
page_size = (1L << bp);
}
break;
case 'c': case 'c':
config_name = optarg; config_name = optarg;
config_changed = 1; config_changed = 1;