0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2025-01-08 18:11:54 +00:00

Page allocator moved from pools to IO loops.

The resource pool system is highly hierarchical and keeping spare pages
in pools leads to unnecessarily complex memory management.

Loops have a flat hierarchy, at least for now, and it is therefore much
easier to take care of pages, especially in cases of excessive virtual memory
fragmentation.
This commit is contained in:
Maria Matejka 2021-11-30 23:57:14 +01:00
parent 385b3ea395
commit bb63e99d78
12 changed files with 206 additions and 212 deletions

View File

@ -130,7 +130,7 @@ lp_alloc(linpool *m, uint size)
{
/* Need to allocate a new chunk */
if (m->use_pages)
c = alloc_page(m->p);
c = alloc_page();
else
c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size);
@ -271,7 +271,7 @@ lp_free(resource *r)
{
c = d->next;
if (m->use_pages)
free_page(m->p, d);
free_page(d);
else
xfree(d);
}

View File

@ -30,14 +30,6 @@
* is freed upon shutdown of the module.
*/
/*
 * Bookkeeping header for the per-pool page cache. alloc_page(pool *) places
 * it at the start of a page obtained from alloc_sys_page(); the remainder
 * of that page is used as the ptr[] array of cached free pages.
 */
struct pool_pages {
/* Number of valid entries in ptr[] (cached pages ready for reuse) */
uint free;
/* Number of pages currently handed out to users of the pool */
uint used;
/* Cached free pages; capacity is POOL_PAGES_MAX */
void *ptr[0];
};
/* How many page pointers fit into one page after the pool_pages header */
#define POOL_PAGES_MAX ((page_size - sizeof(struct pool_pages)) / sizeof (void *))
static void pool_dump(resource *);
static void pool_free(resource *);
static resource *pool_lookup(resource *, unsigned long);
@ -54,9 +46,6 @@ static struct resclass pool_class = {
pool root_pool;
void *alloc_sys_page(void);
int free_sys_page(void *);
static int indent;
/**
@ -103,16 +92,6 @@ pool_free(resource *P)
r = rr;
}
if (p->pages)
{
ASSERT_DIE(!p->pages->used);
for (uint i = 0; i < p->pages->free; i++)
free_sys_page(p->pages->ptr[i]);
free_sys_page(p->pages);
}
pool_parent = parent;
}
@ -185,9 +164,6 @@ pool_memsize_locked(pool *p)
WALK_LIST(r, p->inside)
sum += rmemsize(r);
if (p->pages)
sum += page_size * (p->pages->used + p->pages->free + 1);
return sum;
}
@ -551,49 +527,6 @@ mb_free(void *m)
rfree(b);
}
/*
 * alloc_page - get one page on behalf of pool @p (per-pool variant)
 *
 * Creates the pool's page bookkeeping on first use, bumps the count of
 * pages in use and returns either a cached page, cleared by bzero(), or
 * a page taken directly from alloc_sys_page().
 */
void *
alloc_page(pool *p)
{
/* First request from this pool: allocate the bookkeeping page itself */
if (!p->pages)
{
p->pages = alloc_sys_page();
p->pages->free = 0;
p->pages->used = 1;
}
else
p->pages->used++;
/* Prefer a cached page; it has to be zeroed before it is handed out */
if (p->pages->free)
{
void *ptr = p->pages->ptr[--p->pages->free];
bzero(ptr, page_size);
return ptr;
}
else
/* Cache is empty, fall back to the system page allocator */
return alloc_sys_page();
}
/*
 * free_page - return page @ptr to the cache of pool @p (per-pool variant)
 *
 * The page is stored in the pool's ptr[] array. When the array is already
 * full (POOL_PAGES_MAX entries), everything above the first quarter is
 * released by free_sys_page() first to make room.
 */
void
free_page(pool *p, void *ptr)
{
/* A page can only be freed into a pool that has allocated one before */
ASSERT_DIE(p->pages);
p->pages->used--;
ASSERT_DIE(p->pages->free <= POOL_PAGES_MAX);
/* Cache full: keep a quarter of the entries, give the rest back */
if (p->pages->free == POOL_PAGES_MAX)
{
const unsigned long keep = POOL_PAGES_MAX / 4;
for (uint i = keep; i < p->pages->free; i++)
free_sys_page(p->pages->ptr[i]);
p->pages->free = keep;
}
/* Remember the freed page for later reuse */
p->pages->ptr[p->pages->free++] = ptr;
}
#define STEP_UP(x) ((x) + (x)/2 + 4)

View File

@ -108,8 +108,8 @@ void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_siz
extern long page_size;
/* Allocator of whole pages; for use in slabs and other high-level allocators. */
void *alloc_page(pool *);
void free_page(pool *, void *);
void *alloc_page(void);
void free_page(void *);
#define PAGE_HEAD(x) ((void *) (((intptr_t) (x)) & ~(page_size-1)))
#ifdef HAVE_LIBDMALLOC

View File

@ -269,7 +269,7 @@ no_partial:
s->num_empty_heads--;
goto okay;
}
h = alloc_page(s->p);
h = alloc_page();
#ifdef POISON
memset(h, 0xba, page_size);
#endif
@ -332,7 +332,7 @@ sl_free(slab *s, void *oo)
#ifdef POISON
memset(h, 0xde, page_size);
#endif
free_page(s->p, h);
free_page(h);
}
else
{
@ -349,11 +349,11 @@ slab_free(resource *r)
struct sl_head *h, *g;
WALK_LIST_DELSAFE(h, g, s->empty_heads)
free_page(s->p, h);
free_page(h);
WALK_LIST_DELSAFE(h, g, s->partial_heads)
free_page(s->p, h);
free_page(h);
WALK_LIST_DELSAFE(h, g, s->full_heads)
free_page(s->p, h);
free_page(h);
}
static void
@ -386,8 +386,7 @@ slab_memsize(resource *r)
WALK_LIST(h, s->full_heads)
heads++;
// return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + page_size);
return ALLOC_OVERHEAD + sizeof(struct slab); /* The page sizes are accounted for in the pool */
return ALLOC_OVERHEAD + sizeof(struct slab) + heads * page_size;
}
static resource *

View File

@ -198,6 +198,7 @@ t_as_path_converting(void)
#endif
void resource_sys_init(void);
void io_init(void);
int
main(int argc, char *argv[])
@ -207,6 +208,7 @@ main(int argc, char *argv[])
resource_init();
the_bird_lock();
birdloop_init();
io_init();
bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities.");
bt_test_suite(t_path_format, "Testing formating as path into byte buffer");

View File

@ -1057,7 +1057,7 @@ rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_
if (!rpeb)
{
rpeb = alloc_page(tab->rp);
rpeb = alloc_page();
*rpeb = (struct rt_export_block) {};
add_tail(&tab->pending_exports, &rpeb->n);
}
@ -2157,7 +2157,7 @@ rt_free(resource *_r)
static void
rt_res_dump(resource *_r)
{
RT_LOCKED((rtable *) _r, r)
rtable_private *r = RT_PRIV((rtable *) _r);
debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n",
r->name, net_label[r->addr_type], r->rt_count, r->use_count);
}
@ -2484,7 +2484,7 @@ rt_export_cleanup(void *data)
memset(reb, 0xbe, page_size);
#endif
free_page(tab->rp, reb);
free_page(reb);
if (EMPTY_LIST(tab->pending_exports))
{

View File

@ -9,6 +9,8 @@
#include "nest/bird.h"
#include "lib/resource.h"
#include "sysdep/unix/io-loop.h"
#include <stdlib.h>
#include <unistd.h>
#include <stdatomic.h>
@ -19,86 +21,47 @@
#endif
long page_size = 0;
_Bool alloc_multipage = 0;
static _Atomic int global_page_list_not_empty;
static list global_page_list;
static _Atomic int global_page_spinlock;
#define GLOBAL_PAGE_SPIN_LOCK for (int v = 0; !atomic_compare_exchange_weak_explicit(&global_page_spinlock, &v, 1, memory_order_acq_rel, memory_order_acquire); v = 0)
#define GLOBAL_PAGE_SPIN_UNLOCK do { int v = 1; ASSERT_DIE(atomic_compare_exchange_strong_explicit(&global_page_spinlock, &v, 0, memory_order_acq_rel, memory_order_acquire)); } while (0)
#ifdef HAVE_MMAP
#if DEBUGGING
#define FP_NODE_OFFSET 42
#else
#define FP_NODE_OFFSET 1
#endif
static _Bool use_fake = 0;
#else
static _Bool use_fake = 1;
#endif
void resource_sys_init(void)
static void *
alloc_sys_page(void)
{
#ifdef HAVE_MMAP
init_list(&global_page_list);
void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (!(page_size = sysconf(_SC_PAGESIZE)))
die("System page size must be non-zero");
if (ptr == MAP_FAILED)
bug("mmap(%lu) failed: %m", page_size);
if ((u64_popcount(page_size) > 1) || (page_size > 16384))
#endif
{
/* Too big or strange page, use the aligned allocator instead */
page_size = 4096;
use_fake = 1;
}
return ptr;
}
void *
alloc_sys_page(void)
alloc_page(void)
{
#ifdef HAVE_MMAP
if (!use_fake)
{
if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed))
{
GLOBAL_PAGE_SPIN_LOCK;
if (!EMPTY_LIST(global_page_list))
{
node *ret = HEAD(global_page_list);
rem_node(ret);
if (EMPTY_LIST(global_page_list))
atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed);
GLOBAL_PAGE_SPIN_UNLOCK;
memset(ret, 0, sizeof(node));
return (void *) ret;
}
GLOBAL_PAGE_SPIN_UNLOCK;
}
struct free_pages *fp = &birdloop_current->pages;
if (!fp->cnt)
return alloc_sys_page();
node *n = HEAD(fp->list);
rem_node(n);
if (--fp->cnt < fp->min)
ev_send(&global_work_list, fp->cleanup);
if (alloc_multipage)
{
void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (big == MAP_FAILED)
bug("mmap(%lu) failed: %m", page_size);
uintptr_t offset = ((uintptr_t) big) % page_size;
if (offset)
{
void *ret = big + page_size - offset;
munmap(big, page_size - offset);
munmap(ret + page_size, offset);
return ret;
}
else
{
munmap(big + page_size, page_size);
return big;
}
}
void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ret == MAP_FAILED)
bug("mmap(%lu) failed: %m", page_size);
return ret;
void *ptr = n - FP_NODE_OFFSET;
memset(ptr, 0, page_size);
return ptr;
}
else
#endif
@ -111,56 +74,156 @@ alloc_sys_page(void)
}
void
free_sys_page(void *ptr)
free_page(void *ptr)
{
#ifdef HAVE_MMAP
if (!use_fake)
{
if (munmap(ptr, page_size) < 0)
#ifdef ENOMEM
if (errno == ENOMEM)
{
memset(ptr, 0, page_size);
struct free_pages *fp = &birdloop_current->pages;
struct node *n = ptr;
n += FP_NODE_OFFSET;
GLOBAL_PAGE_SPIN_LOCK;
add_tail(&global_page_list, (node *) ptr);
atomic_store_explicit(&global_page_list_not_empty, 1, memory_order_relaxed);
GLOBAL_PAGE_SPIN_UNLOCK;
}
else
#endif
bug("munmap(%p) failed: %m", ptr);
memset(n, 0, sizeof(node));
add_tail(&fp->list, n);
if (++fp->cnt > fp->max)
ev_send(&global_work_list, fp->cleanup);
}
else
#endif
free(ptr);
}
#ifdef HAVE_MMAP
#define GFP (&main_birdloop.pages)
void
check_stored_pages(void)
flush_pages(struct birdloop *loop)
{
#ifdef ENOMEM
if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed) == 0)
return;
ASSERT_DIE(birdloop_inside(&main_birdloop));
for (uint limit = 0; limit < 256; limit++)
{
GLOBAL_PAGE_SPIN_LOCK;
void *ptr = HEAD(global_page_list);
if (!NODE_VALID(ptr))
{
atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed);
GLOBAL_PAGE_SPIN_UNLOCK;
return;
}
add_tail_list(&GFP->list, &loop->pages.list);
GFP->cnt += loop->pages.cnt;
loop->pages.cnt = 0;
loop->pages.list = (list) {};
loop->pages.min = 0;
loop->pages.max = 0;
rem_node(ptr);
if (munmap(ptr, page_size) < 0)
if (errno == ENOMEM)
add_tail(&global_page_list, ptr);
else
bug("munmap(%p) failed: %m", ptr);
GLOBAL_PAGE_SPIN_UNLOCK;
}
#endif
rfree(loop->pages.cleanup);
loop->pages.cleanup = NULL;
}
/*
 * cleanup_pages - event hook rebalancing the free-page list of one loop
 * @data: the struct birdloop this event was created for (see init_pages())
 *
 * With the loop entered, the list of the current loop is first refilled up
 * to its minimum (from the global list as long as that stays above its own
 * minimum, then by fresh system pages), and afterwards trimmed down to its
 * maximum by moving pages to the global list. Once the loop is left again,
 * the global cleanup event is sent if the global list exceeds its maximum.
 */
static void
cleanup_pages(void *data)
{
  struct birdloop *loop = data;

  birdloop_enter(loop);

  struct free_pages *own = &birdloop_current->pages;

  /* Underflow, step 1: borrow spare pages from the global list */
  for (; (own->cnt < own->min) && (GFP->cnt > GFP->min); own->cnt++, GFP->cnt--)
  {
    node *moved = HEAD(GFP->list);
    rem_node(moved);
    add_tail(&own->list, moved);
  }

  /* Underflow, step 2: the global list could not help, map new pages */
  for (; own->cnt < own->min; own->cnt++)
  {
    node *fresh = alloc_sys_page();
    add_tail(&own->list, fresh + FP_NODE_OFFSET);
  }

  /* Overflow: hand the surplus over to the global list */
  for (; own->cnt > own->max; own->cnt--, GFP->cnt++)
  {
    node *moved = HEAD(own->list);
    rem_node(moved);
    add_tail(&GFP->list, moved);
  }

  birdloop_leave(loop);

  /* The global list may have grown too much as a result */
  if (GFP->cnt > GFP->max)
    ev_send(&global_work_list, GFP->cleanup);
}
/*
 * cleanup_global_pages - event hook maintaining the global free-page list
 *
 * Adds fresh system pages while the list holds fewer than its maximum, then
 * unmaps pages from the tail while it holds more. A page whose munmap()
 * fails with ENOMEM is put back at the head of the list; any other munmap()
 * failure is reported by bug(). The number of unmap attempts is limited to
 * the list size at the start of the trimming phase.
 *
 * NOTE(review): the refill loop is bounded by max, not min -- confirm this
 * is intended.
 */
static void
cleanup_global_pages(void *data UNUSED)
{
  for (; GFP->cnt < GFP->max; GFP->cnt++)
  {
    node *fresh = alloc_sys_page();
    add_tail(&GFP->list, fresh + FP_NODE_OFFSET);
  }

  uint attempts = GFP->cnt;
  while ((attempts > 0) && (GFP->cnt > GFP->max))
  {
    node *last = TAIL(GFP->list);
    /* The list node is stored FP_NODE_OFFSET nodes behind the page start */
    void *page = last - FP_NODE_OFFSET;

    rem_node(last);

    if (munmap(page, page_size) == 0)
      GFP->cnt--;
    else if (errno == ENOMEM)
      add_head(&GFP->list, last);	/* Keep the page and try another one */
    else
      bug("munmap(%p) failed: %m", page);

    attempts--;
  }
}
/*
 * init_pages - set up the free-page list of a loop
 * @loop: loop whose page list is initialized
 *
 * Initializes the list, creates the cleanup event (cleanup_pages() with
 * @loop as its data, allocated from root_pool), sets the bounds to 4..16
 * pages and preallocates the minimum number of pages.
 */
void
init_pages(struct birdloop *loop)
{
  struct free_pages *cache = &loop->pages;

  init_list(&cache->list);
  cache->cleanup = ev_new_init(&root_pool, cleanup_pages, loop);
  cache->min = 4;
  cache->max = 16;

  /* Start with the minimum already in place */
  cache->cnt = 0;
  while (cache->cnt < cache->min)
  {
    node *fresh = alloc_sys_page();
    add_tail(&cache->list, fresh + FP_NODE_OFFSET);
    cache->cnt++;
  }
}
/* Statically allocated event running cleanup_global_pages() */
static event global_free_pages_cleanup_event = { .hook = cleanup_global_pages };

/*
 * resource_sys_init - initialize the page allocator (mmap variant)
 *
 * Reads the system page size. If u64_popcount() reports exactly one set bit
 * (i.e. a power of two), the global free-page list is set up with bounds
 * 0..256; otherwise a warning is logged and the aligned allocator is used
 * with 4096-byte pages.
 */
void resource_sys_init(void)
{
  page_size = sysconf(_SC_PAGESIZE);
  if (!page_size)
    die("System page size must be non-zero");

  if (u64_popcount(page_size) != 1)
  {
    /* Strange page size, use the aligned allocator instead */
    log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size);
    page_size = 4096;
    use_fake = 1;
    return;
  }

  init_list(&GFP->list);
  GFP->cleanup = &global_free_pages_cleanup_event;
  GFP->min = 0;
  GFP->max = 256;
}
#else
/*
 * resource_sys_init - initialize the page allocator (variant built when
 * HAVE_MMAP is not defined)
 *
 * Always selects the aligned allocator with 4096-byte pages.
 */
void
resource_sys_init(void)
{
  use_fake = 1;
  page_size = 4096;
}
#endif

View File

@ -145,10 +145,13 @@ static void coro_free(resource *r)
coro_cleaned_up = 1;
}
/* Dump hook of the coroutine resource class; intentionally prints nothing */
static void
coro_dump(resource *r UNUSED)
{
}

/* Resource class describing struct coroutine */
static struct resclass coro_class = {
  .name = "Coroutine",
  .size = sizeof(struct coroutine),
  .free = coro_free,
  .dump = coro_dump,
};
_Thread_local struct coroutine *this_coro = NULL;

View File

@ -32,7 +32,7 @@
* Current thread context
*/
_Thread_local struct birdloop *birdloop_current;
_Thread_local struct birdloop *birdloop_current = NULL;
static _Thread_local struct birdloop *birdloop_wakeup_masked;
static _Thread_local uint birdloop_wakeup_masked_count;
@ -391,6 +391,8 @@ birdloop_new(pool *pp, uint order, const char *name)
timers_init(&loop->time, loop->pool);
sockets_init(loop);
init_pages(loop);
loop->time.coro = coro_run(loop->pool, birdloop_main, loop);
birdloop_leave(loop);
@ -571,6 +573,7 @@ birdloop_main(void *arg)
/* Free the pool and loop */
birdloop_enter(loop);
rp_free(loop->pool, parent);
flush_pages(loop);
birdloop_leave(loop);
rfree(&loop->r);

View File

@ -7,6 +7,20 @@
#ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_
#define _BIRD_SYSDEP_UNIX_IO_LOOP_H_
#include "nest/bird.h"
#include "lib/lists.h"
#include "lib/event.h"
#include "lib/timer.h"
/*
 * Bookkeeping of empty memory pages kept for reuse. One instance is embedded
 * in every struct birdloop as its "pages" member.
 */
struct free_pages
{
list list; /* List of empty pages */
event *cleanup; /* Event to call when number of pages is outside bounds */
u16 min, max; /* Minimal and maximal number of free pages kept */
uint cnt; /* Number of empty pages */
};
struct birdloop
{
resource r;
@ -29,10 +43,17 @@ struct birdloop
uint links;
struct free_pages pages;
void (*stopped)(void *data);
void *stop_data;
struct birdloop *prev_loop;
};
extern _Thread_local struct birdloop *birdloop_current;
void init_pages(struct birdloop *loop);
void flush_pages(struct birdloop *loop);
#endif

View File

@ -2216,9 +2216,6 @@ io_loop(void)
timers_fire(&main_birdloop.time, 1);
io_close_event();
/* Try to release some memory if possible */
check_stored_pages();
// FIXME
poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
if (t = timers_first(&main_birdloop.time))

View File

@ -683,7 +683,7 @@ signal_init(void)
* Parsing of command-line arguments
*/
static char *opt_list = "B:c:dD:ps:P:u:g:flRh";
static char *opt_list = "c:dD:ps:P:u:g:flRh";
int parse_and_exit;
char *bird_name;
static char *use_user;
@ -704,7 +704,6 @@ display_help(void)
fprintf(stderr,
"\n"
"Options: \n"
" -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n"
" -c <config-file> Use given configuration file instead of\n"
" " PATH_CONFIG_FILE "\n"
" -d Enable debug messages and run bird in foreground\n"
@ -791,15 +790,12 @@ get_gid(const char *s)
return gr->gr_gid;
}
extern _Bool alloc_multipage;
static void
parse_args(int argc, char **argv)
{
int config_changed = 0;
int socket_changed = 0;
int c;
int bp;
bird_name = get_bird_name(argv[0], "bird");
if (argc == 2)
@ -812,29 +808,6 @@ parse_args(int argc, char **argv)
while ((c = getopt(argc, argv, opt_list)) >= 0)
switch (c)
{
case 'B':
bp = atoi(optarg);
if (bp < 1)
{
fprintf(stderr, "Strange block size power %d\n\n", bp);
display_usage();
exit(1);
}
if ((1 << bp) < page_size)
{
fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size);
display_usage();
exit(1);
}
if ((1L << bp) > page_size)
{
alloc_multipage = 1;
page_size = (1L << bp);
}
break;
case 'c':
config_name = optarg;
config_changed = 1;