mirror of
https://gitlab.nic.cz/labs/bird.git
synced 2025-01-07 17:41:58 +00:00
804916daa9
- Fix THP disable on old systems - Failed syscalls should use die() instead of bug() - Our printf uses %ld for s64 instead of long
265 lines
6.7 KiB
C
265 lines
6.7 KiB
C
/*
|
|
* BIRD Internet Routing Daemon -- Raw allocation
|
|
*
|
|
* (c) 2020 Maria Matejka <mq@ucw.cz>
|
|
*
|
|
* Can be freely distributed and used under the terms of the GNU GPL.
|
|
*/
|
|
|
|
#include "nest/bird.h"
|
|
#include "lib/resource.h"
|
|
#include "lib/lists.h"
|
|
#include "lib/event.h"
|
|
|
|
#include <errno.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
|
|
#ifdef HAVE_MMAP
|
|
#include <sys/mman.h>
|
|
#endif
|
|
|
|
#ifdef CONFIG_DISABLE_THP
|
|
#include <sys/prctl.h>
|
|
#ifndef PR_SET_THP_DISABLE
|
|
#define PR_SET_THP_DISABLE 41
|
|
#endif
|
|
#endif
|
|
|
|
long page_size = 0;
|
|
|
|
#ifdef HAVE_MMAP
|
|
#define KEEP_PAGES_MAIN_MAX 256
|
|
#define KEEP_PAGES_MAIN_MIN 8
|
|
#define CLEANUP_PAGES_BULK 256
|
|
|
|
STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX);
|
|
|
|
static _Bool use_fake = 0;
|
|
|
|
#if DEBUGGING
|
|
struct free_page {
|
|
node unused[42];
|
|
node n;
|
|
};
|
|
#else
|
|
struct free_page {
|
|
node n;
|
|
};
|
|
#endif
|
|
|
|
#define EP_POS_MAX ((page_size - OFFSETOF(struct empty_pages, pages)) / sizeof (void *))
|
|
|
|
struct empty_pages {
|
|
node n;
|
|
uint pos;
|
|
void *pages[0];
|
|
};
|
|
|
|
struct free_pages {
|
|
list pages; /* List of (struct free_page) keeping free pages without releasing them (hot) */
|
|
list empty; /* List of (struct empty_pages) keeping invalidated pages mapped for us (cold) */
|
|
u16 min, max; /* Minimal and maximal number of free pages kept */
|
|
uint cnt; /* Number of free pages in list */
|
|
event cleanup;
|
|
};
|
|
|
|
static void global_free_pages_cleanup_event(void *);
|
|
static void *alloc_cold_page(void);
|
|
|
|
static struct free_pages global_free_pages = {
|
|
.min = KEEP_PAGES_MAIN_MIN,
|
|
.max = KEEP_PAGES_MAIN_MAX,
|
|
.cleanup = { .hook = global_free_pages_cleanup_event },
|
|
};
|
|
|
|
uint *pages_kept = &global_free_pages.cnt;
|
|
|
|
static void *
|
|
alloc_sys_page(void)
|
|
{
|
|
void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
|
|
if (ptr == MAP_FAILED)
|
|
die("mmap(%ld) failed: %m", (s64) page_size);
|
|
|
|
return ptr;
|
|
}
|
|
|
|
extern int shutting_down; /* Shutdown requested. */
|
|
|
|
#else // ! HAVE_MMAP
|
|
#define use_fake 1
|
|
#endif
|
|
|
|
void *
|
|
alloc_page(void)
|
|
{
|
|
/* If the system page allocator is goofy, we use posix_memalign to get aligned blocks of memory. */
|
|
if (use_fake)
|
|
{
|
|
void *ptr = NULL;
|
|
int err = posix_memalign(&ptr, page_size, page_size);
|
|
|
|
if (err || !ptr)
|
|
die("posix_memalign(%ld) failed", (s64) page_size);
|
|
|
|
return ptr;
|
|
}
|
|
|
|
#ifdef HAVE_MMAP
|
|
struct free_pages *fps = &global_free_pages;
|
|
|
|
/* If there is any free page kept hot, we use it. */
|
|
if (fps->cnt)
|
|
{
|
|
struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages));
|
|
rem_node(&fp->n);
|
|
|
|
/* If the hot-free-page cache is getting short, request the cleanup routine to replenish the cache */
|
|
if ((--fps->cnt < fps->min) && !shutting_down)
|
|
ev_schedule(&fps->cleanup);
|
|
|
|
return fp;
|
|
}
|
|
else
|
|
return alloc_cold_page();
|
|
}
|
|
|
|
static void *
|
|
alloc_cold_page(void)
|
|
{
|
|
struct free_pages *fps = &global_free_pages;
|
|
|
|
/* If there is any free page kept cold, we use that. */
|
|
if (!EMPTY_LIST(fps->empty))
|
|
{
|
|
struct empty_pages *ep = HEAD(fps->empty);
|
|
|
|
/* Either the keeper page contains at least one cold page pointer, return that */
|
|
if (ep->pos)
|
|
return ep->pages[--ep->pos];
|
|
|
|
/* Or the keeper page has no more cold page pointer, return the keeper page */
|
|
rem_node(&ep->n);
|
|
return ep;
|
|
}
|
|
|
|
/* And in the worst case, allocate a new page by mmap() */
|
|
return alloc_sys_page();
|
|
#endif
|
|
}
|
|
|
|
void
|
|
free_page(void *ptr)
|
|
{
|
|
/* If the system page allocator is goofy, we just free the block and care no more. */
|
|
if (use_fake)
|
|
{
|
|
free(ptr);
|
|
return;
|
|
}
|
|
|
|
#ifdef HAVE_MMAP
|
|
struct free_pages *fps = &global_free_pages;
|
|
struct free_page *fp = ptr;
|
|
|
|
/* Otherwise, we add the free page to the hot-free-page list */
|
|
fp->n = (node) {};
|
|
add_tail(&fps->pages, &fp->n);
|
|
|
|
/* And if there are too many hot free pages, we ask for page cleanup */
|
|
if ((++fps->cnt > fps->max) && !shutting_down)
|
|
ev_schedule(&fps->cleanup);
|
|
#endif
|
|
}
|
|
|
|
#ifdef HAVE_MMAP
|
|
static void
|
|
global_free_pages_cleanup_event(void *data UNUSED)
|
|
{
|
|
/* Cleanup on shutdown is ignored. All pages may be kept hot, OS will take care. */
|
|
if (shutting_down)
|
|
return;
|
|
|
|
struct free_pages *fps = &global_free_pages;
|
|
|
|
/* Cleanup may get called when hot free page cache is short of pages. Replenishing. */
|
|
while (fps->cnt / 2 < fps->min)
|
|
free_page(alloc_cold_page());
|
|
|
|
/* Or the hot free page cache is too big. Moving some pages to the cold free page cache. */
|
|
for (int limit = CLEANUP_PAGES_BULK; limit && (fps->cnt > fps->max / 2); fps->cnt--, limit--)
|
|
{
|
|
struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
|
|
rem_node(&fp->n);
|
|
|
|
/* Empty pages are stored as pointers. To store them, we need a pointer block. */
|
|
struct empty_pages *ep;
|
|
if (EMPTY_LIST(fps->empty) || ((ep = HEAD(fps->empty))->pos == EP_POS_MAX))
|
|
{
|
|
/* There is either no pointer block or the last block is full. We use this block as a pointer block. */
|
|
ep = (struct empty_pages *) fp;
|
|
*ep = (struct empty_pages) {};
|
|
add_head(&fps->empty, &ep->n);
|
|
}
|
|
else
|
|
{
|
|
/* We store this block as a pointer into the first free place
|
|
* and tell the OS that the underlying memory is trash. */
|
|
ep->pages[ep->pos++] = fp;
|
|
if (madvise(fp, page_size,
|
|
#ifdef CONFIG_MADV_DONTNEED_TO_FREE
|
|
MADV_DONTNEED
|
|
#else
|
|
MADV_FREE
|
|
#endif
|
|
) < 0)
|
|
bug("madvise(%p) failed: %m", fp);
|
|
}
|
|
}
|
|
|
|
/* If the hot free page cleanup hit the limit, re-schedule this routine
|
|
* to allow for other routines to run. */
|
|
if (fps->cnt > fps->max)
|
|
ev_schedule(&fps->cleanup);
|
|
}
|
|
#endif
|
|
|
|
void
|
|
resource_sys_init(void)
|
|
{
|
|
#ifdef CONFIG_DISABLE_THP
|
|
/* Disable transparent huge pages, they do not work properly with madvice(MADV_DONTNEED) */
|
|
if (prctl(PR_SET_THP_DISABLE, (unsigned long) 1, (unsigned long) 0, (unsigned long) 0, (unsigned long) 0) < 0)
|
|
log(L_WARN "Cannot disable transparent huge pages: prctl(PR_SET_THP_DISABLE) failed: %m");
|
|
#endif
|
|
|
|
#ifdef HAVE_MMAP
|
|
ASSERT_DIE(global_free_pages.cnt == 0);
|
|
|
|
/* Check what page size the system supports */
|
|
if (!(page_size = sysconf(_SC_PAGESIZE)))
|
|
die("System page size must be non-zero");
|
|
|
|
if ((u64_popcount(page_size) == 1) && (page_size >= (1 << 10)) && (page_size <= (1 << 18)))
|
|
{
|
|
/* We assume that page size has only one bit and is between 1K and 256K (incl.).
|
|
* Otherwise, the assumptions in lib/slab.c (sl_head's num_full range) aren't met. */
|
|
|
|
struct free_pages *fps = &global_free_pages;
|
|
|
|
init_list(&fps->pages);
|
|
init_list(&fps->empty);
|
|
global_free_pages_cleanup_event(NULL);
|
|
return;
|
|
}
|
|
|
|
/* Too big or strange page, use the aligned allocator instead */
|
|
log(L_WARN "Got strange memory page size (%ld), using the aligned allocator instead", (s64) page_size);
|
|
use_fake = 1;
|
|
#endif
|
|
|
|
page_size = 4096;
|
|
}
|