From ef63946d2edd2088d4140b2a4e8678979a73af67 Mon Sep 17 00:00:00 2001 From: Katerina Kubecova Date: Fri, 13 Dec 2024 12:35:02 +0100 Subject: [PATCH] Allow allocating cold pages inside RCU critical section We have quite large critical sections and we need to allocate inside them. This is something to revise properly later on, yet for now, instead of slowly but surely growing the virtual memory address space, it's better to optimize the cold page cache pickup and count situations where this happened inside the critical section. --- lib/resource.h | 1 + nest/cmds.c | 1 + sysdep/unix/alloc.c | 104 +++++++++++++++++++++++++++---------------- sysdep/unix/domain.c | 2 +- 4 files changed, 69 insertions(+), 39 deletions(-) diff --git a/lib/resource.h b/lib/resource.h index 8baa693c..48bf1f9b 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -154,6 +154,7 @@ extern _Atomic int pages_kept_locally; extern _Atomic int pages_kept_cold; extern _Atomic int pages_kept_cold_index; extern _Atomic int pages_total; +extern _Atomic int alloc_locking_in_rcu; void *alloc_page(void); void free_page(void *); void flush_local_pages(void); diff --git a/nest/cmds.c b/nest/cmds.c index c46c94f5..a42112bc 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -147,6 +147,7 @@ cmd_show_memory(void) cli_msg(-1018, "%-17s " SIZE_FORMAT, "Cold free pages:", SIZE_ARGS(cold)); #endif + cli_msg(-1028, "Hot page cache depleted while in RCU: %d", atomic_load_explicit(&alloc_locking_in_rcu, memory_order_relaxed)); cli_msg(0, ""); } diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index f9c6bfca..1d33b462 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -127,6 +127,7 @@ long page_size = 0; _Atomic int pages_kept_cold = 0; _Atomic int pages_kept_cold_index = 0; _Atomic int pages_total = 0; + _Atomic int alloc_locking_in_rcu = 0; static struct free_page * _Atomic page_stack = NULL; static _Thread_local struct free_page * local_page_stack = NULL; @@ -169,6 +170,27 @@ long page_size = 0; #define 
ALLOC_TRACE(fmt...) do { \ if (atomic_load_explicit(&global_runtime, memory_order_relaxed)->latency_debug & DL_ALLOCATOR) log(L_TRACE "Allocator: " fmt, ##fmt); } while (0) + +static void * +alloc_hot_page(struct free_page *fp) { + if (fp = PAGE_STACK_GET) + { + /* Reinstate the stack with the next page in list */ + PAGE_STACK_PUT(atomic_load_explicit(&fp->next, memory_order_relaxed)); + + /* Update the counters */ + UNUSED uint pk = atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed); + + /* Release the page */ + UNPROTECT_PAGE(fp); + ajlog(fp, atomic_load_explicit(&fp->next, memory_order_relaxed), pk, AJT_ALLOC_GLOBAL_HOT); + return fp; + } + /* Reinstate the stack with zero */ + PAGE_STACK_PUT(NULL); + return NULL; +} + void * alloc_page(void) { @@ -201,62 +223,68 @@ alloc_page(void) ASSERT_DIE(pages_kept_here == 0); /* If there is any free page kept hot in global storage, we use it. */ - if (fp = PAGE_STACK_GET) - { - /* Reinstate the stack with the next page in list */ - PAGE_STACK_PUT(atomic_load_explicit(&fp->next, memory_order_relaxed)); - - /* Update the counters */ - UNUSED uint pk = atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed); - - /* Release the page */ - UNPROTECT_PAGE(fp); - ajlog(fp, atomic_load_explicit(&fp->next, memory_order_relaxed), pk, AJT_ALLOC_GLOBAL_HOT); + if (fp = alloc_hot_page(fp)) return fp; - } - - /* Reinstate the stack with zero */ - PAGE_STACK_PUT(NULL); if (rcu_read_active()) { - /* We can't lock and we actually shouldn't alloc either when rcu is active - * but that's a quest for another day. */ + /* We shouldn't alloc when rcu is active but that's a quest for another day. */ + atomic_fetch_add_explicit(&alloc_locking_in_rcu, 1, memory_order_relaxed); } - else - { - /* If there is any free page kept cold, we use that. */ + /* If there is any free page kept cold, we warm up some of these. 
*/ LOCK_DOMAIN(resource, empty_pages_domain); + + /* Threads were serialized on lock and the first one might have prepared some + * blocks for the rest of threads */ + if (fp = alloc_hot_page(fp)) + { + UNLOCK_DOMAIN(resource, empty_pages_domain); + return fp; + } + if (empty_pages) { UNPROTECT_PAGE(empty_pages); + + /* We flush all the pages in this block to the hot page cache + * and return the keeper page as allocated. */ + ajlog(fp, empty_pages, empty_pages->pos, AJT_ALLOC_COLD_STD); if (empty_pages->pos) { - /* Either the keeper page contains at least one cold page pointer, return that */ - fp = empty_pages->pages[--empty_pages->pos]; - PROTECT_PAGE(empty_pages); - UNPROTECT_PAGE(fp); - ajlog(fp, empty_pages, empty_pages->pos, AJT_ALLOC_COLD_STD); - atomic_fetch_sub_explicit(&pages_kept_cold, 1, memory_order_relaxed); - } - else - { - /* Or the keeper page has no more cold page pointer, return the keeper page */ - fp = (struct free_page *) empty_pages; - empty_pages = empty_pages->next; - ajlog(fp, empty_pages, 0, AJT_ALLOC_COLD_KEEPER); - atomic_fetch_sub_explicit(&pages_kept_cold_index, 1, memory_order_relaxed); - if (!empty_pages) - ALLOC_TRACE("Taken last page from cold storage"); + /* Link one after another */ + for (uint i = 0; i < empty_pages->pos - 1; i++) + atomic_store_explicit( + &((struct free_page *) empty_pages->pages[i])->next, + empty_pages->pages[i+1], + memory_order_relaxed); + + /* And put into the hot page cache */ + atomic_store_explicit( + &((struct free_page *) empty_pages->pages[empty_pages->pos - 1])->next, + PAGE_STACK_GET, + memory_order_release); + PAGE_STACK_PUT(empty_pages->pages[0]); + + /* Update counters */ + atomic_fetch_sub_explicit(&pages_kept_cold, empty_pages->pos, memory_order_relaxed); + atomic_fetch_add_explicit(&pages_kept, empty_pages->pos, memory_order_relaxed); } + + /* We can then reuse the old keeper page. 
*/ + /* The keeper page's cold page pointers (if any) were moved to the hot page cache above, so return the keeper page itself */ + fp = (struct free_page *) empty_pages; + empty_pages = empty_pages->next; + ajlog(fp, empty_pages, 0, AJT_ALLOC_COLD_KEEPER); + atomic_fetch_sub_explicit(&pages_kept_cold_index, 1, memory_order_relaxed); + + if (!empty_pages) + ALLOC_TRACE("Taken last page from cold storage"); } UNLOCK_DOMAIN(resource, empty_pages_domain); if (fp) return fp; - } - /* And in the worst case, allocate some new pages by mmap() */ void *ptr = alloc_sys_page(); ajlog(ptr, NULL, 0, AJT_ALLOC_MMAP); diff --git a/sysdep/unix/domain.c b/sysdep/unix/domain.c index e76ac2fb..efc6fc85 100644 --- a/sysdep/unix/domain.c +++ b/sysdep/unix/domain.c @@ -106,7 +106,7 @@ void do_lock(struct domain_generic *dg, struct domain_generic **lsp) memcpy(&stack_copy, &locking_stack, sizeof(stack_copy)); struct domain_generic **lll = last_locked; - if (rcu_read_active()) + if (rcu_read_active() && (dg->order < DOMAIN_ORDER(resource))) bug("Locking forbidden while RCU reader is active"); if ((char *) lsp - (char *) &locking_stack != dg->order)