From 1338ba86da300cb62a7e3ac69126bf245f21c94d Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Tue, 17 Jan 2023 17:13:50 +0100 Subject: [PATCH] Alloc: Disable transparent huge pages The usage pattern implemented in allocator seems to be incompatible with transparent huge pages, as memory released using madvise(MADV_DONTNEED) with regular page size and alignment does not seem to trigger demotion of huge pages back to regular pages, even when significant number of pages is released. Even if demotion is triggered when system memory is low, it still breaks memory accounting. --- sysdep/cf/README | 3 +++ sysdep/cf/linux.h | 1 + sysdep/unix/alloc.c | 10 ++++++++++ 3 files changed, 14 insertions(+) diff --git a/sysdep/cf/README b/sysdep/cf/README index 68078bbe..af65aaec 100644 --- a/sysdep/cf/README +++ b/sysdep/cf/README @@ -14,3 +14,6 @@ CONFIG_DONTROUTE_UNICAST Use MSG_DONTROUTE flag for unicast packets (def for Fre CONFIG_USE_HDRINCL Use IP_HDRINCL instead of control messages for source address on raw IP sockets. CONFIG_RESTRICTED_PRIVILEGES Implements restricted privileges using drop_uid() + +CONFIG_MADV_DONTNEED_TO_FREE To free pages, use MADV_DONTNEED instead of MADV_FREE (linux) +CONFIG_DISABLE_THP Disable transparent huge pages (linux) diff --git a/sysdep/cf/linux.h b/sysdep/cf/linux.h index 9c37dd8a..5edc4969 100644 --- a/sysdep/cf/linux.h +++ b/sysdep/cf/linux.h @@ -24,6 +24,7 @@ #define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h" #define CONFIG_MADV_DONTNEED_TO_FREE +#define CONFIG_DISABLE_THP #ifndef AF_MPLS #define AF_MPLS 28 diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index 0ca12ec3..d625cd38 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -19,6 +19,10 @@ #include #endif +#ifdef CONFIG_DISABLE_THP +#include +#endif + long page_size = 0; #ifdef HAVE_MMAP @@ -222,6 +226,12 @@ global_free_pages_cleanup_event(void *data UNUSED) void resource_sys_init(void) { +#ifdef CONFIG_DISABLE_THP + /* Disable transparent huge pages, they do not work properly with madvice(MADV_DONTNEED) */ + if (prctl(PR_SET_THP_DISABLE, (unsigned long) 1, (unsigned long) 0, (unsigned long) 0, (unsigned long) 0) < 0) + die("prctl(PR_SET_THP_DISABLE) failed: %m"); +#endif + #ifdef HAVE_MMAP ASSERT_DIE(global_free_pages.cnt == 0);