From 1b39473993abcc6180657c8d3bd5f9e12e4bc816 Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Fri, 12 Nov 2021 22:58:40 +0100
Subject: [PATCH 1/3] Introducing basic RCU primitives for lock-less shared
 data structures

---
 lib/Makefile            |  2 +-
 lib/coro.h              |  2 ++
 lib/rcu.c               | 79 +++++++++++++++++++++++++++++++++++++++++
 lib/rcu.h               | 55 ++++++++++++++++++++++++++++
 lib/resource.c          |  3 ++
 sysdep/unix/coroutine.c | 10 ++++++
 6 files changed, 150 insertions(+), 1 deletion(-)
 create mode 100644 lib/rcu.c
 create mode 100644 lib/rcu.h

diff --git a/lib/Makefile b/lib/Makefile
index 4378a7bd..98c5db3c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,4 +1,4 @@
-src := bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c
+src := bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c rcu.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c
 
 obj := $(src-o-files)
 $(all-daemon)

diff --git a/lib/coro.h b/lib/coro.h
index 17ccff89..b36f1d2c 100644
--- a/lib/coro.h
+++ b/lib/coro.h
@@ -25,5 +25,7 @@ struct coroutine *coro_run(pool *, void (*entry)(void *), void *data);
 /* Get self. */
 extern _Thread_local struct coroutine *this_coro;
 
+/* Just wait for a little while. Not intended for general use; use events if possible. */
+void coro_yield(void);
 
 #endif

diff --git a/lib/rcu.c b/lib/rcu.c
new file mode 100644
index 00000000..69f3442f
--- /dev/null
+++ b/lib/rcu.c
@@ -0,0 +1,79 @@
+/*
+ *	BIRD Library -- Read-Copy-Update Basic Operations
+ *
+ *	(c) 2021 Maria Matejka
+ *	(c) 2021 CZ.NIC z.s.p.o.
+ *
+ *	Can be freely distributed and used under the terms of the GNU GPL.
+ *	Note: all the relevant patents shall be expired.
+ *
+ *	Using the Supplementary Material for User-Level Implementations of Read-Copy-Update
+ *	by Matthieu Desnoyers, Paul E. McKenney, Alan S. Stern, Michel R. Dagenais and Jonathan Walpole
+ *	obtained from https://www.efficios.com/pub/rcu/urcu-supp-accepted.pdf
+ */
+
+#include "lib/rcu.h"
+#include "lib/coro.h"
+#include "lib/locking.h"
+
+_Atomic uint rcu_gp_ctl = RCU_NEST_CNT;
+_Thread_local struct rcu_coro *this_rcu_coro = NULL;
+
+static list rcu_coro_list;
+
+static struct rcu_coro main_rcu_coro;
+
+DEFINE_DOMAIN(resource);
+static DOMAIN(resource) rcu_domain;
+
+static int
+rcu_gp_ongoing(_Atomic uint *ctl)
+{
+  uint val = atomic_load(ctl);
+  return (val & RCU_NEST_CNT) && ((val ^ rcu_gp_ctl) & RCU_GP_PHASE);
+}
+
+static void
+update_counter_and_wait(void)
+{
+  atomic_fetch_xor(&rcu_gp_ctl, RCU_GP_PHASE);
+  struct rcu_coro *rc;
+  WALK_LIST(rc, rcu_coro_list)
+    while (rcu_gp_ongoing(&rc->ctl))
+      coro_yield();
+}
+
+void
+synchronize_rcu(void)
+{
+  LOCK_DOMAIN(resource, rcu_domain);
+  update_counter_and_wait();
+  update_counter_and_wait();
+  UNLOCK_DOMAIN(resource, rcu_domain);
+}
+
+void
+rcu_coro_start(struct rcu_coro *rc)
+{
+  LOCK_DOMAIN(resource, rcu_domain);
+  add_tail(&rcu_coro_list, &rc->n);
+  this_rcu_coro = rc;
+  UNLOCK_DOMAIN(resource, rcu_domain);
+}
+
+void
+rcu_coro_stop(struct rcu_coro *rc)
+{
+  LOCK_DOMAIN(resource, rcu_domain);
+  this_rcu_coro = NULL;
+  rem_node(&rc->n);
+  UNLOCK_DOMAIN(resource, rcu_domain);
+}
+
+void
+rcu_init(void)
+{
+  rcu_domain = DOMAIN_NEW(resource, "Read-Copy-Update");
+  init_list(&rcu_coro_list);
+  rcu_coro_start(&main_rcu_coro);
+}
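
A note on the encoding used above: rcu_gp_ctl packs a grace-period phase bit (RCU_GP_PHASE) next to a nest-count field seeded with RCU_NEST_CNT, and every reader snapshots the whole word on its outermost rcu_read_lock(). A writer flips the phase and then waits until no registered reader is still running under the old phase; synchronize_rcu() flips it twice, once per update_counter_and_wait() call, to also cover readers racing with the flip itself. The following stand-alone model (plain C11; gp_ongoing(), main() and the scenario are illustrative, not part of the patch) demonstrates the predicate from rcu_gp_ongoing():

#include <stdatomic.h>
#include <stdio.h>

#define RCU_GP_PHASE  0x100000
#define RCU_NEST_CNT  0x000001

static _Atomic unsigned gp_ctl = RCU_NEST_CNT;  /* models rcu_gp_ctl */

/* Same test as rcu_gp_ongoing(): a reader delays the grace period only if
 * it is inside a read-side section and its snapshot carries the old phase. */
static int gp_ongoing(unsigned reader_ctl)
{
  return (reader_ctl & RCU_NEST_CNT)
      && ((reader_ctl ^ atomic_load(&gp_ctl)) & RCU_GP_PHASE);
}

int main(void)
{
  unsigned reader = atomic_load(&gp_ctl);     /* rcu_read_lock() snapshot */

  atomic_fetch_xor(&gp_ctl, RCU_GP_PHASE);    /* writer flips the phase */
  printf("reader blocks the GP: %d\n", gp_ongoing(reader));  /* prints 1 */

  reader -= RCU_NEST_CNT;                     /* rcu_read_unlock() */
  printf("reader blocks the GP: %d\n", gp_ongoing(reader));  /* prints 0 */
  return 0;
}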
diff --git a/lib/rcu.h b/lib/rcu.h
new file mode 100644
index 00000000..ac8fc9ce
--- /dev/null
+++ b/lib/rcu.h
@@ -0,0 +1,55 @@
+/*
+ *	BIRD Library -- Read-Copy-Update Basic Operations
+ *
+ *	(c) 2021 Maria Matejka
+ *	(c) 2021 CZ.NIC z.s.p.o.
+ *
+ *	Can be freely distributed and used under the terms of the GNU GPL.
+ *	Note: all the relevant patents shall be expired.
+ */
+
+#ifndef _BIRD_RCU_H_
+#define _BIRD_RCU_H_
+
+#include "lib/birdlib.h"
+#include "lib/lists.h"
+#include <stdatomic.h>
+
+#define RCU_GP_PHASE	0x100000
+#define RCU_NEST_MASK	0x0fffff
+#define RCU_NEST_CNT	0x000001
+
+extern _Atomic uint rcu_gp_ctl;
+
+struct rcu_coro {
+  node n;
+  _Atomic uint ctl;
+};
+
+extern _Thread_local struct rcu_coro *this_rcu_coro;
+
+static inline void rcu_read_lock(void)
+{
+  uint cmp = atomic_load_explicit(&this_rcu_coro->ctl, memory_order_acquire);
+
+  if (cmp & RCU_NEST_MASK)
+    atomic_store_explicit(&this_rcu_coro->ctl, cmp + RCU_NEST_CNT, memory_order_relaxed);
+  else
+    atomic_store(&this_rcu_coro->ctl, atomic_load_explicit(&rcu_gp_ctl, memory_order_acquire));
+}
+
+static inline void rcu_read_unlock(void)
+{
+  atomic_fetch_sub(&this_rcu_coro->ctl, RCU_NEST_CNT);
+}
+
+void synchronize_rcu(void);
+
+/* Registering and unregistering a coroutine. To be called from coroutine implementation */
+void rcu_coro_start(struct rcu_coro *);
+void rcu_coro_stop(struct rcu_coro *);
+
+/* Run this from resource init */
+void rcu_init(void);
+
+#endif
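
How the API is meant to be used, as a minimal hedged sketch: readers bracket pointer traversals with rcu_read_lock()/rcu_read_unlock() and take no lock at all; a writer first makes the old data unreachable, then calls synchronize_rcu(), and only then reclaims it. The shared pointer, struct node and free_node() below are illustrative, not part of the patch:

#include "lib/rcu.h"

struct node { int value; };

static struct node * _Atomic shared;   /* hypothetical RCU-protected pointer */

/* Stand-in for the real reclamation (slab free, mb_free, ...). */
static void free_node(struct node *n) { (void) n; }

static int
reader_peek(void)
{
  rcu_read_lock();
  struct node *n = atomic_load_explicit(&shared, memory_order_acquire);
  int val = n ? n->value : -1;   /* n stays valid for the whole section */
  rcu_read_unlock();
  return val;
}

static void
writer_retire(struct node *replacement)
{
  struct node *old = atomic_exchange(&shared, replacement);
  if (!old)
    return;

  synchronize_rcu();   /* every reader that might still see `old` has finished */
  free_node(old);      /* now provably safe */
}

Note that synchronize_rcu() busy-waits (via coro_yield(), defined later in this patch) until every registered coroutine has passed a quiescent state, so it belongs on slow writer paths only.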
diff --git a/lib/resource.c b/lib/resource.c
index 2d041ad5..0651406f 100644
--- a/lib/resource.c
+++ b/lib/resource.c
@@ -13,6 +13,7 @@
 #include "nest/bird.h"
 #include "lib/resource.h"
 #include "lib/string.h"
+#include "lib/rcu.h"
 
 /**
  * DOC: Resource pools
@@ -284,6 +285,8 @@ rlookup(unsigned long a)
 void
 resource_init(void)
 {
+  rcu_init();
+
   root_pool.r.class = &pool_class;
   root_pool.name = "Root";
   init_list(&root_pool.inside);

diff --git a/sysdep/unix/coroutine.c b/sysdep/unix/coroutine.c
index 4758c056..12ba55d8 100644
--- a/sysdep/unix/coroutine.c
+++ b/sysdep/unix/coroutine.c
@@ -18,6 +18,7 @@
 #include "lib/birdlib.h"
 #include "lib/locking.h"
 #include "lib/coro.h"
+#include "lib/rcu.h"
 #include "lib/resource.h"
 #include "lib/timer.h"
 
@@ -128,6 +129,7 @@ struct coroutine {
   resource r;
   pthread_t id;
  pthread_attr_t attr;
+  struct rcu_coro rcu;
   void (*entry)(void *);
   void *data;
 };
@@ -137,6 +139,7 @@ static _Thread_local _Bool coro_cleaned_up = 0;
 static void coro_free(resource *r)
 {
   struct coroutine *c = (void *) r;
+  rcu_coro_stop(&c->rcu);
   ASSERT_DIE(pthread_equal(pthread_self(), c->id));
   pthread_attr_destroy(&c->attr);
   coro_cleaned_up = 1;
@@ -157,6 +160,7 @@ static void *coro_entry(void *p)
   ASSERT_DIE(c->entry);
 
   this_coro = c;
+  rcu_coro_start(&c->rcu);
 
   c->entry(c->data);
 
   ASSERT_DIE(coro_cleaned_up);
@@ -190,3 +194,9 @@ struct coroutine *coro_run(pool *p, void (*entry)(void *), void *data)
 
   return c;
 }
+
+void
+coro_yield(void)
+{
+  usleep(100);
+}
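
With these hooks, every coroutine started through coro_run() is registered as an RCU reader in coro_entry() before its entry function runs, and unregistered in coro_free(); the main thread is covered by main_rcu_coro from rcu_init(). Protocol code therefore never calls rcu_coro_start()/rcu_coro_stop() itself, as this sketch illustrates (the worker body and its spawner are hypothetical):

#include "lib/coro.h"
#include "lib/rcu.h"
#include "lib/resource.h"

/* Hypothetical worker: it may open read-side sections right away,
 * because coro_entry() has already called rcu_coro_start() for it. */
static void
worker_main(void *data)
{
  (void) data;
  rcu_read_lock();
  /* ... walk RCU-protected structures ... */
  rcu_read_unlock();
}

static void
spawn_worker(pool *p)
{
  coro_run(p, worker_main, NULL);   /* reader registration is implicit */
}

The usleep(100) in coro_yield() keeps the grace-period polling trivial at the cost of up to a few hundred microseconds of writer-side latency per wait loop; that seems acceptable here because only reclamation paths block on it.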
From 2a224a9e1e1fbe1abec96c8585dd754fb716d020 Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Sat, 13 Nov 2021 17:52:34 +0100
Subject: [PATCH 2/3] Route sources have their separate global lock

---
 nest/rt-attr.c | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index cd4c6892..cb66b65d 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -86,8 +86,13 @@ const char * rta_dest_names[RTD_MAX] = {
 };
 
 DOMAIN(attrs) attrs_domain;
+DOMAIN(attrs) src_domain;
+
+#define SRC_LOCK	LOCK_DOMAIN(attrs, src_domain)
+#define SRC_UNLOCK	UNLOCK_DOMAIN(attrs, src_domain)
 
 pool *rta_pool;
+pool *src_pool;
 
 static slab *rta_slab_[4];
 static slab *nexthop_slab_[4];
@@ -110,9 +115,11 @@ static struct idm src_ids;
 static void
 rte_src_init(void)
 {
-  rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
+  src_domain = DOMAIN_NEW(attrs, "Route sources");
+  src_pool = rp_new(&root_pool, "Route sources");
+  rte_src_slab = sl_new(src_pool, sizeof(struct rte_src));
 
-  idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
+  idm_init(&src_ids, src_pool, SRC_ID_INIT_SIZE);
 }
 
 HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
@@ -137,7 +144,7 @@ rt_get_source_o(struct rte_owner *p, u32 id)
     return src;
   }
 
-  RTA_LOCK;
+  SRC_LOCK;
   src = sl_allocz(rte_src_slab);
   src->owner = p;
   src->private_id = id;
@@ -146,12 +153,12 @@ rt_get_source_o(struct rte_owner *p, u32 id)
   atomic_store_explicit(&src->uc, 1, memory_order_release);
   p->uc++;
 
-  HASH_INSERT2(p->hash, RSH, rta_pool, src);
+  HASH_INSERT2(p->hash, RSH, src_pool, src);
   if (config->table_debug)
     log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now",
	p->name, src->private_id, src->global_id, p->uc);
 
-  RTA_UNLOCK;
+  SRC_UNLOCK;
   return src;
 }
 
@@ -182,21 +189,21 @@ rt_prune_sources(void *data)
 
       HASH_DO_REMOVE(o->hash, RSH, sp);
 
-      RTA_LOCK;
+      SRC_LOCK;
       idm_free(&src_ids, src->global_id);
       sl_free(rte_src_slab, src);
-      RTA_UNLOCK;
+      SRC_UNLOCK;
     }
   }
   HASH_WALK_FILTER_END;
 
-  RTA_LOCK;
-  HASH_MAY_RESIZE_DOWN(o->hash, RSH, rta_pool);
+  SRC_LOCK;
+  HASH_MAY_RESIZE_DOWN(o->hash, RSH, src_pool);
 
   if (o->stop && !o->uc)
   {
     rfree(o->prune);
-    RTA_UNLOCK;
+    SRC_UNLOCK;
 
     if (config->table_debug)
       log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name);
@@ -204,21 +211,21 @@ rt_prune_sources(void *data)
 
     rt_done_sources(o);
   }
   else
-    RTA_UNLOCK;
+    SRC_UNLOCK;
 }
 
 void
 rt_init_sources(struct rte_owner *o, const char *name, event_list *list)
 {
-  RTA_LOCK;
-  HASH_INIT(o->hash, rta_pool, RSH_INIT_ORDER);
+  SRC_LOCK;
+  HASH_INIT(o->hash, src_pool, RSH_INIT_ORDER);
   o->hash_key = random_u32();
   o->uc = 0;
   o->name = name;
-  o->prune = ev_new_init(rta_pool, rt_prune_sources, o);
+  o->prune = ev_new_init(src_pool, rt_prune_sources, o);
   o->stop = NULL;
   o->list = list;
-  RTA_UNLOCK;
+  SRC_UNLOCK;
 }
 
@@ -231,9 +238,9 @@ rt_destroy_sources(struct rte_owner *o, event *done)
     if (config->table_debug)
       log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name);
 
-    RTA_LOCK;
+    SRC_LOCK;
     rfree(o->prune);
-    RTA_UNLOCK;
+    SRC_UNLOCK;
 
     rt_done_sources(o);
   }
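
The point of this split: rte_src allocation and pruning used to share attrs_domain with the rta cache, and the next patch wants to repurpose that lock for the cache alone. With a dedicated src_domain and src_pool, source management stays out of the rta cache's way; the common path finds an existing source without the global lock and only allocation takes SRC_LOCK. A condensed sketch of that shape (rt_find_source() and idm_alloc() are pre-existing code outside this diff; error handling and logging are omitted):

/* Sketch only, not the verbatim rt_get_source_o(). */
struct rte_src *
get_source_sketch(struct rte_owner *p, u32 id)
{
  struct rte_src *src = rt_find_source(p, id);  /* lockless fast path */
  if (src)
    return src;          /* the real code also bumps src->uc here */

  SRC_LOCK;              /* slow path: allocate and insert */
  src = sl_allocz(rte_src_slab);
  src->owner = p;
  src->private_id = id;
  src->global_id = idm_alloc(&src_ids);
  HASH_INSERT2(p->hash, RSH, src_pool, src);
  SRC_UNLOCK;

  return src;
}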
From 0fd1c1d091ee8e43eb0e15c67a92960ca581ed5f Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Sat, 13 Nov 2021 22:13:51 +0100
Subject: [PATCH 3/3] Route attribute cache is now lockless on read / clone.

Lots of time was spent locking when accessing the route attribute cache.
This overhead should now be reduced to a minimum.
---
 nest/route.h   |  25 +++---
 nest/rt-attr.c | 214 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 171 insertions(+), 68 deletions(-)

diff --git a/nest/route.h b/nest/route.h
index 3f8bf433..531e004b 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -606,8 +606,8 @@ struct rte_src {
 };
 
 typedef struct rta {
-  struct rta *next, **pprev;		/* Hash chain */
-  u32 uc;				/* Use count */
+  struct rta * _Atomic next, * _Atomic *pprev;	/* Hash chain */
+  _Atomic u32 uc;			/* Use count */
   u32 hash_key;				/* Hash over important fields */
   struct ea_list *eattrs;		/* Extended Attribute chain */
   struct hostentry *hostentry;		/* Hostentry for recursive next-hops */
@@ -758,12 +758,6 @@ struct rte_owner {
   event *stop;
 };
 
-DEFINE_DOMAIN(attrs);
-extern DOMAIN(attrs) attrs_domain;
-
-#define RTA_LOCK	LOCK_DOMAIN(attrs, attrs_domain)
-#define RTA_UNLOCK	UNLOCK_DOMAIN(attrs, attrs_domain)
-
 #define RTE_SRC_PU_SHIFT	44
 #define RTE_SRC_IN_PROGRESS	(1ULL << RTE_SRC_PU_SHIFT)
 
@@ -879,20 +873,23 @@ static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a
 #define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
 rta *rta_lookup(rta *);			/* Get rta equivalent to this one, uc++ */
 static inline int rta_is_cached(rta *r) { return r->cached; }
+
 static inline rta *rta_clone(rta *r) {
-  RTA_LOCK;
-  r->uc++;
-  RTA_UNLOCK;
+  u32 uc = atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel);
+  ASSERT_DIE(uc > 0);
   return r;
 }
+
 void rta__free(rta *r);
 static inline void rta_free(rta *r) {
-  RTA_LOCK;
-  if (r && !--r->uc)
+  if (!r)
+    return;
+
+  u32 uc = atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel);
+  if (uc == 1)
     rta__free(r);
-  RTA_UNLOCK;
 }
+
 rta *rta_do_cow(rta *o, linpool *lp);
 static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; }
 static inline void rta_uncache(rta *r) { r->cached = 0; r->uc = 0; }
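
The use count is now a plain atomic counter with two invariants: rta_clone() may only run on an rta that is already referenced (hence the assertion on the previous value), and exactly one rta_free() caller observes the 1 to 0 transition and becomes responsible for calling rta__free(). A small self-contained model of that hand-off (plain C11; the names and the two-owner scenario are illustrative):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned uc = 2;   /* two owners of one cached rta */

static void drop(const char *who)
{
  unsigned prev = atomic_fetch_sub_explicit(&uc, 1, memory_order_acq_rel);
  if (prev == 1)
    printf("%s saw 1 -> 0, frees the rta\n", who);   /* calls rta__free() */
  else
    printf("%s only decremented (prev %u)\n", who, prev);
}

int main(void)
{
  drop("A");   /* prev == 2: keeps the rta alive */
  drop("B");   /* prev == 1: this caller reclaims */
  return 0;
}

Because a concurrent cache lookup can increment the counter again between the 1 to 0 transition and the actual teardown, rta__free() in the next file re-checks it before touching the memory.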
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index cb66b65d..9a5498ed 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -54,6 +54,7 @@
 #include "lib/hash.h"
 #include "lib/idm.h"
 #include "lib/resource.h"
+#include "lib/rcu.h"
 #include "lib/string.h"
 
 #include <stddef.h>
@@ -85,8 +86,8 @@ const char * rta_dest_names[RTD_MAX] = {
   [RTD_PROHIBIT]	= "prohibited",
 };
 
-DOMAIN(attrs) attrs_domain;
-DOMAIN(attrs) src_domain;
+DEFINE_DOMAIN(attrs);
+static DOMAIN(attrs) src_domain;
 
 #define SRC_LOCK	LOCK_DOMAIN(attrs, src_domain)
 #define SRC_UNLOCK	UNLOCK_DOMAIN(attrs, src_domain)
@@ -1166,21 +1167,28 @@ ea_append(ea_list *to, ea_list *what)
  *	rta's
  */
 
-static uint rta_cache_count;
-static uint rta_cache_size = 32;
-static uint rta_cache_limit;
-static uint rta_cache_mask;
-static rta **rta_hash_table;
+static DOMAIN(attrs) attrs_domain;
 
-static void
-rta_alloc_hash(void)
+#define RTA_LOCK	LOCK_DOMAIN(attrs, attrs_domain)
+#define RTA_UNLOCK	UNLOCK_DOMAIN(attrs, attrs_domain)
+
+struct rta_cache {
+  u32 count;
+  u32 size;
+  u32 limit;
+  u32 mask;
+  rta * _Atomic table[0];
+} * _Atomic rta_cache;
+
+static struct rta_cache *
+rta_alloc_hash(u32 size)
 {
-  rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size);
-  if (rta_cache_size < 32768)
-    rta_cache_limit = rta_cache_size * 2;
-  else
-    rta_cache_limit = ~0;
-  rta_cache_mask = rta_cache_size - 1;
+  struct rta_cache *c = mb_allocz(rta_pool, sizeof(struct rta_cache) + sizeof(rta * _Atomic) * size);
+  c->size = size;
+  c->limit = (size >> 20) ? (~0U) : (size * 2);
+  c->mask = size - 1;
+
+  return c;
 }
 
 static inline uint
@@ -1234,34 +1242,88 @@ rta_copy(rta *o)
 }
 
 static inline void
-rta_insert(rta *r)
+rta_insert(rta *r, struct rta_cache *c)
 {
-  uint h = r->hash_key & rta_cache_mask;
-  r->next = rta_hash_table[h];
-  if (r->next)
-    r->next->pprev = &r->next;
-  r->pprev = &rta_hash_table[h];
-  rta_hash_table[h] = r;
+  uint h = r->hash_key & c->mask;
+  rta *next = atomic_load_explicit(&c->table[h], memory_order_relaxed);
+
+  atomic_store_explicit(&r->next, next, memory_order_relaxed);
+  r->pprev = &c->table[h];
+
+  if (next)
+    next->pprev = &r->next;
+
+  /* This store MUST be the last and MUST have release order for thread-safety */
+  atomic_store_explicit(&c->table[h], r, memory_order_release);
 }
 
 static void
-rta_rehash(void)
+rta_rehash(struct rta_cache *c)
 {
-  uint ohs = rta_cache_size;
-  uint h;
-  rta *r, *n;
-  rta **oht = rta_hash_table;
-
-  rta_cache_size = 2*rta_cache_size;
-  DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size);
-  rta_alloc_hash();
-  for(h=0; h<ohs; h++)
-    for(r=oht[h]; r; r=n)
-      {
-	n = r->next;
-	rta_insert(r);
-      }
-  mb_free(oht);
+  u32 os = c->size;
+
+  struct rta_cache *nc = rta_alloc_hash(os * 2);
+  nc->count = c->count;
+
+  /* First we simply copy every chain to both new locations */
+  for (u32 h = 0; h < os; h++)
+  {
+    rta *r = atomic_load_explicit(&c->table[h], memory_order_relaxed);
+    atomic_store_explicit(&nc->table[h], r, memory_order_relaxed);
+    atomic_store_explicit(&nc->table[h + os], r, memory_order_relaxed);
+  }
+
+  /* Then we exchange the hashes; release semantics forces the previous code to be already done */
+  atomic_store_explicit(&rta_cache, nc, memory_order_release);
+
+  /* And now we pass through both chains and filter them */
+  for (u32 h = 0; h < c->size; h++)
+  {
+    rta * _Atomic * ap = &nc->table[h];
+    rta * _Atomic * bp = &nc->table[h + os];
+
+    rta *r = atomic_load_explicit(ap, memory_order_relaxed);
+    ASSERT_DIE(r == atomic_load_explicit(bp, memory_order_relaxed));
+
+    while (r)
+    {
+      if (r->hash_key & os)
+      {
+	r->pprev = bp;
+	atomic_store_explicit(bp, r, memory_order_release);
+	bp = &r->next;
+      }
+      else
+      {
+	r->pprev = ap;
+	atomic_store_explicit(ap, r, memory_order_release);
+	ap = &r->next;
+      }
+
+      r = atomic_load_explicit(&r->next, memory_order_acquire);
+    }
+
+    atomic_store_explicit(ap, NULL, memory_order_release);
+    atomic_store_explicit(bp, NULL, memory_order_release);
+  }
+
+  synchronize_rcu();
+  mb_free(c);
 }
+
+static rta *
+rta_find(rta *o, u32 h, struct rta_cache *c)
+{
+  rta *r = NULL;
+
+  for (r = atomic_load_explicit(&c->table[h & c->mask], memory_order_acquire); r; r = atomic_load_explicit(&r->next, memory_order_acquire))
+    if (r->hash_key == h && rta_same(r, o))
+    {
+      atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel);
+      return r;
+    }
+
+  return NULL;
+}
 
 /**
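
Two things make this rehash safe for concurrent lockless readers. First, the new table is published only after every old chain has been stored at both of its possible new slots h and h + os, so a reader computing its slot against the new mask always finds a superset of the entries it could need; the filtering pass afterwards only ever shortens chains, and the old table is freed only after synchronize_rcu(). Second, the slot arithmetic relies on the size being a power of two: on doubling, an entry either keeps its index or moves up by exactly the old size, selected by a single hash bit. A tiny stand-alone check of that arithmetic (the values are illustrative):

#include <stdio.h>

int main(void)
{
  unsigned os = 8, key = 0x2a;                 /* old size 8, some hash key */
  unsigned old_slot = key & (os - 1);          /* 0x2a & 7  = 2 */
  unsigned new_slot = key & (2 * os - 1);      /* 0x2a & 15 = 10 */

  /* The new slot is either h or h + os, chosen by the (key & os) bit. */
  printf("%u -> %u, split bit %s\n", old_slot, new_slot,
         (key & os) ? "set: goes to h + os" : "clear: stays at h");
  return 0;
}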
@@ -1289,24 +1351,34 @@ rta_lookup(rta *o)
 
   h = rta_hash(o);
 
+  /* Lockless lookup */
+  rcu_read_lock();
+  r = rta_find(o, h, atomic_load_explicit(&rta_cache, memory_order_acquire));
+  rcu_read_unlock();
+
+  if (r)
+    return r;
+
   RTA_LOCK;
 
-  for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
-    if (r->hash_key == h && rta_same(r, o))
-      {
-	r->uc++;
-	RTA_UNLOCK;
-	return r;
-      }
+  /* Locked lookup to avoid duplicates if possible */
+  struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
+  r = rta_find(o, h, c);
+  if (r)
+  {
+    RTA_UNLOCK;
+    return r;
+  }
 
+  /* Store the rta */
   r = rta_copy(o);
   r->hash_key = h;
   r->cached = 1;
   rt_lock_hostentry(r->hostentry);
-  rta_insert(r);
+  rta_insert(r, c);
 
-  if (++rta_cache_count > rta_cache_limit)
-    rta_rehash();
+  if (++c->count > c->limit)
+    rta_rehash(c);
 
   RTA_UNLOCK;
   return r;
 }
@@ -1315,17 +1387,47 @@ rta_lookup(rta *o)
 void
 rta__free(rta *a)
 {
-  ASSERT(rta_cache_count && a->cached);
-  rta_cache_count--;
-  *a->pprev = a->next;
-  if (a->next)
-    a->next->pprev = a->pprev;
+  ASSERT(a->cached);
+
+  RTA_LOCK;
+  struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
+
+  if (atomic_load_explicit(&a->uc, memory_order_acquire))
+  {
+    /* Acquired inbetween */
+    RTA_UNLOCK;
+    return;
+  }
+
+  /* Relink the forward pointer */
+  rta *next = atomic_load_explicit(&a->next, memory_order_acquire);
+  atomic_store_explicit(a->pprev, next, memory_order_release);
+
+  /* Relink the backwards pointer */
+  if (next)
+    next->pprev = a->pprev;
+
+  /* Wait until nobody knows about us */
+  synchronize_rcu();
+
+  if (atomic_load_explicit(&a->uc, memory_order_acquire))
+  {
+    /* Acquired inbetween, relink back */
+    rta_insert(a, c);
+    RTA_UNLOCK;
+    return;
+  }
+
+  /* Cleared to free the memory */
   rt_unlock_hostentry(a->hostentry);
   if (a->nh.next)
     nexthop_free(a->nh.next);
   ea_free(a->eattrs);
   a->cached = 0;
+  c->count--;
   sl_free(rta_slab(a), a);
+
+  RTA_UNLOCK;
 }
 
 rta *
@@ -1394,9 +1496,13 @@ rta_dump_all(void)
 
   RTA_LOCK;
 
-  debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit);
-  for(h=0; h<rta_cache_size; h++)
-    for(a=rta_hash_table[h]; a; a=a->next)
+  struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
+
+  debug("Route attribute cache (%d entries, rehash at %d):\n", c->count, c->limit);
+  for(h=0; h<c->size; h++)
+    for(a = atomic_load_explicit(&c->table[h], memory_order_acquire);
+	a;
+	a = atomic_load_explicit(&a->next, memory_order_acquire))
       {
	debug("%p ", a);
	rta_dump(a);
@@ -1440,7 +1546,7 @@ rta_init(void)
   nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
   nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
 
-  rta_alloc_hash();
+  atomic_store_explicit(&rta_cache, rta_alloc_hash(32), memory_order_relaxed);
   rte_src_init();
 }
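
To summarize the freeing protocol above: rta_free() performs the 1 to 0 transition without any lock; rta__free() then takes RTA_LOCK, re-checks the count (a lockless rta_find() may have resurrected the entry in the meantime), unlinks it, waits out a grace period so no reader still holds a pointer obtained from the hash chain, and re-checks once more, either relinking the resurrected rta or finally freeing it. The shape of that protocol as a runnable stand-alone sketch (locking elided; all names here are hypothetical stand-ins, with the grace-period wait stubbed out):

#include <stdatomic.h>
#include <stdio.h>

struct obj { _Atomic unsigned uc; int linked; };

static void unlink_from_hash(struct obj *o) { o->linked = 0; }
static void relink_to_hash(struct obj *o) { o->linked = 1; }
static void really_free(struct obj *o) { printf("freed %p\n", (void *) o); }
static void synchronize_rcu_stub(void) { /* grace-period wait elided */ }

/* Mirror of rta__free()'s structure: hide the object, wait a grace
 * period, then re-check the refcount before reclaiming the memory. */
static void
deferred_free_sketch(struct obj *o)
{
  if (atomic_load_explicit(&o->uc, memory_order_acquire))
    return;                      /* re-referenced since the 1 -> 0 drop */

  unlink_from_hash(o);           /* new lookups can no longer find it */
  synchronize_rcu_stub();        /* readers that already saw it drain out */

  if (atomic_load_explicit(&o->uc, memory_order_acquire))
    relink_to_hash(o);           /* a concurrent lookup resurrected it */
  else
    really_free(o);              /* provably unreachable: safe to free */
}

int main(void)
{
  struct obj o = { .uc = 0, .linked = 1 };
  deferred_free_sketch(&o);
  return 0;
}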