diff --git a/lib/rcu.c b/lib/rcu.c index cfec36e6..bd442c74 100644 --- a/lib/rcu.c +++ b/lib/rcu.c @@ -16,31 +16,21 @@ #include "lib/io-loop.h" #include "lib/locking.h" -_Atomic uint rcu_gp_ctl = RCU_NEST_CNT; -_Thread_local struct rcu_thread *this_rcu_thread = NULL; +_Atomic u64 rcu_global_phase = RCU_GP_PHASE; +_Thread_local struct rcu_thread this_rcu_thread; _Thread_local uint rcu_blocked; -static list rcu_thread_list; +static struct rcu_thread * _Atomic rcu_thread_list = NULL; -static struct rcu_thread main_rcu_thread; - -static DOMAIN(resource) rcu_domain; +static _Atomic uint rcu_thread_spinlock = 0; static int -rcu_gp_ongoing(_Atomic uint *ctl) +rcu_critical(struct rcu_thread *t, u64 phase) { - uint val = atomic_load(ctl); - return (val & RCU_NEST_CNT) && ((val ^ atomic_load_explicit(&rcu_gp_ctl, memory_order_acquire)) & RCU_GP_PHASE); -} - -static void -update_counter_and_wait(void) -{ - atomic_fetch_xor(&rcu_gp_ctl, RCU_GP_PHASE); - struct rcu_thread *rc; - WALK_LIST(rc, rcu_thread_list) - while (rcu_gp_ongoing(&rc->ctl)) - birdloop_yield(); + uint val = atomic_load_explicit(&t->ctl, memory_order_acquire); + return + (val & RCU_NEST_MASK) /* Active */ + && ((val & ~RCU_NEST_MASK) <= phase); /* In an older phase */ } void @@ -49,35 +39,72 @@ synchronize_rcu(void) if (!rcu_blocked && last_locked) bug("Forbidden to synchronize RCU unless an appropriate lock is taken"); - LOCK_DOMAIN(resource, rcu_domain); - update_counter_and_wait(); - update_counter_and_wait(); - UNLOCK_DOMAIN(resource, rcu_domain); + /* Increment phase */ + u64 phase = atomic_fetch_add_explicit(&rcu_global_phase, RCU_GP_PHASE, memory_order_acq_rel); + + while (1) { + /* Spinlock */ + while (atomic_exchange_explicit(&rcu_thread_spinlock, 1, memory_order_acq_rel)) + birdloop_yield(); + + /* Check all threads */ + _Bool critical = 0; + for (struct rcu_thread * _Atomic *tp = &rcu_thread_list, *t; + t = atomic_load_explicit(tp, memory_order_acquire); + tp = &t->next) + /* Found a critical */ + if (critical = rcu_critical(t, phase)) + break; + + /* Unlock */ + ASSERT_DIE(atomic_exchange_explicit(&rcu_thread_spinlock, 0, memory_order_acq_rel)); + + /* Done if no critical */ + if (!critical) + return; + + /* Wait and retry if critical */ + birdloop_yield(); + } } void -rcu_thread_start(struct rcu_thread *rc) +rcu_thread_start(void) { - LOCK_DOMAIN(resource, rcu_domain); - add_tail(&rcu_thread_list, &rc->n); - this_rcu_thread = rc; - UNLOCK_DOMAIN(resource, rcu_domain); + /* Insert this thread to the thread list, no spinlock is needed */ + struct rcu_thread *next = atomic_load_explicit(&rcu_thread_list, memory_order_acquire); + do atomic_store_explicit(&this_rcu_thread.next, next, memory_order_relaxed); + while (!atomic_compare_exchange_strong_explicit( + &rcu_thread_list, &next, &this_rcu_thread, + memory_order_acq_rel, memory_order_acquire)); } void -rcu_thread_stop(struct rcu_thread *rc) +rcu_thread_stop(void) { - LOCK_DOMAIN(resource, rcu_domain); - this_rcu_thread = NULL; - rem_node(&rc->n); - UNLOCK_DOMAIN(resource, rcu_domain); + /* Spinlock */ + while (atomic_exchange_explicit(&rcu_thread_spinlock, 1, memory_order_acq_rel)) + birdloop_yield(); + + /* Find this thread */ + for (struct rcu_thread * _Atomic *tp = &rcu_thread_list, *t; + t = atomic_load_explicit(tp, memory_order_acquire); + tp = &t->next) + if (t == &this_rcu_thread) + { + /* Remove this thread */ + atomic_store_explicit(tp, atomic_load_explicit(&t->next, memory_order_acquire), memory_order_release); + + /* Unlock and go */ + ASSERT_DIE(atomic_exchange_explicit(&rcu_thread_spinlock, 0, memory_order_acq_rel)); + return; + } + + bug("Failed to find a stopped rcu thread"); } void rcu_init(void) { - rcu_domain = DOMAIN_NEW(resource); - DOMAIN_SETUP(resource, rcu_domain, "Read-Copy-Update", NULL); - init_list(&rcu_thread_list); - rcu_thread_start(&main_rcu_thread); + rcu_thread_start(); } diff --git a/lib/rcu.h b/lib/rcu.h index 632c6b18..8a718271 100644 --- a/lib/rcu.h +++ b/lib/rcu.h @@ -15,45 +15,59 @@ #include "lib/lists.h" #include -#define RCU_GP_PHASE 0x100000 -#define RCU_NEST_MASK 0x0fffff -#define RCU_NEST_CNT 0x000001 +#define RCU_GP_PHASE 0x100 +#define RCU_NEST_MASK (RCU_GP_PHASE-1) +#define RCU_NEST_CNT 1 -extern _Atomic uint rcu_gp_ctl; +extern _Atomic u64 rcu_global_phase; struct rcu_thread { - node n; - _Atomic uint ctl; + struct rcu_thread * _Atomic next; + _Atomic u64 ctl; }; -extern _Thread_local struct rcu_thread *this_rcu_thread; +extern _Thread_local struct rcu_thread this_rcu_thread; extern _Thread_local uint rcu_blocked; static inline void rcu_read_lock(void) { - uint cmp = atomic_load_explicit(&this_rcu_thread->ctl, memory_order_acquire); + /* Increment the nesting counter */ + u64 before = atomic_fetch_add_explicit( + &this_rcu_thread.ctl, + RCU_NEST_CNT, + memory_order_acq_rel + ); - if (cmp & RCU_NEST_MASK) - atomic_store_explicit(&this_rcu_thread->ctl, cmp + RCU_NEST_CNT, memory_order_relaxed); - else - atomic_store(&this_rcu_thread->ctl, atomic_load_explicit(&rcu_gp_ctl, memory_order_acquire)); + if (before & RCU_NEST_MASK) + return; + + /* Update the phase */ + u64 phase = atomic_load_explicit(&rcu_global_phase, memory_order_acquire); + u64 dif = (before & ~RCU_NEST_MASK) ^ phase; + + if (dif) + atomic_fetch_xor_explicit( + &this_rcu_thread.ctl, + dif, + memory_order_acq_rel); } static inline void rcu_read_unlock(void) { - atomic_fetch_sub(&this_rcu_thread->ctl, RCU_NEST_CNT); + /* Just decrement the nesting counter; when unlocked, nobody cares */ + atomic_fetch_sub(&this_rcu_thread.ctl, RCU_NEST_CNT); } static inline _Bool rcu_read_active(void) { - return !!(atomic_load_explicit(&this_rcu_thread->ctl, memory_order_acquire) & RCU_NEST_MASK); + return !!(atomic_load_explicit(&this_rcu_thread.ctl, memory_order_acquire) & RCU_NEST_MASK); } void synchronize_rcu(void); /* Registering and unregistering a birdloop. To be called from birdloop implementation */ -void rcu_thread_start(struct rcu_thread *); -void rcu_thread_stop(struct rcu_thread *); +void rcu_thread_start(void); +void rcu_thread_stop(void); /* Run this from resource init */ void rcu_init(void); diff --git a/lib/rcu_test.c b/lib/rcu_test.c index b8372132..7cc69710 100644 --- a/lib/rcu_test.c +++ b/lib/rcu_test.c @@ -27,12 +27,10 @@ static struct block { static struct block *_Atomic bin; static _Atomic uint seen = 0; -_Thread_local struct rcu_thread rtl; - static void * t_rcu_basic_reader(void *_ UNUSED) { - rcu_thread_start(&rtl); + rcu_thread_start(); while (atomic_load_explicit(&bin, memory_order_acquire) == NULL) birdloop_yield(); @@ -58,7 +56,7 @@ t_rcu_basic_reader(void *_ UNUSED) rcu_read_unlock(); } - rcu_thread_stop(&rtl); + rcu_thread_stop(); return NULL; } @@ -80,7 +78,7 @@ spin_unlock(void) static void * t_rcu_basic_writer(void *order_ptr) { - rcu_thread_start(&rtl); + rcu_thread_start(); uint order = (uintptr_t) order_ptr; struct block *cur = &ball[order][0]; @@ -160,7 +158,7 @@ t_rcu_basic_writer(void *order_ptr) cur->value = 0xd4d4d4d4d4d4d4d4; atomic_store_explicit(&cur->next, ((void *) 0xd8d8d8d8d8d8d8d8), memory_order_relaxed); - rcu_thread_stop(&rtl); + rcu_thread_stop(); return NULL; } diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c index f777d210..d36cbcc6 100644 --- a/sysdep/unix/io-loop.c +++ b/sysdep/unix/io-loop.c @@ -784,8 +784,7 @@ bird_thread_main(void *arg) { struct bird_thread *thr = this_thread = arg; - rcu_thread_start(&thr->rcu); - synchronize_rcu(); + rcu_thread_start(); account_to(&thr->overhead); @@ -1065,7 +1064,7 @@ bird_thread_shutdown(void * _ UNUSED) flush_local_pages(); /* Unregister from RCU */ - rcu_thread_stop(&thr->rcu); + rcu_thread_stop(); /* Now we can be cleaned up */ birdloop_leave(thr->meta); diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h index 71e5b109..3dc33d95 100644 --- a/sysdep/unix/io-loop.h +++ b/sysdep/unix/io-loop.h @@ -82,8 +82,6 @@ struct bird_thread pthread_t thread_id; pthread_attr_t thread_attr; - struct rcu_thread rcu; - list loops; struct birdloop_pickup_group *group; pool *pool;