2023-11-24 07:46:50 +00:00
|
|
|
/*
|
|
|
|
* BIRD Library -- Generic lock-free structures
|
|
|
|
*
|
2024-02-29 13:03:30 +00:00
|
|
|
* (c) 2023--2024 Maria Matejka <mq@jmq.cz>
|
|
|
|
* (c) 2023--2024 CZ.NIC, z.s.p.o.
|
2023-11-24 07:46:50 +00:00
|
|
|
*
|
|
|
|
* Can be freely distributed and used under the terms of the GNU GPL.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _BIRD_LOCKFREE_H_
|
|
|
|
#define _BIRD_LOCKFREE_H_
|
|
|
|
|
2024-04-23 15:35:00 +00:00
|
|
|
#include "lib/defer.h"
|
2023-11-24 07:46:50 +00:00
|
|
|
#include "lib/event.h"
|
|
|
|
#include "lib/rcu.h"
|
2024-02-29 13:03:30 +00:00
|
|
|
#include "lib/settle.h"
|
|
|
|
#include "lib/tlists.h"
|
2024-03-05 12:57:11 +00:00
|
|
|
#include "lib/io-loop.h"
|
2023-11-24 07:46:50 +00:00
|
|
|
|
|
|
|
#include <stdatomic.h>
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Lock-free usecounts.
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct lfuc {
|
|
|
|
_Atomic u64 uc;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define LFUC_PU_SHIFT 44
|
|
|
|
#define LFUC_IN_PROGRESS (1ULL << LFUC_PU_SHIFT)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* lfuc_lock - increase an atomic usecount
|
|
|
|
* @c: the usecount structure
|
|
|
|
*/
|
2024-01-10 08:10:03 +00:00
|
|
|
static inline u64 lfuc_lock(struct lfuc *c)
|
2023-11-24 07:46:50 +00:00
|
|
|
{
|
|
|
|
/* Locking is trivial; somebody already holds the underlying data structure
|
|
|
|
* so we just increase the use count. Nothing can be freed underneath our hands. */
|
|
|
|
u64 uc = atomic_fetch_add_explicit(&c->uc, 1, memory_order_acq_rel);
|
|
|
|
ASSERT_DIE(uc > 0);
|
2024-01-10 08:10:03 +00:00
|
|
|
return uc & (LFUC_IN_PROGRESS - 1);
|
2023-11-24 07:46:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* lfuc_lock_revive - increase an atomic usecount even if it's zero
|
|
|
|
* @c: the usecount structure
|
|
|
|
*
|
|
|
|
* If the caller is sure that they can't collide with the prune routine,
|
|
|
|
* they can call this even on structures with already zeroed usecount.
|
|
|
|
* Handy for situations with flapping routes. Use only from the same
|
|
|
|
* loop as which runs the prune routine.
|
|
|
|
*/
|
2024-01-10 08:10:03 +00:00
|
|
|
static inline u64 lfuc_lock_revive(struct lfuc *c)
|
2023-11-24 07:46:50 +00:00
|
|
|
{
|
2024-01-10 08:10:03 +00:00
|
|
|
u64 uc = atomic_fetch_add_explicit(&c->uc, 1, memory_order_acq_rel);
|
|
|
|
return uc & (LFUC_IN_PROGRESS - 1);
|
2023-11-24 07:46:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-03-05 12:57:11 +00:00
|
|
|
* lfuc_unlock_immediately - decrease an atomic usecount
|
2023-11-24 07:46:50 +00:00
|
|
|
* @c: the usecount structure
|
|
|
|
* @el: prune event list
|
|
|
|
* @ev: prune event itself
|
|
|
|
*
|
|
|
|
* If the usecount reaches zero, a prune event is run to possibly free the object.
|
|
|
|
* The prune event MUST use lfuc_finished() to check the object state.
|
|
|
|
*/
|
2024-03-05 12:57:11 +00:00
|
|
|
static inline void lfuc_unlock_immediately(struct lfuc *c, event_list *el, event *ev)
|
2023-11-24 07:46:50 +00:00
|
|
|
{
|
|
|
|
/* Unlocking is tricky. We do it lockless so at the same time, the prune
|
|
|
|
* event may be running, therefore if the unlock gets us to zero, it must be
|
|
|
|
* the last thing in this routine, otherwise the prune routine may find the
|
|
|
|
* source's usecount zeroed, freeing it prematurely.
|
|
|
|
*
|
|
|
|
* The usecount is split into two parts:
|
|
|
|
* the top 20 bits are an in-progress indicator
|
|
|
|
* the bottom 44 bits keep the actual usecount.
|
|
|
|
*
|
|
|
|
* Therefore at most 1 million of writers can simultaneously unlock the same
|
|
|
|
* structure, while at most ~17T different places can reference it. Both limits
|
|
|
|
* are insanely high from the 2022 point of view. Let's suppose that when 17T
|
|
|
|
* routes or 1M peers/tables get real, we get also 128bit atomic variables in the
|
|
|
|
* C norm. */
|
|
|
|
|
|
|
|
/* First, we push the in-progress indicator */
|
|
|
|
u64 uc = atomic_fetch_add_explicit(&c->uc, LFUC_IN_PROGRESS, memory_order_acq_rel);
|
|
|
|
|
|
|
|
/* Then we split the indicator to its parts. Remember, we got the value
|
|
|
|
* before the operation happened so we're re-doing the operation locally
|
|
|
|
* to get a view how the indicator _would_ look if nobody else was interacting.
|
|
|
|
*/
|
|
|
|
u64 pending = (uc >> LFUC_PU_SHIFT) + 1;
|
|
|
|
uc &= LFUC_IN_PROGRESS - 1;
|
|
|
|
|
|
|
|
/* We per-use the RCU critical section indicator to make the prune event wait
|
|
|
|
* until we finish here in the rare case we get preempted. */
|
|
|
|
rcu_read_lock();
|
|
|
|
|
|
|
|
/* Obviously, there can't be more pending unlocks than the usecount itself */
|
|
|
|
if (uc == pending)
|
|
|
|
/* If we're the last unlocker (every owner is already unlocking), schedule
|
|
|
|
* the owner's prune event */
|
|
|
|
ev_send(el, ev);
|
|
|
|
else
|
|
|
|
ASSERT_DIE(uc > pending);
|
|
|
|
|
|
|
|
/* And now, finally, simultaneously pop the in-progress indicator and the
|
|
|
|
* usecount, possibly allowing the pruning routine to free this structure */
|
2024-01-10 08:10:03 +00:00
|
|
|
uc = atomic_fetch_sub_explicit(&c->uc, LFUC_IN_PROGRESS + 1, memory_order_acq_rel);
|
2023-11-24 07:46:50 +00:00
|
|
|
|
|
|
|
/* ... and to reduce the load a bit, the pruning routine will better wait for
|
|
|
|
* RCU synchronization instead of a busy loop. */
|
|
|
|
rcu_read_unlock();
|
2024-01-10 08:10:03 +00:00
|
|
|
|
2024-03-05 12:57:11 +00:00
|
|
|
// return uc - LFUC_IN_PROGRESS - 1;
|
|
|
|
}
|
|
|
|
|
2024-04-23 15:35:00 +00:00
|
|
|
struct lfuc_unlock_queue_item {
|
|
|
|
struct deferred_call dc;
|
|
|
|
struct lfuc *c;
|
|
|
|
event_list *el;
|
|
|
|
event *ev;
|
|
|
|
};
|
2024-03-05 12:57:11 +00:00
|
|
|
|
2024-04-23 15:35:00 +00:00
|
|
|
void lfuc_unlock_deferred(struct deferred_call *dc);
|
2024-03-05 12:57:11 +00:00
|
|
|
|
|
|
|
static inline void lfuc_unlock(struct lfuc *c, event_list *el, event *ev)
|
|
|
|
{
|
2024-04-23 15:35:00 +00:00
|
|
|
struct lfuc_unlock_queue_item luqi = {
|
|
|
|
.dc.hook = lfuc_unlock_deferred,
|
2024-03-05 12:57:11 +00:00
|
|
|
.c = c,
|
|
|
|
.el = el,
|
|
|
|
.ev = ev,
|
|
|
|
};
|
2024-04-23 15:35:00 +00:00
|
|
|
|
|
|
|
defer_call(&luqi.dc, sizeof luqi);
|
2023-11-24 07:46:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* lfuc_finished - auxiliary routine for prune event
|
|
|
|
* @c: usecount structure
|
|
|
|
*
|
|
|
|
* This routine simply waits until all unlockers finish their job and leave
|
|
|
|
* the critical section of lfuc_unlock(). Then we decide whether the usecount
|
|
|
|
* is indeed zero or not, and therefore whether the structure is free to be freed.
|
|
|
|
*/
|
|
|
|
static inline _Bool
|
|
|
|
lfuc_finished(struct lfuc *c)
|
|
|
|
{
|
|
|
|
u64 uc;
|
|
|
|
/* Wait until all unlockers finish */
|
|
|
|
while ((uc = atomic_load_explicit(&c->uc, memory_order_acquire)) >> LFUC_PU_SHIFT)
|
|
|
|
synchronize_rcu();
|
|
|
|
|
|
|
|
/* All of them are now done and if the usecount is now zero, then we're
|
|
|
|
* the last place to reference the object and we can call it finished. */
|
|
|
|
return (uc == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* lfuc_init - auxiliary routine for usecount initialization
|
|
|
|
* @c: usecount structure
|
|
|
|
*
|
|
|
|
* Called on object initialization, sets the usecount to an initial one to make
|
|
|
|
* sure that the prune routine doesn't free it before somebody else references it.
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
lfuc_init(struct lfuc *c)
|
|
|
|
{
|
|
|
|
atomic_store_explicit(&c->uc, 1, memory_order_release);
|
|
|
|
}
|
|
|
|
|
2024-02-29 13:03:30 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Lock-free journal.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Journal item. Put LFJOUR_ITEM_INHERIT(name) into your structure
|
|
|
|
* to inherit lfjour_item */
|
|
|
|
#define LFJOUR_ITEM \
|
|
|
|
u64 seq; \
|
|
|
|
|
|
|
|
struct lfjour_item {
|
|
|
|
LFJOUR_ITEM;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define LFJOUR_ITEM_INHERIT(name) union { \
|
|
|
|
struct lfjour_item name; \
|
|
|
|
struct { LFJOUR_ITEM; }; \
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Journal item block. Internal structure, no need to check out. */
|
|
|
|
#define TLIST_PREFIX lfjour_block
|
|
|
|
#define TLIST_TYPE struct lfjour_block
|
|
|
|
#define TLIST_ITEM n
|
|
|
|
#define TLIST_WANT_ADD_TAIL
|
|
|
|
|
|
|
|
struct lfjour_block {
|
|
|
|
TLIST_DEFAULT_NODE;
|
|
|
|
_Atomic u32 end;
|
|
|
|
_Atomic _Bool not_last;
|
|
|
|
|
|
|
|
struct lfjour_item _block[0];
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Defines lfjour_block_list */
|
|
|
|
#include "lib/tlists.h"
|
|
|
|
|
|
|
|
/* Journal recipient. Inherit this in your implementation. */
|
|
|
|
#define TLIST_PREFIX lfjour_recipient
|
|
|
|
#define TLIST_TYPE struct lfjour_recipient
|
|
|
|
#define TLIST_ITEM n
|
|
|
|
#define TLIST_WANT_ADD_TAIL
|
|
|
|
#define TLIST_WANT_WALK
|
|
|
|
|
|
|
|
struct lfjour_recipient {
|
|
|
|
TLIST_DEFAULT_NODE;
|
|
|
|
event *event; /* Event running when something is in the journal */
|
|
|
|
event_list *target; /* Event target */
|
|
|
|
struct lfjour_item * _Atomic last; /* Last item processed */
|
|
|
|
struct lfjour_item *cur; /* Processing this now */
|
|
|
|
_Atomic u64 recipient_flags; /* LFJOUR_R_* */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Bits stored in lfjour_recipient.recipient_flags */
enum lfjour_recipient_flags {
  LFJOUR_R_SEQ_RESET = 1 << 0,		/* Signalling of sequence number reset */
};
|
|
|
|
|
|
|
|
/* Defines lfjour_recipient_list */
|
|
|
|
#include "lib/tlists.h"
|
|
|
|
|
|
|
|
/* Journal base structure. Include this. */
|
|
|
|
struct lfjour {
|
|
|
|
struct domain_generic *domain; /* The journal itself belongs to this domain (if different from the loop) */
|
|
|
|
struct birdloop *loop; /* Cleanup loop */
|
|
|
|
u32 item_size, item_count; /* Allocation parameters */
|
|
|
|
struct lfjour_block_list pending; /* List of packed journal blocks */
|
|
|
|
struct lfjour_item * _Atomic first; /* First journal item to announce */
|
|
|
|
struct lfjour_item *open; /* Journal item in progress */
|
|
|
|
u64 next_seq; /* Next export to push has this ID */
|
|
|
|
struct lfjour_recipient_list recipients; /* Announce updates to these */
|
|
|
|
event announce_kick_event; /* Kicks announce_timer */
|
|
|
|
struct settle announce_timer; /* Announces changes to recipients */
|
|
|
|
event cleanup_event; /* Runs the journal cleanup routine */
|
|
|
|
|
|
|
|
/* Callback on item removal from journal */
|
|
|
|
void (*item_done)(struct lfjour *, struct lfjour_item *);
|
|
|
|
|
|
|
|
/* Callback when the cleanup routine is ending */
|
|
|
|
void (*cleanup_done)(struct lfjour *, u64 begin_seq, u64 end_seq);
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Writer side of the journal; implemented outside this header.
 * NOTE(review): the names suggest a two-step push (prepare an item, then
 * commit it) -- see the implementation for the exact contract. */
struct lfjour_item *lfjour_push_prepare(struct lfjour *j);
void lfjour_push_commit(struct lfjour *j);

/* Recipient side of the journal; implemented outside this header.
 * NOTE(review): presumably lfjour_get() hands out the next item to process
 * and lfjour_release() marks it as done -- confirm in the implementation. */
struct lfjour_item *lfjour_get(struct lfjour_recipient *r);
void lfjour_release(struct lfjour_recipient *r);
|
|
|
|
static inline _Bool lfjour_reset_seqno(struct lfjour_recipient *r)
|
|
|
|
{
|
|
|
|
return atomic_fetch_and_explicit(&r->recipient_flags, ~LFJOUR_R_SEQ_RESET, memory_order_acq_rel) & LFJOUR_R_SEQ_RESET;
|
|
|
|
}
|
|
|
|
|
|
|
|
void lfjour_announce_now(struct lfjour *);
|
|
|
|
u64 lfjour_pending_items(struct lfjour *);
|
|
|
|
|
|
|
|
static inline void lfjour_schedule_cleanup(struct lfjour *j)
|
|
|
|
{ ev_send_loop(j->loop, &j->cleanup_event); }
|
|
|
|
|
|
|
|
static inline void lfjour_do_cleanup_now(struct lfjour *j)
|
|
|
|
{
|
|
|
|
/* This requires the caller to own the cleanup event loop */
|
|
|
|
ev_postpone(&j->cleanup_event);
|
|
|
|
j->cleanup_event.hook(j->cleanup_event.data);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Recipient registration; implemented outside this header. */
void lfjour_register(struct lfjour *j, struct lfjour_recipient *r);
void lfjour_unregister(struct lfjour_recipient *r);
|
|
|
|
static inline uint lfjour_count_recipients(struct lfjour *j)
|
|
|
|
{ return TLIST_LENGTH(lfjour_recipient, &j->recipients); }
|
|
|
|
|
|
|
|
/* Journal initialization; implemented outside this header. */
void lfjour_init(struct lfjour *j, struct settle_config *settle_cf);
|
|
|
|
|
|
|
|
|
|
|
|
static inline struct lfjour *lfjour_of_recipient(struct lfjour_recipient *r)
|
|
|
|
{
|
|
|
|
struct lfjour_recipient_list *list = lfjour_recipient_enlisted(r);
|
|
|
|
return list ? SKIP_BACK(struct lfjour, recipients, list) : NULL;
|
|
|
|
}
|
2023-11-24 07:46:50 +00:00
|
|
|
#endif
|