
Table feeds are now lockless

This commit makes the route chains in the tables atomic. This allows not
only standard exports but also feeds and bulk exports to be processed
without ever locking the table.
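To illustrate, here is a minimal reader-side sketch of the lockless walk this enables, mirroring the atomic-load loops visible in the bgp_rte_recalculate() hunk further below. The simplified types and the walk_routes() helper are illustrative stand-ins, not BIRD's actual definitions:

/* Reader-side sketch: walk a route chain using acquire loads only.
 * The structs are simplified stand-ins for BIRD's rte_storage / net. */
#include <stdatomic.h>

struct rte_storage {
  struct rte_storage * _Atomic next;	/* next in chain, published atomically */
  int valid;				/* stand-in for rte_is_valid(&s->rte) */
};

struct net {
  struct rte_storage * _Atomic routes;	/* head of the chain for this network */
};

static void
walk_routes(struct net *n)
{
  /* No table lock: writers publish nodes with release stores, so an
   * acquire load of the head or of ->next sees a fully written node. */
  for (struct rte_storage *s = atomic_load_explicit(&n->routes, memory_order_acquire);
       s;
       s = atomic_load_explicit(&s->next, memory_order_acquire))
  {
    if (!s->valid)
      break;	/* the BGP loops below stop at the first invalid route */

    /* ... process the route here ... */
  }
}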

Design note: the overall data structures are quite brittle. We're using
RCU read-locks to keep track of readers, and we indicate ongoing work on
the data structures by prepending a REF_OBSOLETE sentinel node that
makes every reader wait.
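A rough sketch of that sentinel pattern follows, using the same simplified types as above. The REF_OBSOLETE value, the flags field, and the RCU/cleanup helpers are illustrative stand-ins rather than BIRD's actual API (the authoritative protocol lives in nest/rt-table.c); the bgp_rte_recalculate() hunk below does, however, assert that the chain head carries REF_OBSOLETE while table code operates on it:

/* Writer-side sketch: mark ongoing surgery on a chain by prepending a
 * sentinel flagged REF_OBSOLETE, rework the chain, then let RCU decide
 * when the replaced nodes may be freed. All names are illustrative. */
#include <stdatomic.h>

#define REF_OBSOLETE 0x1		/* illustrative flag value */

struct rte_storage {
  struct rte_storage * _Atomic next;
  unsigned flags;			/* REF_OBSOLETE marks the sentinel */
};

struct net {
  struct rte_storage * _Atomic routes;
};

void synchronize_rcu(void);		/* wait until all current RCU readers finish */
void free_chain(struct rte_storage *);	/* hypothetical cleanup helper */

static void
replace_chain(struct net *n, struct rte_storage *sentinel, struct rte_storage *new_head)
{
  /* 1. Prepend the sentinel; a reader seeing REF_OBSOLETE at the head
   *    knows work is in progress and waits. */
  struct rte_storage *old = atomic_load_explicit(&n->routes, memory_order_acquire);
  sentinel->flags = REF_OBSOLETE;
  atomic_store_explicit(&sentinel->next, old, memory_order_release);
  atomic_store_explicit(&n->routes, sentinel, memory_order_release);

  /* 2. Rebuild the chain behind the sentinel (here: wholesale swap). */
  atomic_store_explicit(&sentinel->next, new_head, memory_order_release);

  /* 3. Publish the finished chain, retiring the sentinel. */
  atomic_store_explicit(&n->routes, new_head, memory_order_release);

  /* 4. In-flight readers may still hold pointers into the old chain;
   *    reclaim it only after an RCU grace period. */
  synchronize_rcu();
  free_chain(old);
}

The key property is the last step: since readers never take the table lock, the only safe point to free replaced nodes is after an RCU grace period.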

All the operations are intended to stay inside nest/rt-table.c, and it
may even be best to refactor the code further to hide the routing
table's internal structure in there. Under no circumstances should
anybody write routines manipulating live routes in tables from outside.
Maria Matejka 2024-04-03 14:47:15 +02:00
parent 40061ac3ac
commit 09d99617d3
4 changed files with 774 additions and 681 deletions

View File

@@ -91,6 +91,9 @@ extern uint rtable_max_id;
   struct rtable_config *config;		/* Configuration of this table */ \
   struct birdloop *loop;		/* Service thread */ \
   netindex_hash *netindex;		/* Prefix index for this table */ \
+  struct network * _Atomic routes;	/* Actual route objects in the table */ \
+  _Atomic u32 routes_block_size;	/* Size of the route object pointer block */ \
+  struct f_trie * _Atomic trie;		/* Trie of prefixes defined in fib */ \
   event *nhu_event;			/* Nexthop updater */ \
   event *hcu_event;			/* Hostcache updater */ \
@@ -103,9 +106,6 @@ struct rtable_private {
   /* Here the private items not to be accessed without locking */
   pool *rp;				/* Resource pool to allocate everything from, including itself */
   struct slab *rte_slab;		/* Slab to allocate route objects */
-  struct network *routes;		/* Actual route objects in the table */
-  u32 routes_block_size;		/* Size of the route object pointer block */
-  struct f_trie *trie;			/* Trie of prefixes defined in fib */
   int use_count;			/* Number of protocols using this table */
   u32 rt_count;				/* Number of routes in the table */
   u32 net_count;			/* Number of nets in the table */
@@ -143,7 +143,7 @@ struct rtable_private {
   u32 prune_index;			/* Rtable prune FIB iterator */
   u32 nhu_index;			/* Next Hop Update FIB iterator */
   struct f_trie *trie_new;		/* New prefix trie defined during pruning */
-  struct f_trie *trie_old;		/* Old prefix trie waiting to be freed */
+  const struct f_trie *trie_old;	/* Old prefix trie waiting to be freed */
   u32 trie_lock_count;			/* Prefix trie locked by walks */
   u32 trie_old_lock_count;		/* Old prefix trie locked by walks */
   struct tbf rl_pipe;			/* Rate limiting token buffer for pipe collisions */
@@ -209,12 +209,12 @@ static inline int rt_cork_check(event *e)
 typedef struct network {
-  struct rte_storage *routes;		/* Available routes for this network */
-  struct rt_pending_export *first, *last;
+  struct rte_storage * _Atomic routes;	/* Available routes for this network */
+  struct rt_pending_export * _Atomic first, * _Atomic last;	/* Uncleaned pending exports */
 } net;
 
 struct rte_storage {
-  struct rte_storage *next;		/* Next in chain */
+  struct rte_storage * _Atomic next;	/* Next in chain */
   union {
     struct {
       RTE_IN_TABLE_WRITABLE;
@@ -366,18 +366,7 @@ struct rt_export_hook {
 
   /* Table-specific items */
   rtable *tab;				/* The table pointer to use in corner cases */
-  union {
-    u32 feed_index;			/* Routing table iterator used during feeding */
-    struct {
-      struct f_trie_walk_state *walk_state;	/* Iterator over networks in trie */
-      struct f_trie *walk_lock;		/* Locked trie for walking */
-      union {				/* Last net visited but not processed */
-	net_addr walk_last;
-	net_addr_ip4 walk_last_ip4;
-	net_addr_ip6 walk_last_ip6;
-      };
-    };
-  };
+  u32 feed_index;			/* Routing table iterator used during feeding */
 
   u8 refeed_pending;			/* Refeeding and another refeed is scheduled */
   u8 feed_type;				/* Which feeding method is used (TFT_*, see below) */
@@ -575,15 +564,12 @@ static inline void rt_unlock_table_pub(rtable *t, const char *file, uint line)
 #define rt_unlock_table(t)	_Generic((t), rtable *: rt_unlock_table_pub, \
 				struct rtable_private *: rt_unlock_table_priv)((t), __FILE__, __LINE__)
 
-struct f_trie * rt_lock_trie(struct rtable_private *tab);
-void rt_unlock_trie(struct rtable_private *tab, struct f_trie *trie);
+const struct f_trie * rt_lock_trie(struct rtable_private *tab);
+void rt_unlock_trie(struct rtable_private *tab, const struct f_trie *trie);
 void rt_flowspec_link(rtable *src, rtable *dst);
 void rt_flowspec_unlink(rtable *src, rtable *dst);
 rtable *rt_setup(pool *, struct rtable_config *);
 
-static inline net *net_find(struct rtable_private *tab, const struct netindex *i)
-{ return (i->index < tab->routes_block_size) ? &(tab->routes[i->index]) : NULL; }
-
 int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
 rte *rt_export_merged(struct channel *c, const net_addr *n, const rte ** feed, uint count, linpool *pool, int silent);
 void rt_refresh_begin(struct rt_import_request *);

File diff suppressed because it is too large

View File

@@ -2648,8 +2648,16 @@ bgp_rte_recalculate(struct rtable_private *table, net *net,
 
   /* The default case - find a new best-in-group route */
   struct rte_storage *r = new_stored;	/* new may not be in the list */
-  for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
-    if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
+  struct rte_storage *spinlocked = atomic_load_explicit(&net->routes, memory_order_acquire);
+  ASSERT_DIE(spinlocked->rte.flags & REF_OBSOLETE);
+  ASSERT_DIE(!spinlocked->rte.src);
+  for (struct rte_storage *s, * _Atomic *ptr = &spinlocked->next;
+      s = atomic_load_explicit(ptr, memory_order_acquire);
+      ptr = &s->next)
+    if (!rte_is_valid(&s->rte))
+      break;
+    else if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
     {
       s->pflags |= BGP_REF_SUPPRESSED;
       if (!r || bgp_rte_better(&s->rte, &r->rte))
@@ -2665,8 +2673,12 @@ bgp_rte_recalculate(struct rtable_private *table, net *net,
       new_stored->pflags &= ~BGP_REF_SUPPRESSED;
 
   /* Found all existing routes mergable with best-in-group */
-  for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
-    if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
+  for (struct rte_storage *s, * _Atomic *ptr = &spinlocked->next;
+      s = atomic_load_explicit(ptr, memory_order_acquire);
+      ptr = &s->next)
+    if (!rte_is_valid(&s->rte))
+      break;
+    else if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
       if ((s != r) && bgp_rte_mergable(&r->rte, &s->rte))
         s->pflags &= ~BGP_REF_SUPPRESSED;

View File

@@ -339,83 +339,8 @@ krt_learn_async(struct krt_proto *p, rte *e, int new)
  *	Routes
  */
 
-static inline int
-krt_is_installed(struct krt_proto *p, net *n)
-{
-  return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->rte.id);
-}
-
-static uint
-rte_feed_count_valid(net *n)
-{
-  uint count = 0;
-  for (struct rte_storage *e = n->routes; e; e = e->next)
-    if (rte_is_valid(RTE_OR_NULL(e)))
-      count++;
-  return count;
-}
-
-static void
-rte_feed_obtain_valid(net *n, const rte **feed, uint count)
-{
-  uint i = 0;
-  for (struct rte_storage *e = n->routes; e; e = e->next)
-    if (rte_is_valid(RTE_OR_NULL(e)))
-    {
-      ASSERT_DIE(i < count);
-      feed[i++] = &e->rte;
-    }
-  ASSERT_DIE(i == count);
-}
-
-static struct rte *
-krt_export_net(struct krt_proto *p, struct netindex *i, net *net)
-{
-  /* FIXME: Here we are calling filters in table-locked context when exporting
-   * to kernel. Here BIRD can crash if the user requested ROA check in kernel
-   * export filter. It doesn't make much sense to write the filters like this,
-   * therefore we may keep this unfinished piece of work here for later as it
-   * won't really affect anybody. */
-  ASSERT_DIE(RT_IS_LOCKED(p->p.main_channel->table));
-
-  struct channel *c = p->p.main_channel;
-  const struct filter *filter = c->out_filter;
-
-  if (c->ra_mode == RA_MERGED)
-  {
-    uint count = rte_feed_count_valid(net);
-    if (!count)
-      return NULL;
-
-    const rte **feed = alloca(count * sizeof(rte *));
-    rte_feed_obtain_valid(net, feed, count);
-    return rt_export_merged(c, i->addr, feed, count, krt_filter_lp, 1);
-  }
-
-  static _Thread_local rte rt;
-  rt = net->routes->rte;
-
-  if (!rte_is_valid(&rt))
-    return NULL;
-
-  if (filter == FILTER_REJECT)
-    return NULL;
-
-  /* We could run krt_preexport() here, but it is already handled by krt_is_installed() */
-
-  if (filter == FILTER_ACCEPT)
-    goto accept;
-
-  if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT)
-    goto reject;
-
-accept:
-  return &rt;
-
-reject:
-  return NULL;
-}
+/* Hook defined in nest/rt-table.c ... to be refactored away later */
+rte *krt_export_net(struct channel *c, const net_addr *a, linpool *lp);
 
 static int
 krt_same_dest(rte *k, rte *e)
@@ -465,10 +390,6 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
 #endif
 
   /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */
-  RT_LOCKED(p->p.main_channel->table, tab)
-  {
-
   /* Deleting all routes if final flush is requested */
   if (p->sync_state == KPS_FLUSHING)
     goto delete;
@@ -477,12 +398,8 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
   if (!p->ready)
     goto ignore;
 
-  struct netindex *i = net_find_index(tab->netindex, e->net);
-  net *net = i ? net_find(tab, i) : NULL;
-  if (!net || !krt_is_installed(p, net))
-    goto delete;
-
-  new = krt_export_net(p, i, net);
+  /* Get the exported version */
+  new = krt_export_net(p->p.main_channel, e->net, krt_filter_lp);
 
   /* Rejected by filters */
   if (!new)
@@ -524,7 +441,6 @@ delete:
     goto done;
 
 done:;
-  }
 
   lp_flush(krt_filter_lp);
 }