From 5a89edc6fd0b03716ccb77084e8d1a1910f52ab0 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Fri, 4 Feb 2022 05:34:02 +0100 Subject: [PATCH] Nest: Implement locking of prefix tries during walks The prune loop may may rebuild the prefix trie and therefore invalidate walk state for asynchronous walks (used in 'show route in' cmd). Fix it by adding locking that keeps the old trie in memory until current walks are done. In future this could be improved by rebuilding trie walk states (by lookup for last found prefix) after the prefix trie rebuild. --- nest/route.h | 6 ++++ nest/rt-show.c | 9 ++++++ nest/rt-table.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/nest/route.h b/nest/route.h index c4e0d2c6..7930058a 100644 --- a/nest/route.h +++ b/nest/route.h @@ -191,6 +191,9 @@ typedef struct rtable { struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ struct f_trie *trie_new; /* New prefix trie defined during pruning */ + struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ + u32 trie_lock_count; /* Prefix trie locked by walks */ + u32 trie_old_lock_count; /* Old prefix trie locked by walks */ list subscribers; /* Subscribers for notifications */ struct timer *settle_timer; /* Settle time for notifications */ @@ -332,6 +335,8 @@ void rt_preconfig(struct config *); void rt_commit(struct config *new, struct config *old); void rt_lock_table(rtable *); void rt_unlock_table(rtable *); +struct f_trie * rt_lock_trie(rtable *tab); +void rt_unlock_trie(rtable *tab, struct f_trie *trie); void rt_subscribe(rtable *tab, struct rt_subscription *s); void rt_unsubscribe(struct rt_subscription *s); void rt_flowspec_link(rtable *src, rtable *dst); @@ -405,6 +410,7 @@ struct rt_show_data { struct rt_show_data_rtable *last_table; /* Last table in output */ struct fib_iterator fit; /* Iterator over networks in table */ struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ + struct f_trie *walk_lock; /* Locked trie for walking */ int verbose, tables_defined_by; const struct filter *filter; struct proto *show_protocol; diff --git a/nest/rt-show.c b/nest/rt-show.c index ea1f918c..f8b7ba51 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -222,6 +222,9 @@ rt_show_cleanup(struct cli *c) if (d->table_open && !d->trie_walk) fit_get(&d->tab->table->fib, &d->fit); + if (d->walk_lock) + rt_unlock_trie(d->tab->table, d->walk_lock); + /* Unlock referenced tables */ WALK_LIST(tab, d->tables) rt_unlock_table(tab->table); @@ -257,7 +260,10 @@ rt_show_cont(struct cli *c) d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state)); if (d->trie_walk) + { + d->walk_lock = rt_lock_trie(tab); trie_walk_init(d->walk_state, tab->trie, d->addr); + } else FIB_ITERATE_INIT(&d->fit, &tab->fib); @@ -288,6 +294,9 @@ rt_show_cont(struct cli *c) if (!--max) return; } + + rt_unlock_trie(tab, d->walk_lock); + d->walk_lock = NULL; } else { diff --git a/nest/rt-table.c b/nest/rt-table.c index ac14aec9..a10979e6 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -2471,7 +2471,19 @@ again: if (tab->trie_new) { /* Finish prefix trie pruning */ - rfree(tab->trie->lp); + + if (!tab->trie_lock_count) + { + rfree(tab->trie->lp); + } + else + { + ASSERT(!tab->trie_old); + tab->trie_old = tab->trie; + tab->trie_old_lock_count = tab->trie_lock_count; + tab->trie_lock_count = 0; + } + tab->trie = tab->trie_new; tab->trie_new = NULL; tab->prune_trie = 0; @@ -2479,7 +2491,7 @@ again: else { /* Schedule prefix trie pruning */ - if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries))) + if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries))) { /* state change 0->1, 2->3 */ tab->prune_state |= 1; @@ -2504,6 +2516,72 @@ again: return; } +/** + * rt_lock_trie - lock a prefix trie of a routing table + * @tab: routing table with prefix trie to be locked + * + * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state + * structures. Therefore, asynchronous walks should lock the prefix trie using + * this function. That allows the prune loop to rebuild the trie, but postpones + * its freeing until all walks are done (unlocked by rt_unlock_trie()). + * + * Return a current trie that will be locked, the value should be passed back to + * rt_unlock_trie() for unlocking. + * + */ +struct f_trie * +rt_lock_trie(rtable *tab) +{ + ASSERT(tab->trie); + + tab->trie_lock_count++; + return tab->trie; +} + +/** + * rt_unlock_trie - unlock a prefix trie of a routing table + * @tab: routing table with prefix trie to be locked + * @trie: value returned by matching rt_lock_trie() + * + * Done for trie locked by rt_lock_trie() after walk over the trie is done. + * It may free the trie and schedule next trie pruning. + */ +void +rt_unlock_trie(rtable *tab, struct f_trie *trie) +{ + ASSERT(trie); + + if (trie == tab->trie) + { + /* Unlock the current prefix trie */ + ASSERT(tab->trie_lock_count); + tab->trie_lock_count--; + } + else if (trie == tab->trie_old) + { + /* Unlock the old prefix trie */ + ASSERT(tab->trie_old_lock_count); + tab->trie_old_lock_count--; + + /* Free old prefix trie that is no longer needed */ + if (!tab->trie_old_lock_count) + { + rfree(tab->trie_old->lp); + tab->trie_old = NULL; + + /* Kick prefix trie pruning that was postponed */ + if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries))) + { + tab->prune_trie = 1; + rt_schedule_prune(tab); + } + } + } + else + log(L_BUG "Invalid arg to rt_unlock_trie()"); +} + + void rt_preconfig(struct config *c) {