From 1b39473993abcc6180657c8d3bd5f9e12e4bc816 Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Fri, 12 Nov 2021 22:58:40 +0100
Subject: [PATCH 01/42] Introducing basic RCU primitives for lock-less shared
 data structures

---
 lib/Makefile            |  2 +-
 lib/coro.h              |  2 ++
 lib/rcu.c               | 79 +++++++++++++++++++++++++++++++++++++++++
 lib/rcu.h               | 55 ++++++++++++++++++++++++++++
 lib/resource.c          |  3 ++
 sysdep/unix/coroutine.c | 10 ++++++
 6 files changed, 150 insertions(+), 1 deletion(-)
 create mode 100644 lib/rcu.c
 create mode 100644 lib/rcu.h

diff --git a/lib/Makefile b/lib/Makefile
index 4378a7bd..98c5db3c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,4 +1,4 @@
-src := bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c
+src := bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c rcu.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c
 
 obj := $(src-o-files)
 $(all-daemon)
diff --git a/lib/coro.h b/lib/coro.h
index 17ccff89..b36f1d2c 100644
--- a/lib/coro.h
+++ b/lib/coro.h
@@ -25,5 +25,7 @@ struct coroutine *coro_run(pool *, void (*entry)(void *), void *data);
 /* Get self. */
 extern _Thread_local struct coroutine *this_coro;
 
+/* Just wait for a little while. Not intended for general use; use events if possible. */
+void coro_yield(void);
 
 #endif
diff --git a/lib/rcu.c b/lib/rcu.c
new file mode 100644
index 00000000..69f3442f
--- /dev/null
+++ b/lib/rcu.c
@@ -0,0 +1,79 @@
+/*
+ *	BIRD Library -- Read-Copy-Update Basic Operations
+ *
+ *	(c) 2021 Maria Matejka
+ *	(c) 2021 CZ.NIC z.s.p.o.
+ *
+ *	Can be freely distributed and used under the terms of the GNU GPL.
+ *	Note: all the relevant patents shall be expired.
+ *
+ *	Using the Supplementary Material for User-Level Implementations of Read-Copy-Update
+ *	by Matthieu Desnoyers, Paul E. McKenney, Alan S. Stern, Michel R. Dagenais and Jonathan Walpole
+ *	obtained from https://www.efficios.com/pub/rcu/urcu-supp-accepted.pdf
+ */
+
+#include "lib/rcu.h"
+#include "lib/coro.h"
+#include "lib/locking.h"
+
+_Atomic uint rcu_gp_ctl = RCU_NEST_CNT;
+_Thread_local struct rcu_coro *this_rcu_coro = NULL;
+
+static list rcu_coro_list;
+
+static struct rcu_coro main_rcu_coro;
+
+DEFINE_DOMAIN(resource);
+static DOMAIN(resource) rcu_domain;
+
+static int
+rcu_gp_ongoing(_Atomic uint *ctl)
+{
+  uint val = atomic_load(ctl);
+  return (val & RCU_NEST_CNT) && ((val ^ rcu_gp_ctl) & RCU_GP_PHASE);
+}
+
+static void
+update_counter_and_wait(void)
+{
+  atomic_fetch_xor(&rcu_gp_ctl, RCU_GP_PHASE);
+  struct rcu_coro *rc;
+  WALK_LIST(rc, rcu_coro_list)
+    while (rcu_gp_ongoing(&rc->ctl))
+      coro_yield();
+}
+
+void
+synchronize_rcu(void)
+{
+  LOCK_DOMAIN(resource, rcu_domain);
+  update_counter_and_wait();
+  update_counter_and_wait();
+  UNLOCK_DOMAIN(resource, rcu_domain);
+}
+
+void
+rcu_coro_start(struct rcu_coro *rc)
+{
+  LOCK_DOMAIN(resource, rcu_domain);
+  add_tail(&rcu_coro_list, &rc->n);
+  this_rcu_coro = rc;
+  UNLOCK_DOMAIN(resource, rcu_domain);
+}
+
+void
+rcu_coro_stop(struct rcu_coro *rc)
+{
+  LOCK_DOMAIN(resource, rcu_domain);
+  this_rcu_coro = NULL;
+  rem_node(&rc->n);
+  UNLOCK_DOMAIN(resource, rcu_domain);
+}
+
+void
+rcu_init(void)
+{
+  rcu_domain = DOMAIN_NEW(resource, "Read-Copy-Update");
+  init_list(&rcu_coro_list);
+  rcu_coro_start(&main_rcu_coro);
+}
diff --git a/lib/rcu.h b/lib/rcu.h
new file mode 100644
index 00000000..ac8fc9ce
--- /dev/null
+++ b/lib/rcu.h
@@ -0,0 +1,55 @@
+/*
+ *	BIRD Library -- Read-Copy-Update Basic Operations
+ *
+ *	(c) 2021 Maria Matejka
+ *	(c) 2021 CZ.NIC z.s.p.o.
+ *
+ *	Can be freely distributed and used under the terms of the GNU GPL.
+ *	Note: all the relevant patents shall be expired.
+ */
+
+#ifndef _BIRD_RCU_H_
+#define _BIRD_RCU_H_
+
+#include "lib/birdlib.h"
+#include "lib/lists.h"
+#include <stdatomic.h>
+
+#define RCU_GP_PHASE	0x100000
+#define RCU_NEST_MASK	0x0fffff
+#define RCU_NEST_CNT	0x000001
+
+extern _Atomic uint rcu_gp_ctl;
+
+struct rcu_coro {
+  node n;
+  _Atomic uint ctl;
+};
+
+extern _Thread_local struct rcu_coro *this_rcu_coro;
+
+static inline void rcu_read_lock(void)
+{
+  uint cmp = atomic_load_explicit(&this_rcu_coro->ctl, memory_order_acquire);
+
+  if (cmp & RCU_NEST_MASK)
+    atomic_store_explicit(&this_rcu_coro->ctl, cmp + RCU_NEST_CNT, memory_order_relaxed);
+  else
+    atomic_store(&this_rcu_coro->ctl, atomic_load_explicit(&rcu_gp_ctl, memory_order_acquire));
+}
+
+static inline void rcu_read_unlock(void)
+{
+  atomic_fetch_sub(&this_rcu_coro->ctl, RCU_NEST_CNT);
+}
+
+void synchronize_rcu(void);
+
+/* Registering and unregistering a coroutine. To be called from coroutine implementation */
+void rcu_coro_start(struct rcu_coro *);
+void rcu_coro_stop(struct rcu_coro *);
+
+/* Run this from resource init */
+void rcu_init(void);
+
+#endif
diff --git a/lib/resource.c b/lib/resource.c
index 2d041ad5..0651406f 100644
--- a/lib/resource.c
+++ b/lib/resource.c
@@ -13,6 +13,7 @@
 #include "nest/bird.h"
 #include "lib/resource.h"
 #include "lib/string.h"
+#include "lib/rcu.h"
 
 /**
  * DOC: Resource pools
@@ -284,6 +285,8 @@ rlookup(unsigned long a)
 void
 resource_init(void)
 {
+  rcu_init();
+
   root_pool.r.class = &pool_class;
   root_pool.name = "Root";
   init_list(&root_pool.inside);
diff --git a/sysdep/unix/coroutine.c b/sysdep/unix/coroutine.c
index 4758c056..12ba55d8 100644
--- a/sysdep/unix/coroutine.c
+++ b/sysdep/unix/coroutine.c
@@ -18,6 +18,7 @@
 #include "lib/birdlib.h"
 #include "lib/locking.h"
 #include "lib/coro.h"
+#include "lib/rcu.h"
 #include "lib/resource.h"
 #include "lib/timer.h"
 
@@ -128,6 +129,7 @@ struct coroutine {
   resource r;
   pthread_t id;
   pthread_attr_t attr;
+  struct rcu_coro rcu;
   void (*entry)(void *);
   void *data;
 };
@@ -137,6 +139,7 @@ static _Thread_local _Bool coro_cleaned_up = 0;
 static void coro_free(resource *r)
 {
   struct coroutine *c = (void *) r;
+  rcu_coro_stop(&c->rcu);
   ASSERT_DIE(pthread_equal(pthread_self(), c->id));
   pthread_attr_destroy(&c->attr);
   coro_cleaned_up = 1;
@@ -157,6 +160,7 @@ static void *coro_entry(void *p)
   ASSERT_DIE(c->entry);
 
   this_coro = c;
+  rcu_coro_start(&c->rcu);
 
   c->entry(c->data);
 
   ASSERT_DIE(coro_cleaned_up);
@@ -190,3 +194,9 @@ struct coroutine *coro_run(pool *p, void (*entry)(void *), void *data)
 
   return c;
 }
+
+void
+coro_yield(void)
+{
+  usleep(100);
+}
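How these primitives compose, since the patch carries no commit-message explanation: readers bracket their accesses with rcu_read_lock()/rcu_read_unlock(), and an updater publishes a new version, then calls synchronize_rcu() before freeing the old one. A minimal sketch in the same spirit -- shared_cfg, struct my_cfg and the free() of the old copy are illustrative names, not part of the patch:

    #include <stdatomic.h>
    #include <stdlib.h>
    #include "lib/rcu.h"

    struct my_cfg { int value; };
    static struct my_cfg * _Atomic shared_cfg;

    void reader(void)
    {
      rcu_read_lock();		/* enter read-side critical section */
      struct my_cfg *c = atomic_load_explicit(&shared_cfg, memory_order_acquire);
      /* ... use *c; it cannot be reclaimed while we stay in the section ... */
      rcu_read_unlock();	/* leave; a grace period may now complete */
    }

    void updater(struct my_cfg *next)
    {
      struct my_cfg *old = atomic_exchange_explicit(&shared_cfg, next, memory_order_acq_rel);
      synchronize_rcu();	/* wait until no reader can still hold the old pointer */
      free(old);		/* now safe to reclaim */
    }

Note the two calls to update_counter_and_wait() inside synchronize_rcu(): the double phase flip is what keeps a reader that entered just before the flip from being mistaken for one that entered after it.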
From 1e8e3b7c1214393844c35fe3fbba9865b56ec95f Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Tue, 16 Nov 2021 21:13:32 +0000
Subject: [PATCH 02/42] More information on RPKI errors

---
 proto/rpki/packets.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c
index 897edc09..38830d28 100644
--- a/proto/rpki/packets.c
+++ b/proto/rpki/packets.c
@@ -233,7 +233,12 @@ static const size_t min_pdu_size[] = {
   [ERROR] = 16,
 };
 
-static int rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...);
+static int rpki_send_error_pdu_(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...);
+
+#define rpki_send_error_pdu(cache, error_code, err_pdu_len, erroneous_pdu, fmt...) ({ \
+    rpki_send_error_pdu_(cache, error_code, err_pdu_len, erroneous_pdu, fmt); \
+    CACHE_TRACE(D_PACKETS, cache, fmt); \
+    })
 
 static void
 rpki_pdu_to_network_byte_order(struct pdu_header *pdu)
@@ -595,6 +600,7 @@ rpki_handle_error_pdu(struct rpki_cache *cache, const struct pdu_error *pdu)
     case INTERNAL_ERROR:
     case INVALID_REQUEST:
     case UNSUPPORTED_PDU_TYPE:
+      CACHE_TRACE(D_PACKETS, cache, "Got UNSUPPORTED_PDU_TYPE");
       rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL);
       break;
 
@@ -1013,7 +1019,7 @@ rpki_connected_hook(sock *sk)
  * This function prepares Error PDU and sends it to a cache server.
  */
 static int
-rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...)
+rpki_send_error_pdu_(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...)
{
   va_list args;
   char msg[128];
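The wrapper is a GNU C statement expression, so one call now both sends the Error PDU and leaves the same message in the protocol trace. An illustrative call site -- the condition and message are made up for the example; only the macro itself comes from the patch:

    /* Hypothetical: reject an undersized PDU, tracing the same text
     * that is carried inside the outgoing Error PDU. */
    if (pdu->len < min_pdu_size[pdu->type])
      rpki_send_error_pdu(cache, CORRUPT_DATA, pdu->len, pdu,
			  "Undersized PDU of type %u (%u bytes)",
			  pdu->type, pdu->len);

One caveat of this construction: fmt and its arguments are expanded twice, once for the PDU and once for the trace, so they must be free of side effects.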
From 794a4eefa11f3166404d91edddd0f4f19458f652 Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Thu, 11 Nov 2021 16:25:59 +0100
Subject: [PATCH 03/42] Keeping un-unmappable pages until they can be reused

On Linux, munmap() may fail with ENOMEM when virtual memory is too
fragmented. We work around this by keeping such blocks for future use.
---
 lib/locking.h       |  1 +
 lib/resource.c      | 23 +++++++++++++++++------
 sysdep/unix/alloc.c | 43 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/lib/locking.h b/lib/locking.h
index 0a69f50f..1a8bdcd4 100644
--- a/lib/locking.h
+++ b/lib/locking.h
@@ -19,6 +19,7 @@ struct lock_order {
   struct domain_generic *attrs;
   struct domain_generic *cork;
   struct domain_generic *event;
+  struct domain_generic *resource;
 };
 
 extern _Thread_local struct lock_order locking_stack;
diff --git a/lib/resource.c b/lib/resource.c
index e80b315b..2d041ad5 100644
--- a/lib/resource.c
+++ b/lib/resource.c
@@ -60,7 +60,7 @@ static struct resclass pool_class = {
 pool root_pool;
 
 void *alloc_sys_page(void);
-void free_sys_page(void *);
+int free_sys_page(void *);
 
 static int indent;
@@ -98,8 +98,10 @@ pool_free(resource *P)
   if (p->pages)
   {
     ASSERT_DIE(!p->pages->used);
-    for (uint i=0; i<p->pages->free; i++)
+
+    for (uint i = 0; i < p->pages->free; i++)
       free_sys_page(p->pages->ptr[i]);
+
     free_sys_page(p->pages);
   }
 }
@@ -476,10 +478,19 @@ free_page(pool *p, void *ptr)
   ASSERT_DIE(p->pages);
   p->pages->used--;
 
-  if (p->pages->free >= POOL_PAGES_MAX)
-    return free_sys_page(ptr);
-  else
-    p->pages->ptr[p->pages->free++] = ptr;
+  ASSERT_DIE(p->pages->free <= POOL_PAGES_MAX);
+
+  if (p->pages->free == POOL_PAGES_MAX)
+  {
+    const unsigned long keep = POOL_PAGES_MAX / 4;
+
+    for (uint i = keep; i < p->pages->free; i++)
+      free_sys_page(p->pages->ptr[i]);
+
+    p->pages->free = keep;
+  }
+
+  p->pages->ptr[p->pages->free++] = ptr;
 }
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index 4c9d5eb5..4ae1a9db 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -11,6 +11,8 @@
 #include <stdlib.h>
 #include <unistd.h>
+#include <errno.h>
+#include <stdatomic.h>
 
 #ifdef HAVE_MMAP
 #include <sys/mman.h>
 #endif
 
 long page_size = 0;
 _Bool alloc_multipage = 0;
 
+static _Atomic int global_page_list_not_empty;
+static list global_page_list;
+static _Atomic int global_page_spinlock;
+
+#define GLOBAL_PAGE_SPIN_LOCK for (int v = 0; !atomic_compare_exchange_weak_explicit(&global_page_spinlock, &v, 1, memory_order_acq_rel, memory_order_acquire); v = 0)
+#define GLOBAL_PAGE_SPIN_UNLOCK do { int v = 1; ASSERT_DIE(atomic_compare_exchange_strong_explicit(&global_page_spinlock, &v, 0, memory_order_acq_rel, memory_order_acquire)); } while (0)
+
 #ifdef HAVE_MMAP
 static _Bool use_fake = 0;
 #else
 static _Bool use_fake = 1;
 #endif
 
 void resource_sys_init(void)
 {
 #ifdef HAVE_MMAP
+  init_list(&global_page_list);
+
   if (!(page_size = sysconf(_SC_PAGESIZE)))
     die("System page size must be non-zero");
 
   if ((u64_popcount(page_size) > 1) || (page_size > 16384))
-  {
 #endif
+  {
     /* Too big or strange page, use the aligned allocator instead */
     page_size = 4096;
     use_fake = 1;
 
 void *
 alloc_sys_page(void)
 {
 #ifdef HAVE_MMAP
   if (!use_fake)
   {
+    if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed))
+    {
+      GLOBAL_PAGE_SPIN_LOCK;
+      if (!EMPTY_LIST(global_page_list))
+      {
+	node *ret = HEAD(global_page_list);
+	rem_node(ret);
+	if (EMPTY_LIST(global_page_list))
+	  atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed);
+	GLOBAL_PAGE_SPIN_UNLOCK;
+	memset(ret, 0, sizeof(node));
+	return (void *) ret;
+      }
+      GLOBAL_PAGE_SPIN_UNLOCK;
+    }
+
     if (alloc_multipage)
     {
       void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -90,7 +117,19 @@ free_sys_page(void *ptr)
   if (!use_fake)
   {
     if (munmap(ptr, page_size) < 0)
-      bug("munmap(%p) failed: %m", ptr);
+#ifdef ENOMEM
+      if (errno == ENOMEM)
+      {
+	memset(ptr, 0, page_size);
+
+	GLOBAL_PAGE_SPIN_LOCK;
+	add_tail(&global_page_list, (node *) ptr);
+	atomic_store_explicit(&global_page_list_not_empty, 1, memory_order_relaxed);
+	GLOBAL_PAGE_SPIN_UNLOCK;
+      }
+      else
+#endif
+	bug("munmap(%p) failed: %m", ptr);
   }
   else
 #endif
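The two GLOBAL_PAGE_SPIN_* macros implement a tiny test-and-set spinlock from C11 atomics. The same pattern in isolation, as a standalone sketch (not part of the patch):

    #include <stdatomic.h>
    #include <assert.h>

    static _Atomic int lock;	/* 0 = free, 1 = taken */

    static void spin_lock(void)
    {
      int v = 0;
      /* Weak CAS may fail spuriously, hence the retry loop resetting v. */
      while (!atomic_compare_exchange_weak_explicit(&lock, &v, 1,
		memory_order_acq_rel, memory_order_acquire))
	v = 0;
    }

    static void spin_unlock(void)
    {
      int v = 1;
      /* Strong CAS: unlocking a held lock must succeed exactly once. */
      assert(atomic_compare_exchange_strong_explicit(&lock, &v, 0,
		memory_order_acq_rel, memory_order_acquire));
    }

The weak variant is used on the lock side because it is cheaper on LL/SC architectures and the loop retries anyway; the unlock side uses the strong variant so a failure there reliably indicates lock corruption.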
From 2a224a9e1e1fbe1abec96c8585dd754fb716d020 Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Sat, 13 Nov 2021 17:52:34 +0100
Subject: [PATCH 04/42] Route sources have their separate global lock

---
 nest/rt-attr.c | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index cd4c6892..cb66b65d 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -86,8 +86,13 @@ const char * rta_dest_names[RTD_MAX] = {
 };
 
 DOMAIN(attrs) attrs_domain;
+DOMAIN(attrs) src_domain;
+
+#define SRC_LOCK	LOCK_DOMAIN(attrs, src_domain)
+#define SRC_UNLOCK	UNLOCK_DOMAIN(attrs, src_domain)
 
 pool *rta_pool;
+pool *src_pool;
 
 static slab *rta_slab_[4];
 static slab *nexthop_slab_[4];
@@ -110,9 +115,11 @@ static struct idm src_ids;
 static void
 rte_src_init(void)
 {
-  rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
+  src_domain = DOMAIN_NEW(attrs, "Route sources");
+  src_pool = rp_new(&root_pool, "Route sources");
+  rte_src_slab = sl_new(src_pool, sizeof(struct rte_src));
 
-  idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
+  idm_init(&src_ids, src_pool, SRC_ID_INIT_SIZE);
 }
 
 HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
@@ -137,7 +144,7 @@ rt_get_source_o(struct rte_owner *p, u32 id)
     return src;
   }
 
-  RTA_LOCK;
+  SRC_LOCK;
   src = sl_allocz(rte_src_slab);
   src->owner = p;
   src->private_id = id;
@@ -146,12 +153,12 @@ rt_get_source_o(struct rte_owner *p, u32 id)
   atomic_store_explicit(&src->uc, 1, memory_order_release);
   p->uc++;
 
-  HASH_INSERT2(p->hash, RSH, rta_pool, src);
+  HASH_INSERT2(p->hash, RSH, src_pool, src);
   if (config->table_debug)
     log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now",
	p->name, src->private_id, src->global_id, p->uc);
 
-  RTA_UNLOCK;
+  SRC_UNLOCK;
 
   return src;
 }
@@ -182,21 +189,21 @@ rt_prune_sources(void *data)
 
       HASH_DO_REMOVE(o->hash, RSH, sp);
 
-      RTA_LOCK;
+      SRC_LOCK;
       idm_free(&src_ids, src->global_id);
       sl_free(rte_src_slab, src);
-      RTA_UNLOCK;
+      SRC_UNLOCK;
     }
   }
   HASH_WALK_FILTER_END;
 
-  RTA_LOCK;
-  HASH_MAY_RESIZE_DOWN(o->hash, RSH, rta_pool);
+  SRC_LOCK;
+  HASH_MAY_RESIZE_DOWN(o->hash, RSH, src_pool);
 
   if (o->stop && !o->uc)
   {
     rfree(o->prune);
-    RTA_UNLOCK;
+    SRC_UNLOCK;
 
     if (config->table_debug)
       log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name);
@@ -204,21 +211,21 @@ rt_prune_sources(void *data)
     rt_done_sources(o);
   }
   else
-    RTA_UNLOCK;
+    SRC_UNLOCK;
 }
 
 void
 rt_init_sources(struct rte_owner *o, const char *name, event_list *list)
 {
-  RTA_LOCK;
-  HASH_INIT(o->hash, rta_pool, RSH_INIT_ORDER);
+  SRC_LOCK;
+  HASH_INIT(o->hash, src_pool, RSH_INIT_ORDER);
   o->hash_key = random_u32();
   o->uc = 0;
   o->name = name;
-  o->prune = ev_new_init(rta_pool, rt_prune_sources, o);
+  o->prune = ev_new_init(src_pool, rt_prune_sources, o);
   o->stop = NULL;
   o->list = list;
-  RTA_UNLOCK;
+  SRC_UNLOCK;
 }
 
 void
@@ -231,9 +238,9 @@ rt_destroy_sources(struct rte_owner *o, event *done)
     if (config->table_debug)
       log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name);
 
-    RTA_LOCK;
+    SRC_LOCK;
     rfree(o->prune);
-    RTA_UNLOCK;
+    SRC_UNLOCK;
 
     rt_done_sources(o);
   }
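The point of the split: rte_src allocation and pruning can be heavy under route churn, and with a dedicated src_domain lock and src_pool it no longer contends with attribute-cache lookups under RTA_LOCK. The lock-domain idiom itself mirrors its use in lib/rcu.c from the first patch; condensed here with made-up demo_* names:

    /* Illustrative: one lock instance guarding some shared structure. */
    static DOMAIN(attrs) demo_domain;

    #define DEMO_LOCK	LOCK_DOMAIN(attrs, demo_domain)
    #define DEMO_UNLOCK	UNLOCK_DOMAIN(attrs, demo_domain)

    static void demo_init(void)
    {
      demo_domain = DOMAIN_NEW(attrs, "Demo structures");
    }

    static void demo_update(void)
    {
      DEMO_LOCK;
      /* ... mutate the structures owned by this domain ... */
      DEMO_UNLOCK;
    }

Each domain corresponds to a slot in struct lock_order (see lib/locking.h in the previous patch), which is what allows lock acquisition order to be checked at runtime.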
"keeps " : ""), cache->expire_interval); } - if (cache->p->refresh_channels) - { - cache->p->refresh_channels = 0; - if (cache->p->roa4_channel) - rt_refresh_end(&cache->p->roa4_channel->in_req); - if (cache->p->roa6_channel) - rt_refresh_end(&cache->p->roa6_channel->in_req); - } + rpki_stop_refresh(cache->p); cache->last_update = current_time(); cache->serial_num = pdu->serial_num; diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 95066499..e3fccb48 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -139,6 +139,30 @@ rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const n rte_update(channel, &pfxr->n, NULL, p->p.main_source); } +void +rpki_start_refresh(struct rpki_proto *p) +{ + if (p->roa4_channel) + rt_refresh_begin(&p->roa4_channel->in_req); + if (p->roa6_channel) + rt_refresh_begin(&p->roa6_channel->in_req); + + p->refresh_channels = 1; +} + +void +rpki_stop_refresh(struct rpki_proto *p) +{ + if (!p->refresh_channels) + return; + + p->refresh_channels = 0; + + if (p->roa4_channel) + rt_refresh_end(&p->roa4_channel->in_req); + if (p->roa6_channel) + rt_refresh_end(&p->roa6_channel->in_req); +} /* * RPKI Protocol Logic @@ -626,6 +650,7 @@ rpki_close_connection(struct rpki_cache *cache) { CACHE_TRACE(D_EVENTS, cache, "Closing a connection"); rpki_tr_close(cache->tr_sock); + rpki_stop_refresh(cache->p); proto_notify_state(&cache->p->p, PS_START); } diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h index 8a5c38fd..a70a2027 100644 --- a/proto/rpki/rpki.h +++ b/proto/rpki/rpki.h @@ -83,6 +83,8 @@ const char *rpki_cache_state_to_str(enum rpki_cache_state state); void rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); +void rpki_start_refresh(struct rpki_proto *p); +void rpki_stop_refresh(struct rpki_proto *p); /* * RPKI Protocol Logic From 878eeec12bf020c9e7460040d225a929bbbd2bd2 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 29 Sep 2021 16:15:13 +0200 Subject: [PATCH 06/42] Routing tables now have their own loops. 
From 878eeec12bf020c9e7460040d225a929bbbd2bd2 Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Wed, 29 Sep 2021 16:15:13 +0200
Subject: [PATCH 06/42] Routing tables now have their own loops.

This basically means that:

* there are some more levels of indirection and asynchronicity, mostly
  in cleanup procedures, requiring correct lock ordering
* all the internal table operations (prune, next hop update) are done
  without blocking the other parts of BIRD
* the protocols may get their own loops very soon
---
 filter/f-inst.c       |   4 +-
 nest/proto.c          |  77 +++++---
 nest/protocol.h       |   9 +-
 nest/route.h          | 106 ++++++-----
 nest/rt-attr.c        |  11 +-
 nest/rt-show.c        |  29 ++-
 nest/rt-table.c       | 399 +++++++++++++++++++++++++++++-------------
 proto/bgp/attrs.c     |   2 +-
 proto/bgp/bgp.c       |  12 +-
 proto/bgp/bgp.h       |   2 +-
 proto/mrt/mrt.c       |  59 +++++--
 proto/mrt/mrt.h       |   6 +-
 proto/perf/perf.c     |   4 +-
 proto/radv/radv.c     |   5 +-
 proto/static/static.c |  12 +-
 sysdep/unix/krt.c     |   9 +-
 16 files changed, 504 insertions(+), 242 deletions(-)

diff --git a/filter/f-inst.c b/filter/f-inst.c
index 706eb684..0341a2f1 100644
--- a/filter/f-inst.c
+++ b/filter/f-inst.c
@@ -1212,7 +1212,7 @@
   INST(FI_ROA_CHECK_IMPLICIT, 0, 1) {	/* ROA Check */
     NEVER_CONSTANT;
     RTC(1);
-    struct rtable *table = rtc->table;
+    rtable *table = rtc->table;
     ACCESS_RTE;
     ACCESS_EATTRS;
     const net_addr *net = fs->rte->net;
@@ -1244,7 +1244,7 @@
     ARG(1, T_NET);
     ARG(2, T_INT);
     RTC(3);
-    struct rtable *table = rtc->table;
+    rtable *table = rtc->table;
 
     u32 as = v2.val.i;
 
diff --git a/nest/proto.c b/nest/proto.c
index 35af3c6c..4ae0cbfd 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -172,7 +172,7 @@ proto_cf_find_channel(struct proto_config *pc, uint net_type)
  * Returns pointer to channel or NULL
  */
 struct channel *
-proto_find_channel_by_table(struct proto *p, struct rtable *t)
+proto_find_channel_by_table(struct proto *p, rtable *t)
 {
   struct channel *c;
 
@@ -236,7 +236,9 @@ proto_add_channel(struct proto *p, struct channel_config *cf)
   c->channel = cf->channel;
   c->proto = p;
   c->table = cf->table->table;
-  rt_lock_table(c->table);
+
+  RT_LOCKED(c->table, t)
+    rt_lock_table(t);
 
   c->in_filter = cf->in_filter;
   c->out_filter = cf->out_filter;
@@ -277,7 +279,9 @@ proto_remove_channel(struct proto *p UNUSED, struct channel *c)
 
   CD(c, "Removed", c->name);
 
-  rt_unlock_table(c->table);
+  RT_LOCKED(c->table, t)
+    rt_unlock_table(t);
+
   rem_node(&c->n);
   mb_free(c);
 }
@@ -391,7 +395,7 @@ static void
 channel_roa_subscribe_filter(struct channel *c, int dir)
 {
   const struct filter *f = dir ? c->in_filter : c->out_filter;
-  struct rtable *tab;
+  rtable *tab;
   int valid = 1, found = 0;
 
   if ((f == FILTER_ACCEPT) || (f == FILTER_REJECT))
@@ -560,11 +564,11 @@ channel_check_stopped(struct channel *c)
 }
 
 void
-channel_import_stopped(struct rt_import_request *req)
+channel_import_stopped(void *_c)
 {
-  struct channel *c = SKIP_BACK(struct channel, in_req, req);
+  struct channel *c = _c;
 
-  req->hook = NULL;
+  c->in_req.hook = NULL;
 
   mb_free(c->in_req.name);
   c->in_req.name = NULL;
@@ -661,17 +665,16 @@ channel_aux_stopped(void *data)
   else
     c->in_table = NULL;
 
-  rfree(cat->tab->rp);
-
+  rfree(cat->tab->priv.rp);
   mb_free(cat);
-  return channel_check_stopped(c);
+  channel_check_stopped(c);
 }
 
 static void
-channel_aux_import_stopped(struct rt_import_request *req)
+channel_aux_import_stopped(void *_cat)
 {
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req);
-  ASSERT_DIE(cat->tab->delete_event);
+  struct channel_aux_table *cat = _cat;
+  cat->push.hook = NULL;
 }
 
 static void
@@ -680,24 +683,35 @@ channel_aux_export_stopped(struct rt_export_request *req)
   struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
   req->hook = NULL;
 
-  if (cat->refeed_pending && !cat->tab->delete_event)
-  {
-    cat->refeed_pending = 0;
-    rt_request_export(cat->tab, req);
-  }
-  else
-    ASSERT_DIE(cat->tab->delete_event);
+  int del;
+  RT_LOCKED(cat->tab, t)
+    del = !!t->delete_event;
+
+  if (del)
+    return;
+
+  ASSERT_DIE(cat->refeed_pending);
+  cat->refeed_pending = 0;
+  rt_request_export(cat->tab, req);
 }
 
 static void
 channel_aux_stop(struct channel_aux_table *cat)
 {
-  rt_stop_import(&cat->push, channel_aux_import_stopped);
+  RT_LOCKED(cat->tab, t)
+  {
+    t->delete_event = ev_new_init(t->rp, channel_aux_stopped, cat);
+    t->delete_event->list = proto_event_list(cat->c->proto);
+  }
+
+  cat->push_stopped = (event) {
+    .hook = channel_aux_import_stopped,
+    .data = cat,
+    .list = proto_event_list(cat->c->proto),
+  };
+
+  rt_stop_import(&cat->push, &cat->push_stopped);
   rt_stop_export(&cat->get, channel_aux_export_stopped);
-
-  cat->tab->delete_event = ev_new_init(cat->tab->rp, channel_aux_stopped, cat);
-
-  rt_unlock_table(cat->tab);
 }
 
 static void
@@ -889,7 +903,6 @@ channel_setup_in_table(struct channel *c, int best)
   c->in_table->c = c;
   c->in_table->tab = rt_setup(c->proto->pool, &cat->tab_cf);
-  rt_lock_table(c->in_table->tab);
 
   rt_request_import(c->in_table->tab, &c->in_table->push);
   rt_request_export(c->in_table->tab, &c->in_table->get);
@@ -931,7 +944,6 @@ channel_setup_out_table(struct channel *c)
   c->out_table->c = c;
   c->out_table->tab = rt_setup(c->proto->pool, &cat->tab_cf);
-  rt_lock_table(c->out_table->tab);
 
   rt_request_import(c->out_table->tab, &c->out_table->push);
   rt_request_export(c->out_table->tab, &c->out_table->get);
@@ -993,7 +1005,14 @@ channel_do_stop(struct channel *c)
   /* Stop import */
   if (c->in_req.hook)
-    rt_stop_import(&c->in_req, channel_import_stopped);
+  {
+    c->in_stopped = (event) {
+      .hook = channel_import_stopped,
+      .data = c,
+      .list = proto_event_list(c->proto),
+    };
+    rt_stop_import(&c->in_req, &c->in_stopped);
+  }
 
   c->gr_wait = 0;
   if (c->gr_lock)
@@ -2339,7 +2358,7 @@ proto_do_start(struct proto *p)
 {
   p->active = 1;
 
-  rt_init_sources(&p->sources, p->name, proto_event_list(p));
+  rt_init_sources(&p->sources, p->name, proto_work_list(p));
   if (!p->sources.class)
     p->sources.class = &default_rte_owner_class;
 
diff --git a/nest/protocol.h b/nest/protocol.h
index 1647fbba..8d077e44 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -18,7 +18,6 @@
 struct iface;
 struct ifa;
-struct rtable;
 struct rte;
 struct neighbor;
 struct rta;
@@ -207,7 +206,7 @@ struct proto {
  *	   rte_remove	Called whenever a rte is removed from the routing table.
  */
 
-  int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
+  int (*rte_recalculate)(rtable *, struct network *, struct rte *, struct rte *, struct rte *);
   int (*rte_better)(struct rte *, struct rte *);
   int (*rte_mergable)(struct rte *, struct rte *);
   void (*rte_insert)(struct network *, struct rte *);
@@ -496,7 +495,7 @@ struct channel {
   const struct channel_class *channel;
   struct proto *proto;
 
-  struct rtable *table;
+  rtable *table;
   const struct filter *in_filter;	/* Input filter */
   const struct filter *out_filter;	/* Output filter */
   struct bmap export_map;		/* Keeps track which routes were really exported */
@@ -556,6 +555,7 @@ struct channel {
   btime last_state_change;		/* Time of last state transition */
 
   struct channel_aux_table *in_table;	/* Internal table for received routes */
+  struct event in_stopped;		/* Import stop callback */
 
   u8 reload_pending;			/* Reloading and another reload is scheduled */
   u8 refeed_pending;			/* Refeeding and another refeed is scheduled */
@@ -570,6 +570,7 @@ struct channel_aux_table {
   struct channel *c;
   struct rt_import_request push;
   struct rt_export_request get;
+  event push_stopped;
   rtable *tab;
   event *stop;
   u8 refeed_pending;
@@ -633,7 +634,7 @@ struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_ty
 static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc)
 { return proto_cf_find_channel(pc, pc->net_type); }
 
-struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t);
+struct channel *proto_find_channel_by_table(struct proto *p, rtable *t);
 struct channel *proto_find_channel_by_name(struct proto *p, const char *n);
 struct channel *proto_add_channel(struct proto *p, struct channel_config *cf);
 int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf);
diff --git a/nest/route.h b/nest/route.h
index 683c966e..9417d97d 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -146,30 +146,21 @@ void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src)
  * It's guaranteed that there is at most one RTE for every (prefix,proto) pair.
 */
 
-struct rtable_config {
-  node n;
-  char *name;
-  struct config *config;
-  struct rtable *table;
-  struct proto_config *krt_attached;	/* Kernel syncer attached to this table */
-  uint addr_type;			/* Type of address data stored in table (NET_*) */
-  int gc_max_ops;			/* Maximum number of operations before GC is run */
-  int gc_min_time;			/* Minimum time between two consecutive GC runs */
-  byte sorted;				/* Routes of network are sorted according to rte_better() */
-  btime min_settle_time;		/* Minimum settle time for notifications */
-  btime max_settle_time;		/* Maximum settle time for notifications */
-  btime export_settle_time;		/* Delay before exports are announced */
-  uint cork_limit;			/* Amount of routes to be pending on export to cork imports */
-};
+typedef struct rtable_private {
+#define RTABLE_PUBLIC \
+  resource r; \
+  node n;				/* Node in list of all tables */ \
+  struct birdloop *loop;		/* This loop runs the table */ \
+  char *name;				/* Name of this table */ \
+  uint addr_type;			/* Type of address data stored in table (NET_*) */ \
+  struct rtable_config *config;		/* Configuration of this table */ \
+  struct event *nhu_event;		/* Event to update next hops */ \
+  _Atomic byte nhu_state;		/* Next Hop Update state */ \
 
-typedef struct rtable {
-  resource r;
-  node n;				/* Node in list of all tables */
+  RTABLE_PUBLIC;
   pool *rp;				/* Resource pool to allocate everything from, including itself */
   struct slab *rte_slab;		/* Slab to allocate route objects */
   struct fib fib;
-  char *name;				/* Name of this table */
-  uint addr_type;			/* Type of address data stored in table (NET_*) */
   int use_count;			/* Number of protocols using this table */
   u32 rt_count;				/* Number of routes in the table */
 
@@ -178,18 +169,15 @@ typedef struct rtable {
   struct hmap id_map;
   struct hostcache *hostcache;
-  struct rtable_config *config;		/* Configuration of this table */
   struct event *prune_event;		/* Event to prune abandoned routes */
   struct event *ec_event;		/* Event to prune finished exports */
   struct event *hcu_event;		/* Event to update host cache */
-  struct event *nhu_event;		/* Event to update next hops */
   struct event *delete_event;		/* Event to delete the table */
   btime last_rt_change;			/* Last time when route changed */
   btime base_settle_time;		/* Start time of rtable settling interval */
   btime gc_time;			/* Time of last GC */
   int gc_counter;			/* Number of operations since last GC */
   byte prune_state;			/* Table prune state, 1 -> scheduled, 2-> running */
-  byte nhu_state;			/* Next Hop Update state */
 
   byte cork_active;			/* Congestion control activated */
 
@@ -208,8 +196,35 @@ typedef struct rtable {
   struct rt_pending_export *first_export;	/* First export to announce */
   u64 next_export_seq;			/* The next export will have this ID */
+} rtable_private;
+
+typedef union {
+  struct { RTABLE_PUBLIC };
+  rtable_private priv;
 } rtable;
 
+#define RT_LOCK(tab)	({ birdloop_enter((tab)->loop); &(tab)->priv; })
+#define RT_UNLOCK(tab)	birdloop_leave((tab)->loop)
+#define RT_PRIV(tab)	({ ASSERT_DIE(birdloop_inside((tab)->loop)); &(tab)->priv; })
+
+#define RT_LOCKED(tpub, tpriv)	for (rtable_private *tpriv = RT_LOCK(tpub); tpriv; RT_UNLOCK(tpriv), (tpriv = NULL))
+
+struct rtable_config {
+  node n;
+  char *name;
+  struct config *config;
+  rtable *table;
+  struct proto_config *krt_attached;	/* Kernel syncer attached to this table */
+  uint addr_type;			/* Type of address data stored in table (NET_*) */
+  int gc_max_ops;			/* Maximum number of operations before GC is run */
+  int gc_min_time;			/* Minimum time between two consecutive GC runs */
+  byte sorted;				/* Routes of network are sorted according to rte_better() */
+  btime min_settle_time;		/* Minimum settle time for notifications */
+  btime max_settle_time;		/* Maximum settle time for notifications */
+  btime export_settle_time;		/* Delay before exports are announced */
+  uint cork_limit;			/* Amount of routes to be pending on export to cork imports */
+};
+
 struct rt_subscription {
   node n;
   rtable *tab;
@@ -244,7 +259,7 @@ struct hostentry {
   ip_addr addr;				/* IP address of host, part of key */
   ip_addr link;				/* (link-local) IP address of host, used as gw if host is directly attached */
-  struct rtable *tab;			/* Dependent table, part of key */
+  rtable *tab;				/* Dependent table, part of key */
   struct hostentry *next;		/* Next in hash chain */
   unsigned hash_key;			/* Hash key */
   unsigned uc;				/* Use count */
@@ -324,7 +339,7 @@ struct rt_import_hook {
   u8 stale_pruned;			/* Last prune finished when this value was set at stale_valid */
   u8 stale_pruning;			/* Last prune started when this value was set at stale_valid */
 
-  void (*stopped)(struct rt_import_request *);	/* Stored callback when import is stopped */
+  struct event *stopped;		/* Event to run when import is stopped */
 };
 
 struct rt_pending_export {
@@ -405,7 +420,7 @@ extern struct event_cork rt_cork;
 void rt_request_import(rtable *tab, struct rt_import_request *req);
 void rt_request_export(rtable *tab, struct rt_export_request *req);
 
-void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *));
+void rt_stop_import(struct rt_import_request *, struct event *stopped);
 void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *));
 
 const char *rt_import_state_name(u8 state);
@@ -480,27 +495,27 @@ struct config;
 void rt_init(void);
 void rt_preconfig(struct config *);
 void rt_commit(struct config *new, struct config *old);
-void rt_lock_table(rtable *);
-void rt_unlock_table(rtable *);
+void rt_lock_table(rtable_private *);
+void rt_unlock_table(rtable_private *);
 void rt_subscribe(rtable *tab, struct rt_subscription *s);
 void rt_unsubscribe(struct rt_subscription *s);
 rtable *rt_setup(pool *, struct rtable_config *);
 
-static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
-static inline net *net_find_valid(rtable *tab, const net_addr *addr)
+static inline net *net_find(rtable_private *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
+static inline net *net_find_valid(rtable_private *tab, const net_addr *addr)
 { net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; }
-static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
-void *net_route(rtable *tab, const net_addr *n);
+static inline net *net_get(rtable_private *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
+void *net_route(rtable_private *tab, const net_addr *n);
 int net_roa_check(rtable *tab, const net_addr *n, u32 asn);
-int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
+int rt_examine(rtable_private *t, net_addr *a, struct channel *c, const struct filter *filter);
 rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent);
 void rt_refresh_begin(struct rt_import_request *);
 void rt_refresh_end(struct rt_import_request *);
-void rt_schedule_prune(rtable *t);
+void rt_schedule_prune(rtable_private *t);
 void rte_dump(struct rte_storage *);
-void rte_free(struct rte_storage *, rtable *);
-struct rte_storage *rte_store(const rte *, net *net, rtable *);
+void rte_free(struct rte_storage *, rtable_private *);
+struct rte_storage *rte_store(const rte *, net *net, rtable_private *);
 void rt_dump(rtable *);
 void rt_dump_all(void);
 void rt_dump_hooks(rtable *);
@@ -591,7 +606,7 @@ struct rte_src {
 
 typedef struct rta {
   struct rta *next, **pprev;		/* Hash chain */
-  _Atomic u32 uc;			/* Use count */
+  u32 uc;				/* Use count */
   u32 hash_key;				/* Hash over important fields */
   struct ea_list *eattrs;		/* Extended Attribute chain */
   struct hostentry *hostentry;		/* Hostentry for recursive next-hops */
@@ -732,7 +747,7 @@ struct rte_owner_class {
 struct rte_owner {
   struct rte_owner_class *class;
-  int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
+  int (*rte_recalculate)(rtable_private *, struct network *, struct rte *, struct rte *, struct rte *);
   HASH(struct rte_src) hash;
   const char *name;
   u32 hash_key;
@@ -863,9 +878,20 @@ static inline size_t rta_size(const rta *a)
 #define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
 rta *rta_lookup(rta *);		/* Get rta equivalent to this one, uc++ */
 static inline int rta_is_cached(rta *r) { return r->cached; }
-static inline rta *rta_clone(rta *r) { ASSERT_DIE(0 < atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel)); return r; }
+static inline rta *rta_clone(rta *r) {
+  RTA_LOCK;
+  r->uc++;
+  RTA_UNLOCK;
+  return r;
+}
+
 void rta__free(rta *r);
-static inline void rta_free(rta *r) { if (r && (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel))) rta__free(r); }
+static inline void rta_free(rta *r) {
+  RTA_LOCK;
+  if (r && !--r->uc)
+    rta__free(r);
+  RTA_UNLOCK;
+}
 rta *rta_do_cow(rta *o, linpool *lp);
 static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; }
 static inline void rta_uncache(rta *r) { r->cached = 0; r->uc = 0; }
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index 20f9835d..cd4c6892 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -1287,7 +1287,7 @@ rta_lookup(rta *o)
   for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
     if (r->hash_key == h && rta_same(r, o))
       {
-	atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel);
+	r->uc++;
	RTA_UNLOCK;
	return r;
       }
@@ -1308,14 +1308,6 @@ rta_lookup(rta *o)
 void
 rta__free(rta *a)
 {
-  RTA_LOCK;
-  if (atomic_load_explicit(&a->uc, memory_order_acquire))
-  {
-    /* Somebody has cloned this rta inbetween. This sometimes happens. */
-    RTA_UNLOCK;
-    return;
-  }
-
   ASSERT(rta_cache_count && a->cached);
   rta_cache_count--;
   *a->pprev = a->next;
@@ -1327,7 +1319,6 @@ rta__free(rta *a)
   ea_free(a->eattrs);
   a->cached = 0;
   sl_free(rta_slab(a), a);
-  RTA_UNLOCK;
 }
 
 rta *
diff --git a/nest/rt-show.c b/nest/rt-show.c
index 8196903d..65b59af4 100644
--- a/nest/rt-show.c
+++ b/nest/rt-show.c
@@ -239,11 +239,13 @@ rt_show_cleanup(struct cli *c)
 
   /* Unlink the iterator */
   if (d->table_open)
-    fit_get(&d->tab->table->fib, &d->fit);
+    RT_LOCKED(d->tab->table, t)
+      fit_get(&t->fib, &d->fit);
 
   /* Unlock referenced tables */
   WALK_LIST(tab, d->tables)
-    rt_unlock_table(tab->table);
+    RT_LOCKED(tab->table, t)
+      rt_unlock_table(t);
 }
 
 static void
@@ -255,8 +257,6 @@ rt_show_cont(struct cli *c)
 #else
   unsigned max = 64;
 #endif
-  struct fib *fib = &d->tab->table->fib;
-  struct fib_iterator *it = &d->fit;
 
   if (d->running_on_config && (d->running_on_config != config))
   {
@@ -264,9 +264,14 @@ rt_show_cont(struct cli *c)
     goto done;
   }
 
+  rtable_private *t = RT_LOCK(d->tab->table);
+
+  struct fib *fib = &t->fib;
+  struct fib_iterator *it = &d->fit;
+
   if (!d->table_open)
   {
-    FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib);
+    FIB_ITERATE_INIT(&d->fit, fib);
     d->table_open = 1;
     d->table_counter++;
     d->kernel = rt_show_get_kernel(d);
@@ -284,6 +289,7 @@ rt_show_cont(struct cli *c)
     if (!max--)
     {
       FIB_ITERATE_PUT(it);
+      RT_UNLOCK(d->tab->table);
       return;
     }
     rt_show_net(c, n, d);
@@ -300,6 +306,8 @@ rt_show_cont(struct cli *c)
	      d->net_counter - d->net_counter_last, d->tab->table->name);
   }
 
+  RT_UNLOCK(d->tab->table);
+
   d->kernel = NULL;
   d->table_open = 0;
   d->tab = NODE_NEXT(d->tab);
@@ -431,7 +439,8 @@ rt_show(struct rt_show_data *d)
   if (!d->addr)
   {
     WALK_LIST(tab, d->tables)
-      rt_lock_table(tab->table);
+      RT_LOCKED(tab->table, t)
+	rt_lock_table(t);
 
     /* There is at least one table */
     d->tab = HEAD(d->tables);
@@ -446,13 +455,17 @@ rt_show(struct rt_show_data *d)
       d->tab = tab;
       d->kernel = rt_show_get_kernel(d);
 
+      RT_LOCK(tab->table);
+
       if (d->show_for)
-	n = net_route(tab->table, d->addr);
+	n = net_route(RT_PRIV(tab->table), d->addr);
       else
-	n = net_find(tab->table, d->addr);
+	n = net_find(RT_PRIV(tab->table), d->addr);
 
       if (n)
	rt_show_net(this_cli, n, d);
+
+      RT_UNLOCK(tab->table);
     }
 
     if (d->rt_counter)
diff --git a/nest/rt-table.c b/nest/rt-table.c
index fb0496bd..f304372f 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -61,15 +61,15 @@ struct rt_export_block {
   struct rt_pending_export export[];
 };
 
-static void rt_free_hostcache(rtable *tab);
-static void rt_notify_hostcache(rtable *tab, net *net);
+static void rt_free_hostcache(rtable_private *tab);
+static void rt_notify_hostcache(rtable_private *tab, net *net);
 static void rt_update_hostcache(void *tab);
 static void rt_next_hop_update(void *tab);
 static inline void rt_prune_table(void *tab);
-static inline void rt_schedule_notify(rtable *tab);
+static inline void rt_schedule_notify(rtable_private *tab);
 static void rt_feed_channel(void *);
-static inline void rt_export_used(rtable *tab);
+static inline void rt_export_used(rtable_private *tab);
 static void rt_export_cleanup(void *tab);
 
 const char *rt_import_state_name_array[TIS_MAX] = {
@@ -122,7 +122,7 @@ rte_update_unlock(struct channel *c)
 
 /* Like fib_route(), but skips empty net entries */
 static inline void *
-net_route_ip4(rtable *t, net_addr_ip4 *n)
+net_route_ip4(rtable_private *t, net_addr_ip4 *n)
 {
   net *r;
 
@@ -136,7 +136,7 @@ net_route_ip4(rtable *t, net_addr_ip4 *n)
 }
 
 static inline void *
-net_route_ip6(rtable *t, net_addr_ip6 *n)
+net_route_ip6(rtable_private *t, net_addr_ip6 *n)
 {
   net *r;
 
@@ -150,7 +150,7 @@ net_route_ip6(rtable *t, net_addr_ip6 *n)
 }
 
 static inline void *
-net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
+net_route_ip6_sadr(rtable_private *t, net_addr_ip6_sadr *n)
 {
   struct fib_node *fn;
 
@@ -189,7 +189,7 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
 }
 
 void *
-net_route(rtable *tab, const net_addr *n)
+net_route(rtable_private *tab, const net_addr *n)
 {
   ASSERT(tab->addr_type == n->type);
 
@@ -218,12 +218,15 @@ net_route(rtable *tab, const net_addr *n)
 
 static int
-net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
+net_roa_check_ip4(rtable *t, const net_addr_ip4 *px, u32 asn)
 {
   struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
   struct fib_node *fn;
   int anything = 0;
 
+  RT_LOCK(t);
+  rtable_private *tab = RT_PRIV(t);
+
   while (1)
   {
     for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
@@ -235,7 +238,10 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
       {
	anything = 1;
	if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
+	{
+	  RT_UNLOCK(tab);
	  return ROA_VALID;
+	}
       }
     }
 
@@ -246,16 +252,20 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
     ip4_clrbit(&n.prefix, n.pxlen);
   }
 
+  RT_UNLOCK(tab);
   return anything ? ROA_INVALID : ROA_UNKNOWN;
 }
 
 static int
-net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
+net_roa_check_ip6(rtable *t, const net_addr_ip6 *px, u32 asn)
 {
   struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
   struct fib_node *fn;
   int anything = 0;
 
+  RT_LOCK(t);
+  rtable_private *tab = RT_PRIV(t);
+
   while (1)
   {
     for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
@@ -267,7 +277,10 @@ net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
       {
	anything = 1;
	if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
+	{
+	  RT_UNLOCK(tab);
	  return ROA_VALID;
+	}
       }
     }
 
@@ -278,6 +291,7 @@ net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
     ip6_clrbit(&n.prefix, n.pxlen);
   }
 
+  RT_UNLOCK(tab);
   return anything ? ROA_INVALID : ROA_UNKNOWN;
 }
 
@@ -328,7 +342,7 @@ rte_find(net *net, struct rte_src *src)
 
 struct rte_storage *
-rte_store(const rte *r, net *net, rtable *tab)
+rte_store(const rte *r, net *net, rtable_private *tab)
 {
   struct rte_storage *e = sl_alloc(tab->rte_slab);
 
@@ -354,7 +368,7 @@ rte_store(const rte *r, net *net, rtable *tab)
  */
 
 void
-rte_free(struct rte_storage *e, rtable *tab)
+rte_free(struct rte_storage *e, rtable_private *tab)
 {
   rt_unlock_source(e->rte.src);
   rta_free(e->rte.attrs);
@@ -912,6 +926,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe)
     hook->req->export_one(hook->req, n, rpe);
   else if (hook->req->export_bulk)
   {
+    RT_LOCK(hook->table);
     net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n);
     uint count = rte_feed_count(net);
     rte **feed = NULL;
@@ -920,6 +935,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe)
       feed = alloca(count * sizeof(rte *));
       rte_feed_obtain(net, feed, count);
     }
+    RT_UNLOCK(hook->table);
     hook->req->export_bulk(hook->req, n, rpe, feed, count);
   }
   else
@@ -931,7 +947,11 @@ seen:
   /* The last block may be available to free */
   if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe))
-    rt_export_used(hook->table);
+  {
+    RT_LOCK(hook->table);
+    rt_export_used(RT_PRIV(hook->table));
+    RT_UNLOCK(hook->table);
+  }
 
   /* Releasing this export for cleanup routine */
   DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq);
@@ -970,7 +990,7 @@ seen:
  * done outside of scope of rte_announce().
 */
 static void
-rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old,
+rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_storage *old,
	     struct rte_storage *new_best, struct rte_storage *old_best)
 {
   if (!new_best || !rte_is_valid(&new_best->rte))
@@ -1085,10 +1105,10 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage
   {
     ev_cork(&rt_cork);
     tab->cork_active = 1;
-    tm_start(tab->export_timer, 0);
+    tm_start_in(tab->export_timer, 0, tab->loop);
   }
   else if (!tm_active(tab->export_timer))
-    tm_start(tab->export_timer, tab->config->export_settle_time);
+    tm_start_in(tab->export_timer, tab->config->export_settle_time, tab->loop);
 }
 
 static struct rt_pending_export *
@@ -1117,7 +1137,7 @@ rt_next_export_fast(struct rt_pending_export *last)
 }
 
 static struct rt_pending_export *
-rt_next_export(struct rt_export_hook *hook, rtable *tab)
+rt_next_export(struct rt_export_hook *hook, rtable_private *tab)
 {
   /* As the table is locked, it is safe to reload the last export pointer */
   struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire);
@@ -1140,7 +1160,8 @@ rt_send_export_event(struct rt_export_hook *hook)
 static void
 rt_announce_exports(timer *tm)
 {
-  rtable *tab = tm->data;
+  rtable_private *tab = tm->data;
+  ASSERT_DIE(birdloop_inside(tab->loop));
 
   struct rt_export_hook *c; node *n;
   WALK_LIST2(c, n, tab->exports, n)
@@ -1153,7 +1174,7 @@ rt_announce_exports(timer *tm)
 }
 
 static struct rt_pending_export *
-rt_last_export(rtable *tab)
+rt_last_export(rtable_private *tab)
 {
   struct rt_pending_export *rpe = NULL;
 
@@ -1179,13 +1200,17 @@ rt_export_hook(void *_data)
 
   if (!c->rpe_next)
   {
-    c->rpe_next = rt_next_export(c, c->table);
+    RT_LOCK(c->table);
+    c->rpe_next = rt_next_export(c, RT_PRIV(c->table));
 
     if (!c->rpe_next)
     {
-      rt_export_used(c->table);
+      rt_export_used(RT_PRIV(c->table));
+      RT_UNLOCK(c->table);
       return;
     }
+
+    RT_UNLOCK(c->table);
   }
 
   /* Process the export */
@@ -1255,10 +1280,9 @@ rte_same(rte *x, rte *y)
 static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
 
 static void
-rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src)
+rte_recalculate(rtable_private *table, struct rt_import_hook *c, net *net, rte *new, struct rte_src *src)
 {
   struct rt_import_request *req = c->req;
-  struct rtable *table = c->table;
   struct rt_import_stats *stats = &c->stats;
   struct rte_storage *old_best_stored = net->routes, *old_stored = NULL;
   rte *old_best = old_best_stored ? &old_best_stored->rte : NULL;
@@ -1521,7 +1545,6 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
 
   ASSERT(c->channel_state == CS_UP);
 
-
   if (c->in_table)
     rte_import(&c->in_table->push, n, new, src);
   else
@@ -1575,27 +1598,32 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt
   if (!hook)
     return;
 
+  RT_LOCK(hook->table);
+  rtable_private *tab = RT_PRIV(hook->table);
+
   net *nn;
   if (new)
   {
     /* Use the actual struct network, not the dummy one */
-    nn = net_get(hook->table, n);
+    nn = net_get(tab, n);
     new->net = nn->n.addr;
     new->sender = hook;
   }
-  else if (!(nn = net_find(hook->table, n)))
+  else if (!(nn = net_find(tab, n)))
   {
     req->hook->stats.withdraws_ignored++;
+    RT_UNLOCK(tab);
     return;
   }
 
   /* And recalculate the best route */
-  rte_recalculate(hook, nn, new, src);
+  rte_recalculate(tab, hook, nn, new, src);
+  RT_UNLOCK(tab);
 }
 
 /* Check rtable for best route to given net whether it would be exported do p */
 int
-rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter)
+rt_examine(rtable_private *t, net_addr *a, struct channel *c, const struct filter *filter)
 {
   net *n = net_find(t, a);
 
@@ -1623,22 +1651,27 @@ static void
 rt_export_stopped(void *data)
 {
   struct rt_export_hook *hook = data;
-  rtable *tab = hook->table;
 
-  /* Drop pending exports */
-  rt_export_used(tab);
+  RT_LOCKED(hook->table, tab)
+  {
+    /* Drop pending exports */
+    rt_export_used(tab);
 
-  /* Unlist */
-  rem_node(&hook->n);
+    /* Unlist */
+    rem_node(&hook->n);
+  }
 
   /* Report the channel as stopped. */
   hook->stopped(hook->req);
 
-  /* Free the hook together with its coroutine. */
-  rfree(hook->pool);
-  rt_unlock_table(tab);
+  RT_LOCKED(hook->table, tab)
+  {
+    /* Free the hook together with its coroutine. */
+    rfree(hook->pool);
+    rt_unlock_table(tab);
 
-  DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count);
+    DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count);
+  }
 }
 
@@ -1663,8 +1696,10 @@ rt_set_export_state(struct rt_export_hook *hook, u8 state)
 }
 
 void
-rt_request_import(rtable *tab, struct rt_import_request *req)
+rt_request_import(rtable *t, struct rt_import_request *req)
 {
+  RT_LOCK(t);
+  rtable_private *tab = RT_PRIV(t);
   rt_lock_table(tab);
 
   struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook));
@@ -1672,7 +1707,7 @@ rt_request_import(rtable *tab, struct rt_import_request *req)
   DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count);
 
   hook->req = req;
-  hook->table = tab;
+  hook->table = t;
 
   if (!hook->stale_set)
     hook->stale_set = hook->stale_valid = hook->stale_pruning = hook->stale_pruned = 1;
@@ -1681,24 +1716,30 @@ rt_request_import(rtable *tab, struct rt_import_request *req)
 
   hook->n = (node) {};
   add_tail(&tab->imports, &hook->n);
+
+  RT_UNLOCK(t);
 }
 
 void
-rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_request *))
+rt_stop_import(struct rt_import_request *req, event *stopped)
 {
   ASSERT_DIE(req->hook);
   struct rt_import_hook *hook = req->hook;
 
-  rt_schedule_prune(hook->table);
+  RT_LOCK(hook->table);
+  rt_schedule_prune(RT_PRIV(hook->table));
 
   rt_set_import_state(hook, TIS_STOP);
 
   hook->stopped = stopped;
+  RT_UNLOCK(hook->table);
 }
 
 void
-rt_request_export(rtable *tab, struct rt_export_request *req)
+rt_request_export(rtable *t, struct rt_export_request *req)
 {
+  RT_LOCK(t);
+  rtable_private *tab = RT_PRIV(t);
   rt_lock_table(tab);
 
   pool *p = rp_new(tab->rp, "Export hook");
@@ -1706,7 +1747,7 @@ rt_request_export(rtable *tab, struct rt_export_request *req)
   hook->pool = p;
 
   hook->req = req;
-  hook->table = tab;
+  hook->table = t;
 
   /* stats zeroed by mb_allocz */
 
@@ -1714,7 +1755,7 @@ rt_request_export(rtable *tab, struct rt_export_request *req)
 
   rt_set_export_state(hook, TES_HUNGRY);
 
-  struct rt_pending_export *rpe = rt_last_export(hook->table);
+  struct rt_pending_export *rpe = rt_last_export(tab);
   DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0);
   atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed);
 
@@ -1726,9 +1767,11 @@ rt_request_export(rtable *tab, struct rt_export_request *req)
   DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count);
 
   hook->event = ev_new_init(p, rt_feed_channel, hook);
-  rt_send_export_event(hook);
+  RT_UNLOCK(t);
 
   rt_set_export_state(hook, TES_FEEDING);
+  ASSERT_DIE(hook->export_state == TES_FEEDING);
+  rt_send_export_event(hook);
 }
 
 void
@@ -1737,7 +1780,8 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r
   ASSERT_DIE(req->hook);
   struct rt_export_hook *hook = req->hook;
 
-  rtable *tab = hook->table;
+  RT_LOCK(hook->table);
+  rtable_private *tab = RT_PRIV(hook->table);
 
   /* Stop feeding */
   ev_postpone(hook->event);
@@ -1750,10 +1794,11 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r
 
   rt_send_export_event(hook);
 
+  RT_UNLOCK(hook->table);
+
   rt_set_export_state(hook, TES_STOP);
 }
 
-
 /**
  * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
@@ -1772,14 +1817,17 @@ rt_refresh_begin(struct rt_import_request *req)
   struct rt_import_hook *hook = req->hook;
   ASSERT_DIE(hook);
 
+  RT_LOCK(hook->table);
+  rtable_private *tab = RT_PRIV(hook->table);
+
   ASSERT_DIE(hook->stale_set == hook->stale_valid);
 
   /* If the pruning routine is too slow */
   if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid)
    || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128))
   {
-    log(L_WARN "Route refresh flood in table %s", hook->table->name);
-    FIB_WALK(&hook->table->fib, net, n)
+    log(L_WARN "Route refresh flood in table %s", tab->name);
+    FIB_WALK(&tab->fib, net, n)
       {
	for (struct rte_storage *e = n->routes; e; e = e->next)
	  if (e->rte.sender == req->hook)
@@ -1799,6 +1847,8 @@ rt_refresh_begin(struct rt_import_request *req)
 
   if (req->trace_routes & D_STATES)
     log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set);
+
+  RT_UNLOCK(tab);
 }
 
 /**
@@ -1815,13 +1865,16 @@ rt_refresh_end(struct rt_import_request *req)
   struct rt_import_hook *hook = req->hook;
   ASSERT_DIE(hook);
 
+  RT_LOCK(hook->table);
   hook->stale_valid++;
   ASSERT_DIE(hook->stale_set == hook->stale_valid);
 
-  rt_schedule_prune(hook->table);
+  rt_schedule_prune(RT_PRIV(hook->table));
 
   if (req->trace_routes & D_STATES)
     log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid);
+
+  RT_UNLOCK(hook->table);
 }
 
 /**
@@ -1846,8 +1899,10 @@ rte_dump(struct rte_storage *e)
 * This function dumps contents of a given routing table to debug output.
 */
 void
-rt_dump(rtable *t)
+rt_dump(rtable *tab)
 {
+  RT_LOCK(tab);
+  rtable_private *t = RT_PRIV(tab);
   debug("Dump of routing table <%s>%s\n", t->name, t->delete_event ? " (deleted)" : "");
 #ifdef DEBUGGING
   fib_check(&t->fib);
@@ -1859,6 +1914,7 @@ rt_dump(rtable *t)
   }
   FIB_WALK_END;
   debug("\n");
+  RT_UNLOCK(tab);
 }
 
 /**
@@ -1877,11 +1933,13 @@ rt_dump_all(void)
 }
 
 void
-rt_dump_hooks(rtable *tab)
+rt_dump_hooks(rtable *t)
 {
+  RT_LOCK(t);
+  rtable_private *tab = RT_PRIV(t);
" (deleted)" : ""); debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", - tab->nhu_state, ev_active(tab->hcu_event), tab->use_count, tab->rt_count); + atomic_load(&tab->nhu_state), ev_active(tab->hcu_event), tab->use_count, tab->rt_count); debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); @@ -1904,6 +1962,7 @@ rt_dump_hooks(rtable *tab) eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed)); } debug("\n"); + RT_UNLOCK(t); } void @@ -1921,37 +1980,36 @@ rt_dump_hooks_all(void) static inline void rt_schedule_nhu(rtable *tab) { - if (tab->nhu_state == NHU_CLEAN) - ev_schedule(tab->nhu_event); + atomic_fetch_or_explicit(&tab->nhu_state, NHU_SCHEDULED, memory_order_acq_rel); + ev_send_loop(tab->loop, tab->nhu_event); /* state change: * NHU_CLEAN -> NHU_SCHEDULED * NHU_RUNNING -> NHU_DIRTY */ - tab->nhu_state |= NHU_SCHEDULED; } void -rt_schedule_prune(rtable *tab) +rt_schedule_prune(rtable_private *tab) { if (tab->prune_state == 0) - ev_schedule(tab->prune_event); + ev_send_loop(tab->loop, tab->prune_event); /* state change 0->1, 2->3 */ tab->prune_state |= 1; } void -rt_export_used(rtable *tab) +rt_export_used(rtable_private *tab) { if (config->table_debug) log(L_TRACE "%s: Export cleanup requested", tab->name); - ev_schedule(tab->ec_event); + ev_send_loop(tab->loop, tab->ec_event); } static inline btime -rt_settled_time(rtable *tab) +rt_settled_time(rtable_private *tab) { ASSUME(tab->base_settle_time != 0); @@ -1962,7 +2020,8 @@ rt_settled_time(rtable *tab) static void rt_settle_timer(timer *t) { - rtable *tab = t->data; + rtable_private *tab = t->data; + ASSERT_DIE(birdloop_inside(tab->loop)); if (!tab->base_settle_time) return; @@ -1970,7 +2029,7 @@ rt_settle_timer(timer *t) btime settled_time = rt_settled_time(tab); if (current_time() < settled_time) { - tm_set(tab->settle_timer, settled_time); + tm_set_in(tab->settle_timer, settled_time, tab->loop); return; } @@ -1983,7 +2042,7 @@ rt_settle_timer(timer *t) } static void -rt_kick_settle_timer(rtable *tab) +rt_kick_settle_timer(rtable_private *tab) { tab->base_settle_time = current_time(); @@ -1991,11 +2050,11 @@ rt_kick_settle_timer(rtable *tab) tab->settle_timer = tm_new_init(tab->rp, rt_settle_timer, tab, 0, 0); if (!tm_active(tab->settle_timer)) - tm_set(tab->settle_timer, rt_settled_time(tab)); + tm_set_in(tab->settle_timer, rt_settled_time(tab), tab->loop); } static inline void -rt_schedule_notify(rtable *tab) +rt_schedule_notify(rtable_private *tab) { if (EMPTY_LIST(tab->subscribers)) return; @@ -2007,25 +2066,33 @@ rt_schedule_notify(rtable *tab) } void -rt_subscribe(rtable *tab, struct rt_subscription *s) +rt_subscribe(rtable *t, struct rt_subscription *s) { - s->tab = tab; - rt_lock_table(tab); - DBG("rt_subscribe(%s)\n", tab->name); - add_tail(&tab->subscribers, &s->n); + s->tab = t; + RT_LOCKED(t, tab) + { + rt_lock_table(tab); + DBG("rt_subscribe(%s)\n", tab->name); + add_tail(&tab->subscribers, &s->n); + } } void rt_unsubscribe(struct rt_subscription *s) { - rem_node(&s->n); - rt_unlock_table(s->tab); + RT_LOCKED(s->tab, tab) + { + rem_node(&s->n); + if (EMPTY_LIST(tab->subscribers) && tm_active(tab->settle_timer)) + tm_stop(tab->settle_timer); + rt_unlock_table(tab); + } } static void rt_free(resource *_r) { - rtable *r = (rtable *) _r; + rtable_private *r = (rtable_private *) _r; DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); @@ 
-2046,14 +2113,14 @@ rt_free(resource *_r) static void rt_res_dump(resource *_r) { - rtable *r = (rtable *) _r; + RT_LOCKED((rtable *) _r, r) debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n", r->name, net_label[r->addr_type], r->rt_count, r->use_count); } static struct resclass rt_class = { .name = "Routing table", - .size = sizeof(struct rtable), + .size = sizeof(rtable_private), .free = rt_free, .dump = rt_res_dump, .lookup = NULL, @@ -2068,9 +2135,8 @@ rt_setup(pool *pp, struct rtable_config *cf) ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Routing table %s", cf->name)); pool *p = rp_new(pp, nb); - mb_move(nb, p); - rtable *t = ralloc(p, &rt_class); + rtable_private *t = ralloc(p, &rt_class); t->rp = p; t->rte_slab = sl_new(p, sizeof(struct rte_storage)); @@ -2090,6 +2156,8 @@ rt_setup(pool *pp, struct rtable_config *cf) init_list(&t->pending_exports); init_list(&t->subscribers); + t->loop = birdloop_new(p, DOMAIN_ORDER(rtable), nb); + t->ec_event = ev_new_init(p, rt_export_cleanup, t); t->prune_event = ev_new_init(p, rt_prune_table, t); t->hcu_event = ev_new_init(p, rt_update_hostcache, t); @@ -2106,7 +2174,8 @@ rt_setup(pool *pp, struct rtable_config *cf) t->nhu_lp = lp_new_default(p); - return t; + mb_move(nb, p); + return (rtable *) t; } /** @@ -2141,7 +2210,9 @@ rt_init(void) static void rt_prune_table(void *data) { - rtable *tab = data; + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); + struct fib_iterator *fit = &tab->prune_fit; int limit = 512; @@ -2156,6 +2227,8 @@ rt_prune_table(void *data) if (tab->prune_state == 0) return; + rt_lock_table(tab); + if (tab->prune_state == 1) { /* Mark channels to flush */ @@ -2189,11 +2262,12 @@ again: if (limit <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->prune_event); + ev_send_loop(tab->loop, tab->prune_event); + rt_unlock_table(tab); return; } - rte_recalculate(e->rte.sender, n, NULL, e->rte.src); + rte_recalculate(tab, e->rte.sender, n, NULL, e->rte.src); limit--; goto rescan; @@ -2217,7 +2291,8 @@ again: tab->gc_time = current_time(); /* state change 2->0, 3->1 */ - tab->prune_state &= 1; + if (tab->prune_state &= 1) + ev_send_loop(tab->loop, tab->prune_event); uint flushed_channels = 0; @@ -2240,12 +2315,15 @@ again: /* In some cases, we may want to directly proceed to export cleanup */ if (EMPTY_LIST(tab->exports) && flushed_channels) rt_export_cleanup(tab); + + rt_unlock_table(tab); } static void rt_export_cleanup(void *data) { - rtable *tab = data; + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); u64 min_seq = ~((u64) 0); struct rt_pending_export *last_export_to_free = NULL; @@ -2394,7 +2472,7 @@ done:; if (!first_export || (first_export->seq >= ih->flush_seq)) { ih->import_state = TIS_CLEARED; - ih->stopped(ih->req); + ev_send(ih->stopped->list, ih->stopped); rem_node(&ih->n); mb_free(ih); rt_unlock_table(tab); @@ -2535,7 +2613,7 @@ no_nexthop: } static inline struct rte_storage * -rt_next_hop_update_rte(rtable *tab, net *n, rte *old) +rt_next_hop_update_rte(rtable_private *tab, net *n, rte *old) { rta *a = alloca(RTA_MAX_SIZE); memcpy(a, old->attrs, rta_size(old->attrs)); @@ -2553,7 +2631,7 @@ rt_next_hop_update_rte(rtable *tab, net *n, rte *old) } static inline int -rt_next_hop_update_net(rtable *tab, net *n) +rt_next_hop_update_net(rtable_private *tab, net *n) { struct rte_storage *new; int count = 0; @@ -2638,17 +2716,21 @@ rt_next_hop_update_net(rtable *tab, net *n) static void rt_next_hop_update(void *data) { - rtable *tab = data; + rtable_private *tab = data; + 
ASSERT_DIE(birdloop_inside(tab->loop)); + struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; - if (tab->nhu_state == NHU_CLEAN) + if (atomic_load_explicit(&tab->nhu_state, memory_order_acquire) == NHU_CLEAN) return; - if (tab->nhu_state == NHU_SCHEDULED) + rt_lock_table(tab); + + if (atomic_load_explicit(&tab->nhu_state, memory_order_acquire) == NHU_SCHEDULED) { FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = NHU_RUNNING; + ASSERT_DIE(atomic_exchange_explicit(&tab->nhu_state, NHU_RUNNING, memory_order_acq_rel) == NHU_SCHEDULED); } FIB_ITERATE_START(&tab->fib, fit, net, n) @@ -2656,7 +2738,8 @@ rt_next_hop_update(void *data) if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->nhu_event); + ev_send_loop(tab->loop, tab->nhu_event); + rt_unlock_table(tab); return; } max_feed -= rt_next_hop_update_net(tab, n); @@ -2667,10 +2750,10 @@ rt_next_hop_update(void *data) * NHU_DIRTY -> NHU_SCHEDULED * NHU_RUNNING -> NHU_CLEAN */ - tab->nhu_state &= 1; + if (atomic_fetch_and_explicit(&tab->nhu_state, NHU_SCHEDULED, memory_order_acq_rel) != NHU_RUNNING) + ev_send_loop(tab->loop, tab->nhu_event); - if (tab->nhu_state != NHU_CLEAN) - ev_schedule(tab->nhu_event); + rt_unlock_table(tab); } @@ -2713,11 +2796,22 @@ rt_new_table(struct symbol *s, uint addr_type) * configuration. */ void -rt_lock_table(rtable *r) +rt_lock_table(rtable_private *r) { r->use_count++; } +static void +rt_loop_stopped(void *data) +{ + rtable_private *r = data; + birdloop_free(r->loop); + r->loop = NULL; + r->prune_event->list = r->ec_event->list = NULL; + r->nhu_event->list = r->hcu_event->list = NULL; + ev_send(r->delete_event->list, r->delete_event); +} + /** * rt_unlock_table - unlock a routing table * @r: routing table to be unlocked @@ -2727,14 +2821,14 @@ rt_lock_table(rtable *r) * for deletion by configuration changes. 
*/ void -rt_unlock_table(rtable *r) +rt_unlock_table(rtable_private *r) { - if (!--r->use_count && r->delete_event) + if (!--r->use_count && r->delete_event && + !r->prune_state && !atomic_load_explicit(&r->nhu_state, memory_order_acquire)) /* Delete the routing table by freeing its pool */ - ev_schedule(r->delete_event); + birdloop_stop_self(r->loop, rt_loop_stopped, r); } - static struct rtable_config * rt_find_table_config(struct config *cf, char *name) { @@ -2745,7 +2839,9 @@ rt_find_table_config(struct config *cf, char *name) static void rt_done(void *data) { - rtable *t = data; + rtable_private *t = data; + ASSERT_DIE(t->loop == NULL); + struct rtable_config *tc = t->config; struct config *c = tc->config; @@ -2755,6 +2851,7 @@ rt_done(void *data) if (t->hostcache) rt_free_hostcache(t); + rfree(t->delete_event); rfree(t->rp); config_del_obstacle(c); @@ -2782,14 +2879,15 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *ot = o->table; + RT_LOCK(o->table); + rtable_private *ot = RT_PRIV(o->table); if (!ot->delete_event) { r = rt_find_table_config(new, o->name); if (r && (r->addr_type == o->addr_type) && !new->shutdown) { DBG("\t%s: same\n", o->name); - r->table = ot; + r->table = (rtable *) ot; ot->name = r->name; ot->config = r; if (o->sorted != r->sorted) @@ -2799,11 +2897,13 @@ rt_commit(struct config *new, struct config *old) { DBG("\t%s: deleted\n", o->name); rt_lock_table(ot); - ot->delete_event = ev_new_init(ot->rp, rt_done, ot); + ot->delete_event = ev_new_init(&root_pool, rt_done, ot); + ot->delete_event->list = &global_event_list; config_add_obstacle(old); rt_unlock_table(ot); } } + RT_UNLOCK(o->table); } } @@ -2834,46 +2934,98 @@ rt_feed_channel(void *data) struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; + RT_LOCK(c->table); + rtable_private *tab = RT_PRIV(c->table); + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - FIB_ITERATE_START(&c->table->fib, fit, net, n) +redo: + FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { FIB_ITERATE_PUT(fit); rt_send_export_event(c); + + RT_UNLOCK(c->table); return; } if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + { + RT_UNLOCK(c->table); return; + } - if (c->req->export_bulk) + if (!n->routes || !rte_is_valid(&n->routes->rte)) + ; /* if no route, do nothing */ + else if (c->req->export_bulk) { uint count = rte_feed_count(n); if (count) { rte **feed = alloca(count * sizeof(rte *)); rte_feed_obtain(n, feed, count); + + struct rt_pending_export *rpe_last, *rpe_first = n->first; + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_last = rpe; + + FIB_ITERATE_PUT_NEXT(fit, &tab->fib); + RT_UNLOCK(c->table); + c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); + + RT_LOCK(c->table); + + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + { + rpe_mark_seen(c, rpe); + if (rpe == rpe_last) + break; + ASSERT_DIE(rpe->seq < rpe_last->seq); + } + max_feed -= count; + + goto redo; } } - else if (n->routes && rte_is_valid(&n->routes->rte)) + else if (c->req->export_one) { struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; - c->req->export_one(c->req, n->n.addr, &rpe); - max_feed--; - } - for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) - rpe_mark_seen(c, rpe); + struct rt_pending_export *rpe_last, *rpe_first = n->first; + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = 
rpe_next(rpe, NULL)) + rpe_last = rpe; + + FIB_ITERATE_PUT_NEXT(fit, &tab->fib); + RT_UNLOCK(c->table); + + c->req->export_one(c->req, n->n.addr, &rpe); + + RT_LOCK(c->table); + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + { + rpe_mark_seen(c, rpe); + if (rpe == rpe_last) + break; + ASSERT_DIE(rpe->seq < rpe_last->seq); + } + + max_feed--; + goto redo; + } + else + bug("Export request must always provide an export method"); } FIB_ITERATE_END; c->event->hook = rt_export_hook; rt_send_export_event(c); + RT_UNLOCK(c->table); + rt_set_export_state(c, TES_READY); } @@ -2981,7 +3133,7 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) } static void -rt_init_hostcache(rtable *tab) +rt_init_hostcache(rtable_private *tab) { struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache)); init_list(&hc->hostentries); @@ -2997,7 +3149,7 @@ rt_init_hostcache(rtable *tab) } static void -rt_free_hostcache(rtable *tab) +rt_free_hostcache(rtable_private *tab) { struct hostcache *hc = tab->hostcache; @@ -3020,13 +3172,13 @@ rt_free_hostcache(rtable *tab) } static void -rt_notify_hostcache(rtable *tab, net *net) +rt_notify_hostcache(rtable_private *tab, net *net) { if (ev_active(tab->hcu_event)) return; if (trie_match_net(tab->hostcache->trie, net->n.addr)) - ev_schedule(tab->hcu_event); + ev_send_loop(tab->loop, tab->hcu_event); } static int @@ -3059,7 +3211,7 @@ rt_get_igp_metric(rte *rt) } static int -rt_update_hostentry(rtable *tab, struct hostentry *he) +rt_update_hostentry(rtable_private *tab, struct hostentry *he) { rta *old_src = he->src; int direct = 0; @@ -3125,7 +3277,9 @@ done: static void rt_update_hostcache(void *data) { - rtable *tab = data; + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); + struct hostcache *hc = tab->hostcache; struct hostentry *he; node *n, *x; @@ -3149,10 +3303,12 @@ rt_update_hostcache(void *data) } struct hostentry * -rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) +rt_get_hostentry(rtable *t, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; + rtable_private *tab = RT_LOCK(t); + if (!tab->hostcache) rt_init_hostcache(tab); @@ -3160,10 +3316,13 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) struct hostcache *hc = tab->hostcache; for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next) if (ipa_equal(he->addr, a) && (he->tab == dep)) - return he; + goto done; he = hc_new_hostentry(hc, tab->rp, a, ipa_zero(ll) ? a : ll, dep, k); rt_update_hostentry(tab, he); + +done: + RT_UNLOCK(t); return he; } diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 9b9013f9..1080db77 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -2140,7 +2140,7 @@ use_deterministic_med(struct rte_storage *r) } int -bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) +bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best) { rte *key = new ? 
new : old; u32 lpref = key->attrs->pref; diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index dc845550..aac1f45c 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -1800,10 +1800,12 @@ bgp_channel_start(struct channel *C) ip_addr src = p->local_ip; if (c->igp_table_ip4) - rt_lock_table(c->igp_table_ip4); + RT_LOCKED(c->igp_table_ip4, t) + rt_lock_table(t); if (c->igp_table_ip6) - rt_lock_table(c->igp_table_ip6); + RT_LOCKED(c->igp_table_ip6, t) + rt_lock_table(t); c->pool = p->p.pool; // XXXX bgp_init_bucket_table(c); @@ -1884,10 +1886,12 @@ bgp_channel_cleanup(struct channel *C) struct bgp_channel *c = (void *) C; if (c->igp_table_ip4) - rt_unlock_table(c->igp_table_ip4); + RT_LOCKED(c->igp_table_ip4, t) + rt_unlock_table(t); if (c->igp_table_ip6) - rt_unlock_table(c->igp_table_ip6); + RT_LOCKED(c->igp_table_ip6, t) + rt_unlock_table(t); c->index = 0; diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 7cb4df1f..60f93bce 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -586,7 +586,7 @@ void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); -int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); +int bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best); void bgp_rte_modify_stale(struct rt_export_request *, const net_addr *, struct rt_pending_export *, rte **, uint); u32 bgp_rte_igp_metric(struct rte *); void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index 9d78438d..b40592d2 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -228,7 +228,7 @@ mrt_next_table_(rtable *tab, rtable *tab_ptr, const char *pattern) NODE_VALID(tn); tn = tn->next) { - tab = SKIP_BACK(struct rtable, n, tn); + tab = SKIP_BACK(rtable, n, tn); if (patmatch(pattern, tab->name) && ((tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6))) return tab; @@ -243,13 +243,21 @@ mrt_next_table(struct mrt_table_dump_state *s) rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr); if (s->table) - rt_unlock_table(s->table); + { + RT_LOCK(s->table); + rt_unlock_table(RT_PRIV(s->table)); + RT_UNLOCK(s->table); + } s->table = tab; s->ipv4 = tab ? 
(tab->addr_type == NET_IP4) : 0; if (s->table) - rt_lock_table(s->table); + { + RT_LOCK(s->table); + rt_lock_table(RT_PRIV(s->table)); + RT_UNLOCK(s->table); + } return s->table; } @@ -573,14 +581,23 @@ mrt_table_dump_init(pool *pp) static void mrt_table_dump_free(struct mrt_table_dump_state *s) { - if (s->table_open) - FIB_ITERATE_UNLINK(&s->fit, &s->table->fib); - if (s->table) - rt_unlock_table(s->table); + { + RT_LOCK(s->table); + + if (s->table_open) + FIB_ITERATE_UNLINK(&s->fit, &RT_PRIV(s->table)->fib); + + rt_unlock_table(RT_PRIV(s->table)); + RT_UNLOCK(s->table); + } if (s->table_ptr) - rt_unlock_table(s->table_ptr); + { + RT_LOCK(s->table_ptr); + rt_unlock_table(RT_PRIV(s->table_ptr)); + RT_UNLOCK(s->table_ptr); + } config_del_obstacle(s->config); @@ -596,8 +613,14 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) s->max = 2048; s->bws = &bws; + rtable_private *tab; + if (s->table_open) + { + RT_LOCK(s->table); + tab = RT_PRIV(s->table); goto step; + } while (mrt_next_table(s)) { @@ -606,15 +629,18 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) mrt_peer_table_dump(s); - FIB_ITERATE_INIT(&s->fit, &s->table->fib); + RT_LOCK(s->table); + tab = RT_PRIV(s->table); + FIB_ITERATE_INIT(&s->fit, &tab->fib); s->table_open = 1; step: - FIB_ITERATE_START(&s->table->fib, &s->fit, net, n) + FIB_ITERATE_START(&tab->fib, &s->fit, net, n) { if (s->max < 0) { FIB_ITERATE_PUT(&s->fit); + RT_UNLOCK(s->table); return 0; } @@ -634,6 +660,7 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) mrt_peer_table_flush(s); } + RT_UNLOCK(s->table); return 1; } @@ -661,7 +688,11 @@ mrt_timer(timer *t) s->always_add_path = cf->always_add_path; if (s->table_ptr) - rt_lock_table(s->table_ptr); + { + RT_LOCK(s->table_ptr); + rt_lock_table(RT_PRIV(s->table_ptr)); + RT_UNLOCK(s->table_ptr); + } p->table_dump = s; ev_schedule(p->event); @@ -734,7 +765,11 @@ mrt_dump_cmd(struct mrt_dump_data *d) s->filename = d->filename; if (s->table_ptr) - rt_lock_table(s->table_ptr); + { + RT_LOCK(s->table_ptr); + rt_lock_table(RT_PRIV(s->table_ptr)); + RT_UNLOCK(s->table_ptr); + } this_cli->cont = mrt_dump_cont; this_cli->cleanup = mrt_dump_cleanup; diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h index 4ff94c12..04865089 100644 --- a/proto/mrt/mrt.h +++ b/proto/mrt/mrt.h @@ -40,7 +40,7 @@ struct mrt_proto { struct mrt_dump_data { const char *table_expr; - struct rtable *table_ptr; + rtable *table_ptr; const struct filter *filter; const char *filename; }; @@ -60,7 +60,7 @@ struct mrt_table_dump_state { /* Configuration information */ const char *table_expr; /* Wildcard for table name (or NULL) */ - struct rtable *table_ptr; /* Explicit table (or NULL) */ + rtable *table_ptr; /* Explicit table (or NULL) */ const struct filter *filter; /* Optional filter */ const char *filename; /* Filename pattern */ int always_add_path; /* Always use *_ADDPATH message subtypes */ @@ -73,7 +73,7 @@ struct mrt_table_dump_state { HASH(struct mrt_peer_entry) peer_hash; /* Hash for peers to find the index */ - struct rtable *table; /* Processed table, NULL initially */ + rtable *table; /* Processed table, NULL initially */ struct fib_iterator fit; /* Iterator in processed table */ int table_open; /* Whether iterator is linked */ diff --git a/proto/perf/perf.c b/proto/perf/perf.c index 8b2cb69f..aa688d88 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -198,7 +198,9 @@ perf_loop(void *data) p->exp++; } - rt_schedule_prune(P->main_channel->table); + RT_LOCK(P->main_channel->table); + 
rt_schedule_prune(RT_PRIV(P->main_channel->table)); + RT_UNLOCK(P->main_channel->table); ev_schedule(p->loop); } diff --git a/proto/radv/radv.c b/proto/radv/radv.c index fa228c69..d572c1b7 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -555,7 +555,10 @@ radv_check_active(struct radv_proto *p) return 1; struct channel *c = p->p.main_channel; - return rt_examine(c->table, &cf->trigger, c, c->out_filter); + RT_LOCK(c->table); + int active = rt_examine(RT_PRIV(c->table), &cf->trigger, c, c->out_filter); + RT_UNLOCK(c->table); + return active; } static void diff --git a/proto/static/static.c b/proto/static/static.c index 45791e8e..bd7f3f5b 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -491,10 +491,12 @@ static_start(struct proto *P) static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024)); if (p->igp_table_ip4) - rt_lock_table(p->igp_table_ip4); + RT_LOCKED(p->igp_table_ip4, t) + rt_lock_table(t); if (p->igp_table_ip6) - rt_lock_table(p->igp_table_ip6); + RT_LOCKED(p->igp_table_ip6, t) + rt_lock_table(t); p->event = ev_new_init(p->p.pool, static_announce_marked, p); @@ -521,10 +523,12 @@ static_shutdown(struct proto *P) static_reset_rte(p, r); if (p->igp_table_ip4) - rt_unlock_table(p->igp_table_ip4); + RT_LOCKED(p->igp_table_ip4, t) + rt_unlock_table(t); if (p->igp_table_ip6) - rt_unlock_table(p->igp_table_ip6); + RT_LOCKED(p->igp_table_ip6, t) + rt_unlock_table(t); return PS_DOWN; } diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 5431bebe..98c56391 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -427,6 +427,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ + RT_LOCK(p->p.main_channel->table); /* Deleting all routes if flush is requested */ if (p->flush_routes) goto delete; @@ -435,7 +436,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) if (!p->ready) goto ignore; - net *net = net_find(p->p.main_channel->table, e->net); + net *net = net_find(RT_PRIV(p->p.main_channel->table), e->net); if (!net || !krt_is_installed(p, net)) goto delete; @@ -481,6 +482,7 @@ delete: goto done; done: + RT_UNLOCK(p->p.main_channel->table); lp_flush(krt_filter_lp); } @@ -498,7 +500,8 @@ krt_init_scan(struct krt_proto *p) static void krt_prune(struct krt_proto *p) { - struct rtable *t = p->p.main_channel->table; + RT_LOCK(p->p.main_channel->table); + rtable_private *t = RT_PRIV(p->p.main_channel->table); KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name); FIB_WALK(&t->fib, net, n) @@ -518,6 +521,8 @@ krt_prune(struct krt_proto *p) } FIB_WALK_END; + RT_UNLOCK(p->p.main_channel->table); + #ifdef KRT_ALLOW_LEARN if (KRT_CF->learn) channel_refresh_end(p->p.main_channel); From 4f3fa1623f66acd24c227cf0cc5a4af2f5133b6c Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Mon, 18 Oct 2021 21:22:58 +0200 Subject: [PATCH 07/42] Pipe runs in parallel. 
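Running the pipe in parallel needs only an opt-in at configuration time: a protocol
whose config sets loop_order is started inside its own birdloop rather than the main
one, and (per the following patch) its import events are then scheduled onto that
loop's work list. A minimal sketch of the opt-in, assuming the semantics suggested by
this patch and the later RPKI patch (both set the same field); proto_config_new() and
DOMAIN_ORDER() are the APIs used in the hunk below, while the protocol name "mine" is
hypothetical:

    mine_proto_start: proto_start MINE
    {
      this_proto = proto_config_new(&proto_mine, $1);
      /* Run this instance in its own loop; without this line the
       * protocol keeps running in the main loop as before. */
      this_proto->loop_order = DOMAIN_ORDER(proto);
    }
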
--- nest/proto.c | 1 + proto/pipe/config.Y | 1 + 2 files changed, 2 insertions(+) diff --git a/nest/proto.c b/nest/proto.c index 4ae0cbfd..623585f1 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1413,6 +1413,7 @@ proto_event(void *ptr) { if (p->proto == &proto_unix_iface) if_flush_ifaces(p); + p->do_stop = 0; } diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index c869de9f..fc08445f 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -25,6 +25,7 @@ proto: pipe_proto '}' { this_channel = NULL; } ; pipe_proto_start: proto_start PIPE { this_proto = proto_config_new(&proto_pipe, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); PIPE_CFG->max_generation = 16; } proto_name From dc160e11e1a9e4344bbee6fd0bc8aee229d7c540 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Fri, 12 Nov 2021 15:53:33 +0000 Subject: [PATCH 08/42] Route table import-to-export announcement indirection to reduce pipe traffic --- nest/proto.c | 3 +++ nest/route.h | 7 ++++--- nest/rt-table.c | 53 ++++++++++++++++++++++++++++++++++--------------- 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/nest/proto.c b/nest/proto.c index 623585f1..b7dbae5e 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -468,6 +468,7 @@ channel_start_import(struct channel *c) c->in_req = (struct rt_import_request) { .name = rn, + .list = proto_work_list(c->proto), .trace_routes = c->debug | c->proto->debug, .dump_req = channel_dump_import_req, .log_state_change = channel_import_log_state_change, @@ -886,6 +887,7 @@ channel_setup_in_table(struct channel *c, int best) c->in_table = &cat->cat; c->in_table->push = (struct rt_import_request) { .name = cat->name, + .list = proto_work_list(c->proto), .trace_routes = c->debug | c->proto->debug, .dump_req = channel_in_push_dump_req, .log_state_change = channel_push_log_state_change, @@ -928,6 +930,7 @@ channel_setup_out_table(struct channel *c) c->out_table = &cat->cat; c->out_table->push = (struct rt_import_request) { .name = cat->name, + .list = proto_work_list(c->proto), .trace_routes = c->debug | c->proto->debug, .dump_req = channel_out_push_dump_req, .log_state_change = channel_push_log_state_change, diff --git a/nest/route.h b/nest/route.h index 9417d97d..3f8bf433 100644 --- a/nest/route.h +++ b/nest/route.h @@ -170,6 +170,7 @@ typedef struct rtable_private { struct hmap id_map; struct hostcache *hostcache; struct event *prune_event; /* Event to prune abandoned routes */ + struct event *announce_event; /* Event to announce pending exports */ struct event *ec_event; /* Event to prune finished exports */ struct event *hcu_event; /* Event to update host cache */ struct event *delete_event; /* Event to delete the table */ @@ -191,8 +192,6 @@ typedef struct rtable_private { struct timer *settle_timer; /* Settle time for notifications */ list pending_exports; /* List of packed struct rt_pending_export */ - btime base_export_time; /* When first pending export was announced */ - struct timer *export_timer; struct rt_pending_export *first_export; /* First export to announce */ u64 next_export_seq; /* The next export will have this ID */ @@ -221,7 +220,6 @@ struct rtable_config { byte sorted; /* Routes of network are sorted according to rte_better() */ btime min_settle_time; /* Minimum settle time for notifications */ btime max_settle_time; /* Maximum settle time for notifications */ - btime export_settle_time; /* Delay before exports are announced */ uint cork_limit; /* Amount of routes to be pending on export to cork imports */ }; @@ -309,6 +307,8 @@ struct rt_import_request { 
char *name; u8 trace_routes; + event_list *list; /* Where to schedule import events */ + void (*dump_req)(struct rt_import_request *req); void (*log_state_change)(struct rt_import_request *req, u8 state); /* Preimport is called when the @new route is just-to-be inserted, replacing @old. @@ -339,6 +339,7 @@ struct rt_import_hook { u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ + struct event *export_announce_event; /* Event to run to announce new exports */ struct event *stopped; /* Event to run when import is stopped */ }; diff --git a/nest/rt-table.c b/nest/rt-table.c index f304372f..f33b9153 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -1023,8 +1023,6 @@ rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_ rt_notify_hostcache(tab, net); } - rt_schedule_notify(tab); - if (EMPTY_LIST(tab->exports) && EMPTY_LIST(tab->pending_exports)) { /* No export hook and no pending exports to cleanup. We may free the route immediately. */ @@ -1105,10 +1103,7 @@ rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_ { ev_cork(&rt_cork); tab->cork_active = 1; - tm_start_in(tab->export_timer, 0, tab->loop); } - else if (!tm_active(tab->export_timer)) - tm_start_in(tab->export_timer, tab->config->export_settle_time, tab->loop); } static struct rt_pending_export * @@ -1158,11 +1153,13 @@ rt_send_export_event(struct rt_export_hook *hook) } static void -rt_announce_exports(timer *tm) +rt_announce_exports(void *data) { - rtable_private *tab = tm->data; + rtable_private *tab = data; ASSERT_DIE(birdloop_inside(tab->loop)); + rt_schedule_notify(tab); + struct rt_export_hook *c; node *n; WALK_LIST2(c, n, tab->exports, n) { @@ -1173,6 +1170,26 @@ rt_announce_exports(timer *tm) } } +static void +rt_import_announce_exports(void *data) +{ + struct rt_import_hook *hook = data; + RT_LOCKED(hook->table, tab) + { + if (hook->import_state == TIS_CLEARED) + { + rfree(hook->export_announce_event); + + ev_send(hook->stopped->list, hook->stopped); + rem_node(&hook->n); + mb_free(hook); + rt_unlock_table(tab); + } + else + ev_send_loop(tab->loop, tab->announce_event); + } +} + static struct rt_pending_export * rt_last_export(rtable_private *tab) { @@ -1471,6 +1488,8 @@ rte_recalculate(rtable_private *table, struct rt_import_hook *c, net *net, rte * rte_announce(table, net, new_stored, old_stored, net->routes, old_best_stored); + ev_send(req->list, c->export_announce_event); + if (!net->routes && (table->gc_counter++ >= table->config->gc_max_ops) && (table->gc_time + table->config->gc_min_time <= current_time())) @@ -1709,6 +1728,8 @@ rt_request_import(rtable *t, struct rt_import_request *req) hook->req = req; hook->table = t; + hook->export_announce_event = ev_new_init(tab->rp, rt_import_announce_exports, hook); + if (!hook->stale_set) hook->stale_set = hook->stale_valid = hook->stale_pruning = hook->stale_pruned = 1; @@ -1727,11 +1748,12 @@ rt_stop_import(struct rt_import_request *req, event *stopped) struct rt_import_hook *hook = req->hook; RT_LOCK(hook->table); + rt_schedule_prune(RT_PRIV(hook->table)); rt_set_import_state(hook, TIS_STOP); - hook->stopped = stopped; + RT_UNLOCK(hook->table); } @@ -2158,6 +2180,7 @@ rt_setup(pool *pp, struct rtable_config *cf) t->loop = birdloop_new(p, DOMAIN_ORDER(rtable), nb); + t->announce_event = ev_new_init(p, rt_announce_exports, t); t->ec_event = ev_new_init(p, rt_export_cleanup, t); t->prune_event = 
ev_new_init(p, rt_prune_table, t); t->hcu_event = ev_new_init(p, rt_update_hostcache, t); @@ -2166,7 +2189,6 @@ rt_setup(pool *pp, struct rtable_config *cf) t->nhu_event->cork = &rt_cork; t->prune_event->cork = &rt_cork; - t->export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); t->last_rt_change = t->gc_time = current_time(); t->next_export_seq = 1; @@ -2472,15 +2494,11 @@ done:; if (!first_export || (first_export->seq >= ih->flush_seq)) { ih->import_state = TIS_CLEARED; - ev_send(ih->stopped->list, ih->stopped); - rem_node(&ih->n); - mb_free(ih); - rt_unlock_table(tab); + ev_send(ih->req->list, ih->export_announce_event); } - - if (EMPTY_LIST(tab->pending_exports) && tm_active(tab->export_timer)) - tm_stop(tab->export_timer); + if (EMPTY_LIST(tab->pending_exports) && ev_active(tab->announce_event)) + ev_postpone(tab->announce_event); /* If reduced to at most one export block pending */ if (tab->cork_active && @@ -2753,6 +2771,8 @@ rt_next_hop_update(void *data) if (atomic_fetch_and_explicit(&tab->nhu_state, NHU_SCHEDULED, memory_order_acq_rel) != NHU_RUNNING) ev_send_loop(tab->loop, tab->nhu_event); + ev_send_loop(tab->loop, tab->announce_event); + rt_unlock_table(tab); } @@ -2809,6 +2829,7 @@ rt_loop_stopped(void *data) r->loop = NULL; r->prune_event->list = r->ec_event->list = NULL; r->nhu_event->list = r->hcu_event->list = NULL; + r->announce_event->list = NULL; ev_send(r->delete_event->list, r->delete_event); } From adf37d8eff8f281871295c402a51ae1dd654851c Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Mon, 15 Nov 2021 10:53:58 +0100 Subject: [PATCH 09/42] VRF setting reduced to one argument, using default dummy iface for default vrf --- nest/config.Y | 4 ++-- nest/iface.c | 13 +++++++++---- nest/iface.h | 2 ++ nest/neighbor.c | 10 +++++----- nest/proto.c | 8 +++----- nest/protocol.h | 2 -- proto/bfd/bfd.c | 2 +- 7 files changed, 22 insertions(+), 19 deletions(-) diff --git a/nest/config.Y b/nest/config.Y index 6e7689ed..0914048b 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -266,8 +266,8 @@ proto_item: | MRTDUMP mrtdump_mask { this_proto->mrtdump = $2; } | ROUTER ID idval { this_proto->router_id = $3; } | DESCRIPTION text { this_proto->dsc = $2; } - | VRF text { this_proto->vrf = if_get_by_name($2); this_proto->vrf_set = 1; } - | VRF DEFAULT { this_proto->vrf = NULL; this_proto->vrf_set = 1; } + | VRF text { this_proto->vrf = if_get_by_name($2); } + | VRF DEFAULT { this_proto->vrf = &default_vrf; } ; diff --git a/nest/iface.c b/nest/iface.c index 83a633a3..5cb9e814 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -37,6 +37,7 @@ static pool *if_pool; list iface_list; +struct iface default_vrf; static void if_recalc_preferred(struct iface *i); @@ -147,7 +148,7 @@ ifa_send_notify(struct proto *p, unsigned c, struct ifa *a) { if (p->ifa_notify && (p->proto_state != PS_DOWN) && - (!p->vrf_set || p->vrf == a->iface->master)) + (!p->vrf || p->vrf == a->iface->master)) { if (p->debug & D_IFACES) log(L_TRACE "%s < address %N on interface %s %s", @@ -185,7 +186,7 @@ if_send_notify(struct proto *p, unsigned c, struct iface *i) { if (p->if_notify && (p->proto_state != PS_DOWN) && - (!p->vrf_set || p->vrf == i->master)) + (!p->vrf || p->vrf == i->master)) { if (p->debug & D_IFACES) log(L_TRACE "%s < interface %s %s", p->name, i->name, @@ -243,7 +244,7 @@ if_recalc_flags(struct iface *i UNUSED, uint flags) { if ((flags & IF_ADMIN_UP) && !(flags & (IF_SHUTDOWN | IF_TMP_DOWN)) && - !(i->master_index && !i->master)) + !(i->master_index && i->master == &default_vrf)) flags |= IF_UP; 
else flags &= ~IF_UP; @@ -301,6 +302,9 @@ if_update(struct iface *new) struct iface *i; unsigned c; + if (!new->master) + new->master = &default_vrf; + WALK_LIST(i, iface_list) if (!strcmp(new->name, i->name)) { @@ -711,6 +715,7 @@ if_init(void) { if_pool = rp_new(&root_pool, "Interfaces"); init_list(&iface_list); + strcpy(default_vrf.name, "default"); neigh_init(if_pool); } @@ -843,7 +848,7 @@ if_show(void) continue; char mbuf[16 + sizeof(i->name)] = {}; - if (i->master) + if (i->master != &default_vrf) bsprintf(mbuf, " master=%s", i->master->name); else if (i->master_index) bsprintf(mbuf, " master=#%u", i->master_index); diff --git a/nest/iface.h b/nest/iface.h index 1189cdd4..13f3bd12 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -28,6 +28,8 @@ struct ifa { /* Interface address */ unsigned flags; /* Analogous to iface->flags */ }; +extern struct iface default_vrf; + struct iface { node n; char name[16]; diff --git a/nest/neighbor.c b/nest/neighbor.c index 1a31fb79..cb2d1b2b 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -142,7 +142,7 @@ if_connected(ip_addr a, struct iface *i, struct ifa **ap, uint flags) } static inline int -if_connected_any(ip_addr a, struct iface *vrf, uint vrf_set, struct iface **iface, struct ifa **addr, uint flags) +if_connected_any(ip_addr a, struct iface *vrf, struct iface **iface, struct ifa **addr, uint flags) { struct iface *i; struct ifa *b; @@ -153,7 +153,7 @@ if_connected_any(ip_addr a, struct iface *vrf, uint vrf_set, struct iface **ifac /* Prefer SCOPE_HOST or longer prefix */ WALK_LIST(i, iface_list) - if ((!vrf_set || vrf == i->master) && ((s = if_connected(a, i, &b, flags)) >= 0)) + if ((!vrf || vrf == i->master) && ((s = if_connected(a, i, &b, flags)) >= 0)) if (scope_better(s, scope) || (scope_remote(s, scope) && ifa_better(b, *addr))) { *iface = i; @@ -245,7 +245,7 @@ neigh_find(struct proto *p, ip_addr a, struct iface *iface, uint flags) iface = (scope < 0) ? 
NULL : iface; } else - scope = if_connected_any(a, p->vrf, p->vrf_set, &iface, &addr, flags); + scope = if_connected_any(a, p->vrf, &iface, &addr, flags); /* scope < 0 means i don't know neighbor */ /* scope >= 0 <=> iface != NULL */ @@ -369,7 +369,7 @@ neigh_update(neighbor *n, struct iface *iface) return; /* VRF-bound neighbors ignore changes in other VRFs */ - if (p->vrf_set && (p->vrf != iface->master)) + if (p->vrf && (p->vrf != iface->master)) return; scope = if_connected(n->addr, iface, &ifa, n->flags); @@ -379,7 +379,7 @@ neigh_update(neighbor *n, struct iface *iface) { /* When neighbor is going down, try to respawn it on other ifaces */ if ((scope < 0) && (n->scope >= 0) && !n->ifreq && (n->flags & NEF_STICKY)) - scope = if_connected_any(n->addr, p->vrf, p->vrf_set, &iface, &ifa, n->flags); + scope = if_connected_any(n->addr, p->vrf, &iface, &ifa, n->flags); } else { diff --git a/nest/proto.c b/nest/proto.c index b7dbae5e..8babedee 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1469,7 +1469,6 @@ proto_init(struct proto_config *c, node *n) p->proto_state = PS_DOWN; p->last_state_change = current_time(); p->vrf = c->vrf; - p->vrf_set = c->vrf_set; insert_node(&p->n, n); p->event = ev_new_init(proto_pool, proto_event, p); @@ -1654,8 +1653,7 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config if ((nc->protocol != oc->protocol) || (nc->net_type != oc->net_type) || (nc->disabled != p->disabled) || - (nc->vrf != oc->vrf) || - (nc->vrf_set != oc->vrf_set)) + (nc->vrf != oc->vrf)) return 0; p->name = nc->name; @@ -2626,8 +2624,8 @@ proto_cmd_show(struct proto *p, uintptr_t verbose, int cnt) cli_msg(-1006, " Message: %s", p->message); if (p->cf->router_id) cli_msg(-1006, " Router ID: %R", p->cf->router_id); - if (p->vrf_set) - cli_msg(-1006, " VRF: %s", p->vrf ? p->vrf->name : "default"); + if (p->vrf) + cli_msg(-1006, " VRF: %s", p->vrf->name); if (p->proto->show_proto_info) p->proto->show_proto_info(p); diff --git a/nest/protocol.h b/nest/protocol.h index 8d077e44..1d4f2059 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -117,7 +117,6 @@ struct proto_config { int class; /* SYM_PROTO or SYM_TEMPLATE */ u8 net_type; /* Protocol network type (NET_*), 0 for undefined */ u8 disabled; /* Protocol enabled/disabled by default */ - u8 vrf_set; /* Related VRF instance (below) is defined */ u8 late_if_feed; /* Delay interface feed after channels are up */ u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */ u32 router_id; /* Protocol specific router ID */ @@ -154,7 +153,6 @@ struct proto { uint active_coroutines; /* Number of active coroutines */ byte net_type; /* Protocol network type (NET_*), 0 for undefined */ byte disabled; /* Manually disabled */ - byte vrf_set; /* Related VRF instance (above) is defined */ byte proto_state; /* Protocol state machine (PS_*, see below) */ byte active; /* From PS_START to cleanup after PS_STOP */ byte do_stop; /* Stop actions are scheduled */ diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 3964c267..dd3488d4 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -658,7 +658,7 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req) { struct bfd_config *cf = (struct bfd_config *) (p->p.cf); - if (p->p.vrf_set && (p->p.vrf != req->vrf)) + if (p->p.vrf && (p->p.vrf != req->vrf)) return 0; if (ipa_is_ip4(req->addr) ? 
!cf->accept_ipv4 : !cf->accept_ipv6) From 0fd1c1d091ee8e43eb0e15c67a92960ca581ed5f Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Sat, 13 Nov 2021 22:13:51 +0100 Subject: [PATCH 10/42] Route attribute cache is now lockless on read / clone. Lots of time was spent locking when accessing route attribute cache. This overhead should be now reduced to a minimum. --- nest/route.h | 25 +++--- nest/rt-attr.c | 214 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 171 insertions(+), 68 deletions(-) diff --git a/nest/route.h b/nest/route.h index 3f8bf433..531e004b 100644 --- a/nest/route.h +++ b/nest/route.h @@ -606,8 +606,8 @@ struct rte_src { typedef struct rta { - struct rta *next, **pprev; /* Hash chain */ - u32 uc; /* Use count */ + struct rta * _Atomic next, * _Atomic *pprev; /* Hash chain */ + _Atomic u32 uc; /* Use count */ u32 hash_key; /* Hash over important fields */ struct ea_list *eattrs; /* Extended Attribute chain */ struct hostentry *hostentry; /* Hostentry for recursive next-hops */ @@ -758,12 +758,6 @@ struct rte_owner { event *stop; }; -DEFINE_DOMAIN(attrs); -extern DOMAIN(attrs) attrs_domain; - -#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain) -#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain) - #define RTE_SRC_PU_SHIFT 44 #define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT) @@ -879,20 +873,23 @@ static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a #define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ static inline int rta_is_cached(rta *r) { return r->cached; } + static inline rta *rta_clone(rta *r) { - RTA_LOCK; - r->uc++; - RTA_UNLOCK; + u32 uc = atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel); + ASSERT_DIE(uc > 0); return r; } void rta__free(rta *r); static inline void rta_free(rta *r) { - RTA_LOCK; - if (r && !--r->uc) + if (!r) + return; + + u32 uc = atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel); + if (uc == 1) rta__free(r); - RTA_UNLOCK; } + rta *rta_do_cow(rta *o, linpool *lp); static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? 
rta_do_cow(r, lp) : r; } static inline void rta_uncache(rta *r) { r->cached = 0; r->uc = 0; } diff --git a/nest/rt-attr.c b/nest/rt-attr.c index cb66b65d..9a5498ed 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -54,6 +54,7 @@ #include "lib/hash.h" #include "lib/idm.h" #include "lib/resource.h" +#include "lib/rcu.h" #include "lib/string.h" #include @@ -85,8 +86,8 @@ const char * rta_dest_names[RTD_MAX] = { [RTD_PROHIBIT] = "prohibited", }; -DOMAIN(attrs) attrs_domain; -DOMAIN(attrs) src_domain; +DEFINE_DOMAIN(attrs); +static DOMAIN(attrs) src_domain; #define SRC_LOCK LOCK_DOMAIN(attrs, src_domain) #define SRC_UNLOCK UNLOCK_DOMAIN(attrs, src_domain) @@ -1166,21 +1167,28 @@ ea_append(ea_list *to, ea_list *what) * rta's */ -static uint rta_cache_count; -static uint rta_cache_size = 32; -static uint rta_cache_limit; -static uint rta_cache_mask; -static rta **rta_hash_table; +static DOMAIN(attrs) attrs_domain; -static void -rta_alloc_hash(void) +#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain) +#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain) + +struct rta_cache { + u32 count; + u32 size; + u32 limit; + u32 mask; + rta * _Atomic table[0]; +} * _Atomic rta_cache; +// rta_aux, rta_cache = { .size = ATOMIC_VAR_INIT(32), }; + +static struct rta_cache * +rta_alloc_hash(u32 size) { - rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size); - if (rta_cache_size < 32768) - rta_cache_limit = rta_cache_size * 2; - else - rta_cache_limit = ~0; - rta_cache_mask = rta_cache_size - 1; + struct rta_cache *c = mb_allocz(rta_pool, sizeof(struct rta_cache) + sizeof(rta * _Atomic) * size); + c->size = size; + c->limit = (size >> 20) ? (~0U) : (size * 2); + c->mask = size - 1; + return c; } static inline uint @@ -1234,34 +1242,88 @@ rta_copy(rta *o) } static inline void -rta_insert(rta *r) +rta_insert(rta *r, struct rta_cache *c) { - uint h = r->hash_key & rta_cache_mask; - r->next = rta_hash_table[h]; - if (r->next) - r->next->pprev = &r->next; - r->pprev = &rta_hash_table[h]; - rta_hash_table[h] = r; + uint h = r->hash_key & c->mask; + rta *next = atomic_load_explicit(&c->table[h], memory_order_relaxed); + + atomic_store_explicit(&r->next, next, memory_order_relaxed); + r->pprev = &c->table[h]; + + if (next) + next->pprev = &r->next; + + /* This store MUST be the last and MUST have release order for thread-safety */ + atomic_store_explicit(&c->table[h], r, memory_order_release); } static void -rta_rehash(void) +rta_rehash(struct rta_cache *c) { - uint ohs = rta_cache_size; - uint h; - rta *r, *n; - rta **oht = rta_hash_table; + u32 os = c->size; - rta_cache_size = 2*rta_cache_size; - DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size); - rta_alloc_hash(); - for(h=0; hcount = c->count; + + /* First we simply copy every chain to both new locations */ + for (u32 h = 0; h < os; h++) + { + rta *r = atomic_load_explicit(&c->table[h], memory_order_relaxed); + atomic_store_explicit(&nc->table[h], r, memory_order_relaxed); + atomic_store_explicit(&nc->table[h + os], r, memory_order_relaxed); + } + + /* Then we exchange the hashes; release semantics forces the previous code to be already done */ + atomic_store_explicit(&rta_cache, nc, memory_order_release); + + /* And now we pass through both chains and filter them */ + for (u32 h = 0; h < c->size; h++) + { + rta * _Atomic * ap = &nc->table[h]; + rta * _Atomic * bp = &nc->table[h + os]; + + rta *r = atomic_load_explicit(ap, memory_order_relaxed); + ASSERT_DIE(r == atomic_load_explicit(bp, memory_order_relaxed)); + + while (r) + 
{ + if (r->hash_key & os) { - n = r->next; - rta_insert(r); + r->pprev = bp; + atomic_store_explicit(bp, r, memory_order_release); + bp = &r->next; } - mb_free(oht); + else + { + r->pprev = ap; + atomic_store_explicit(ap, r, memory_order_release); + ap = &r->next; + } + + r = atomic_load_explicit(&r->next, memory_order_acquire); + } + + atomic_store_explicit(ap, NULL, memory_order_release); + atomic_store_explicit(bp, NULL, memory_order_release); + } + + synchronize_rcu(); + mb_free(c); +} + +static rta * +rta_find(rta *o, u32 h, struct rta_cache *c) +{ + rta *r = NULL; + + for (r = atomic_load_explicit(&c->table[h & c->mask], memory_order_acquire); r; r = atomic_load_explicit(&r->next, memory_order_acquire)) + if (r->hash_key == h && rta_same(r, o)) + { + atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel); + return r; + } + + return NULL; } /** @@ -1289,24 +1351,34 @@ rta_lookup(rta *o) h = rta_hash(o); + /* Lockless lookup */ + rcu_read_lock(); + r = rta_find(o, h, atomic_load_explicit(&rta_cache, memory_order_acquire)); + rcu_read_unlock(); + + if (r) + return r; + RTA_LOCK; - for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next) - if (r->hash_key == h && rta_same(r, o)) - { - r->uc++; - RTA_UNLOCK; - return r; - } + /* Locked lookup to avoid duplicates if possible */ + struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire); + r = rta_find(o, h, c); + if (r) + { + RTA_UNLOCK; + return r; + } + /* Store the rta */ r = rta_copy(o); r->hash_key = h; r->cached = 1; rt_lock_hostentry(r->hostentry); - rta_insert(r); + rta_insert(r, c); - if (++rta_cache_count > rta_cache_limit) - rta_rehash(); + if (++c->count > c->limit) + rta_rehash(c); RTA_UNLOCK; return r; @@ -1315,17 +1387,47 @@ rta_lookup(rta *o) void rta__free(rta *a) { - ASSERT(rta_cache_count && a->cached); - rta_cache_count--; - *a->pprev = a->next; - if (a->next) - a->next->pprev = a->pprev; + ASSERT(a->cached); + + RTA_LOCK; + struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire); + + if (atomic_load_explicit(&a->uc, memory_order_acquire)) + { + /* Acquired inbetween */ + RTA_UNLOCK; + return; + } + + /* Relink the forward pointer */ + rta *next = atomic_load_explicit(&a->next, memory_order_acquire); + atomic_store_explicit(a->pprev, next, memory_order_release); + + /* Relink the backwards pointer */ + if (next) + next->pprev = a->pprev; + + /* Wait until nobody knows about us */ + synchronize_rcu(); + + if (atomic_load_explicit(&a->uc, memory_order_acquire)) + { + /* Acquired inbetween, relink back */ + rta_insert(a, c); + RTA_UNLOCK; + return; + } + + /* Cleared to free the memory */ rt_unlock_hostentry(a->hostentry); if (a->nh.next) nexthop_free(a->nh.next); ea_free(a->eattrs); a->cached = 0; + c->count--; sl_free(rta_slab(a), a); + + RTA_UNLOCK; } rta * @@ -1394,9 +1496,13 @@ rta_dump_all(void) RTA_LOCK; - debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit); - for(h=0; hnext) + struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire); + + debug("Route attribute cache (%d entries, rehash at %d):\n", c->count, c->limit); + for(h=0; hsize; h++) + for(a = atomic_load_explicit(&c->table[h], memory_order_acquire); + a; + a = atomic_load_explicit(&a->next, memory_order_acquire)) { debug("%p ", a); rta_dump(a); @@ -1440,7 +1546,7 @@ rta_init(void) nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); - 
rta_alloc_hash(); + atomic_store_explicit(&rta_cache, rta_alloc_hash(32), memory_order_relaxed); rte_src_init(); } From df476c2e5d0684c3beab9058bc85d627b0e4d7ed Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Mon, 15 Nov 2021 22:48:24 +0100 Subject: [PATCH 11/42] Corking also feed start to keep BIRD running when refeeds would easily cause congestion --- nest/proto.c | 2 ++ nest/rt-table.c | 36 ++++++++++++++++++++++++------------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/nest/proto.c b/nest/proto.c index 8babedee..978582ca 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -883,6 +883,7 @@ channel_setup_in_table(struct channel *c, int best) cat->tab_cf.name = cat->name; cat->tab_cf.addr_type = c->net_type; + cat->tab_cf.cork_limit = 4 * page_size / sizeof(struct rt_pending_export); c->in_table = &cat->cat; c->in_table->push = (struct rt_import_request) { @@ -926,6 +927,7 @@ channel_setup_out_table(struct channel *c) cat->tab_cf.name = cat->name; cat->tab_cf.addr_type = c->net_type; + cat->tab_cf.cork_limit = 4 * page_size / sizeof(struct rt_pending_export); c->out_table = &cat->cat; c->out_table->push = (struct rt_import_request) { diff --git a/nest/rt-table.c b/nest/rt-table.c index f33b9153..a2f62df7 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -1099,8 +1099,13 @@ rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_ if (tab->first_export == NULL) tab->first_export = rpe; - if ((tab->first_export->seq + tab->config->cork_limit <= tab->next_export_seq) && !tab->cork_active) + if (!EMPTY_LIST(tab->exports) && + (tab->first_export->seq + tab->config->cork_limit <= tab->next_export_seq) && + !tab->cork_active) { + if (config->table_debug) + log(L_TRACE "%s: cork activated", tab->name); + ev_cork(&rt_cork); tab->cork_active = 1; } @@ -1777,22 +1782,14 @@ rt_request_export(rtable *t, struct rt_export_request *req) rt_set_export_state(hook, TES_HUNGRY); - struct rt_pending_export *rpe = rt_last_export(tab); - DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0); - atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); - hook->n = (node) {}; add_tail(&tab->exports, &hook->n); - FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); - DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); hook->event = ev_new_init(p, rt_feed_channel, hook); RT_UNLOCK(t); - rt_set_export_state(hook, TES_FEEDING); - ASSERT_DIE(hook->export_state == TES_FEEDING); rt_send_export_event(hook); } @@ -2506,6 +2503,8 @@ done:; { tab->cork_active = 0; ev_uncork(&rt_cork); + if (config->table_debug) + log(L_TRACE "%s: cork released", tab->name); } } @@ -2955,10 +2954,23 @@ rt_feed_channel(void *data) struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - RT_LOCK(c->table); - rtable_private *tab = RT_PRIV(c->table); + rtable_private *tab; + if (c->export_state == TES_HUNGRY) + { + rt_set_export_state(c, TES_FEEDING); - ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + tab = RT_LOCK(c->table); + + struct rt_pending_export *rpe = rt_last_export(tab); + DBG("store hook=%p last_export=%p seq=%lu\n", c, rpe, rpe ? 
rpe->seq : 0); + atomic_store_explicit(&c->last_export, rpe, memory_order_relaxed); + + FIB_ITERATE_INIT(&c->feed_fit, &tab->fib); + } + else + tab = RT_LOCK(c->table); + + ASSERT_DIE(c->export_state == TES_FEEDING); redo: FIB_ITERATE_START(&tab->fib, fit, net, n) From aadf690b14b3ac7773beb63102989fb8c1cdf7db Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 16 Nov 2021 12:23:24 +0100 Subject: [PATCH 12/42] Higher settle times when route refresh in the source table is running --- nest/route.h | 3 +++ nest/rt-table.c | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/nest/route.h b/nest/route.h index 531e004b..b5d44040 100644 --- a/nest/route.h +++ b/nest/route.h @@ -163,6 +163,7 @@ typedef struct rtable_private { struct fib fib; int use_count; /* Number of protocols using this table */ u32 rt_count; /* Number of routes in the table */ + u32 rr_count; /* Number of running route refresh requests */ list imports; /* Registered route importers */ list exports; /* Registered route exporters */ @@ -220,6 +221,8 @@ struct rtable_config { byte sorted; /* Routes of network are sorted according to rte_better() */ btime min_settle_time; /* Minimum settle time for notifications */ btime max_settle_time; /* Maximum settle time for notifications */ + btime min_rr_settle_time; /* Minimum settle time for notifications when route refresh is running */ + btime max_rr_settle_time; /* Maximum settle time for notifications when route refresh is running */ uint cork_limit; /* Amount of routes to be pending on export to cork imports */ }; diff --git a/nest/rt-table.c b/nest/rt-table.c index a2f62df7..d09abbef 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -1752,14 +1752,18 @@ rt_stop_import(struct rt_import_request *req, event *stopped) ASSERT_DIE(req->hook); struct rt_import_hook *hook = req->hook; - RT_LOCK(hook->table); + rtable_private *tab = RT_LOCK(hook->table); - rt_schedule_prune(RT_PRIV(hook->table)); + rt_schedule_prune(tab); rt_set_import_state(hook, TIS_STOP); hook->stopped = stopped; - RT_UNLOCK(hook->table); + if (hook->stale_set < hook->stale_valid) + if (!--tab->rr_count) + rt_schedule_notify(tab); + + RT_UNLOCK(tab); } void @@ -1864,6 +1868,8 @@ rt_refresh_begin(struct rt_import_request *req) hook->stale_valid = 0; } + tab->rr_count++; + if (req->trace_routes & D_STATES) log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); @@ -1884,16 +1890,19 @@ rt_refresh_end(struct rt_import_request *req) struct rt_import_hook *hook = req->hook; ASSERT_DIE(hook); - RT_LOCK(hook->table); + rtable_private *tab = RT_LOCK(hook->table); hook->stale_valid++; ASSERT_DIE(hook->stale_set == hook->stale_valid); - rt_schedule_prune(RT_PRIV(hook->table)); + rt_schedule_prune(tab); if (req->trace_routes & D_STATES) log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); - RT_UNLOCK(hook->table); + if (!--tab->rr_count) + rt_schedule_notify(tab); + + RT_UNLOCK(tab); } /** @@ -2032,8 +2041,17 @@ rt_settled_time(rtable_private *tab) { ASSUME(tab->base_settle_time != 0); - return MIN(tab->last_rt_change + tab->config->min_settle_time, - tab->base_settle_time + tab->config->max_settle_time); + btime min_settle_time = tab->rr_count ? tab->config->min_rr_settle_time : tab->config->min_settle_time; + btime max_settle_time = tab->rr_count ? 
tab->config->max_rr_settle_time : tab->config->max_settle_time; + + DBG("settled time computed from %t %t %t %t as %t / %t, now is %t\n", + tab->name, tab->last_rt_change, min_settle_time, + tab->base_settle_time, max_settle_time, + tab->last_rt_change + min_settle_time, + tab->base_settle_time + max_settle_time, current_time()); + + return MIN(tab->last_rt_change + min_settle_time, + tab->base_settle_time + max_settle_time); } static void @@ -2794,6 +2812,8 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_min_time = 5; c->min_settle_time = 1 S; c->max_settle_time = 20 S; + c->min_rr_settle_time = 30 S; + c->max_rr_settle_time = 90 S; c->cork_limit = 4 * page_size / sizeof(struct rt_pending_export); c->config = new_config; From 3fd1f46184aa74d8ab7ed65c9ab6954f7e49d309 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 17 Nov 2021 21:34:54 +0100 Subject: [PATCH 13/42] RPKI has its own loop --- proto/rpki/config.Y | 1 + proto/rpki/rpki.c | 19 +++++++++++++++---- proto/rpki/ssh_transport.c | 2 ++ proto/rpki/tcp_transport.c | 2 ++ proto/rpki/transport.c | 2 ++ 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/proto/rpki/config.Y b/proto/rpki/config.Y index d6d326b8..743b5b42 100644 --- a/proto/rpki/config.Y +++ b/proto/rpki/config.Y @@ -42,6 +42,7 @@ proto: rpki_proto ; rpki_proto_start: proto_start RPKI { this_proto = proto_config_new(&proto_rpki, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); RPKI_CFG->retry_interval = RPKI_RETRY_INTERVAL; RPKI_CFG->refresh_interval = RPKI_REFRESH_INTERVAL; RPKI_CFG->expire_interval = RPKI_EXPIRE_INTERVAL; diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index e3fccb48..cc86ab6a 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -109,6 +109,7 @@ static void rpki_schedule_next_expire_check(struct rpki_cache *cache); static void rpki_stop_refresh_timer_event(struct rpki_cache *cache); static void rpki_stop_retry_timer_event(struct rpki_cache *cache); static void rpki_stop_expire_timer_event(struct rpki_cache *cache); +static void rpki_stop_all_timers(struct rpki_cache *cache); /* @@ -219,6 +220,8 @@ rpki_force_restart_proto(struct rpki_proto *p) { if (p->cache) { + rpki_tr_close(p->cache->tr_sock); + rpki_stop_all_timers(p->cache); CACHE_DBG(p->cache, "Connection object destroying"); } @@ -342,7 +345,7 @@ rpki_schedule_next_refresh(struct rpki_cache *cache) btime t = cache->refresh_interval S; CACHE_DBG(cache, "after %t s", t); - tm_start(cache->refresh_timer, t); + tm_start_in(cache->refresh_timer, t, cache->p->p.loop); } static void @@ -351,7 +354,7 @@ rpki_schedule_next_retry(struct rpki_cache *cache) btime t = cache->retry_interval S; CACHE_DBG(cache, "after %t s", t); - tm_start(cache->retry_timer, t); + tm_start_in(cache->retry_timer, t, cache->p->p.loop); } static void @@ -362,7 +365,7 @@ rpki_schedule_next_expire_check(struct rpki_cache *cache) t = MAX(t, 1 S); CACHE_DBG(cache, "after %t s", t); - tm_start(cache->expire_timer, t); + tm_start_in(cache->expire_timer, t, cache->p->p.loop); } static void @@ -379,13 +382,21 @@ rpki_stop_retry_timer_event(struct rpki_cache *cache) tm_stop(cache->retry_timer); } -static void UNUSED +static void rpki_stop_expire_timer_event(struct rpki_cache *cache) { CACHE_DBG(cache, "Stop"); tm_stop(cache->expire_timer); } +static void +rpki_stop_all_timers(struct rpki_cache *cache) +{ + rpki_stop_refresh_timer_event(cache); + rpki_stop_retry_timer_event(cache); + rpki_stop_expire_timer_event(cache); +} + static int rpki_do_we_recv_prefix_pdu_in_last_seconds(struct rpki_cache *cache) { 
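The transport hunks below apply the same ownership rule to the RPKI sockets. Taken
together, the loop-aware calls in this patch follow one pattern; a condensed sketch
composed from the surrounding hunks (tm_start_in(), sk_start(), sk_stop() and
SKF_THREAD are the calls actually used there, only the composition into one fragment
is illustrative):

    /* A timer owned by a protocol loop must be started in that loop */
    btime t = cache->refresh_interval S;
    tm_start_in(cache->refresh_timer, t, cache->p->p.loop);

    /* A socket of a threaded protocol is marked and explicitly started */
    sk->flags |= SKF_THREAD;
    if (sk_open(sk) != 0)
      return RPKI_TR_ERROR;
    sk_start(sk);

    /* ... and stopped again before being freed */
    sk_stop(sk);
    rfree(sk);
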
diff --git a/proto/rpki/ssh_transport.c b/proto/rpki/ssh_transport.c index 6333f367..223afa80 100644 --- a/proto/rpki/ssh_transport.c +++ b/proto/rpki/ssh_transport.c @@ -38,6 +38,8 @@ rpki_tr_ssh_open(struct rpki_tr_sock *tr) if (sk_open(sk) != 0) return RPKI_TR_ERROR; + sk_start(sk); + return RPKI_TR_SUCCESS; } diff --git a/proto/rpki/tcp_transport.c b/proto/rpki/tcp_transport.c index 132f8e2d..4e850c44 100644 --- a/proto/rpki/tcp_transport.c +++ b/proto/rpki/tcp_transport.c @@ -31,6 +31,8 @@ rpki_tr_tcp_open(struct rpki_tr_sock *tr) if (sk_open(sk) != 0) return RPKI_TR_ERROR; + sk_start(sk); + return RPKI_TR_SUCCESS; } diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c index a1ac7587..b52495dc 100644 --- a/proto/rpki/transport.c +++ b/proto/rpki/transport.c @@ -85,6 +85,7 @@ rpki_tr_open(struct rpki_tr_sock *tr) sk->rbsize = RPKI_RX_BUFFER_SIZE; sk->tbsize = RPKI_TX_BUFFER_SIZE; sk->tos = IP_PREC_INTERNET_CONTROL; + sk->flags |= SKF_THREAD; if (ipa_zero(sk->daddr) && sk->host) { @@ -119,6 +120,7 @@ rpki_tr_close(struct rpki_tr_sock *tr) if (tr->sk) { + sk_stop(tr->sk); rfree(tr->sk); tr->sk = NULL; } From dc28c6ed1c6643ec19d2e8f94f92955f58c357a7 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 18 Aug 2022 18:32:33 +0200 Subject: [PATCH 14/42] Simplified the protocol hookup code in Makefiles --- Makefile.in | 11 +---------- nest/Makefile | 5 ++--- proto/babel/Makefile | 1 - proto/bfd/Makefile | 1 - proto/bgp/Makefile | 1 - proto/mrt/Makefile | 1 - proto/ospf/Makefile | 1 - proto/perf/Makefile | 1 - proto/pipe/Makefile | 1 - proto/radv/Makefile | 1 - proto/rip/Makefile | 1 - proto/rpki/Makefile | 1 - proto/static/Makefile | 1 - sysdep/unix/Makefile | 2 -- 14 files changed, 3 insertions(+), 26 deletions(-) diff --git a/Makefile.in b/Makefile.in index 0d55807b..fa534872 100644 --- a/Makefile.in +++ b/Makefile.in @@ -26,6 +26,7 @@ INSTALL_DATA=@INSTALL_DATA@ client=$(addprefix $(exedir)/,@CLIENT@) daemon=$(exedir)/bird protocols=@protocols@ +PROTO_BUILD := $(protocols) dev kif krt prefix=@prefix@ exec_prefix=@exec_prefix@ @@ -82,9 +83,6 @@ conf-lex-targets := $(addprefix $(objdir)/conf/,cf-lex.o) conf-y-targets := $(addprefix $(objdir)/conf/,cf-parse.y keywords.h commands.h) cf-local = $(conf-y-targets): $(s)config.Y -# nest/Makefile declarations needed for all other modules -proto-build-c := $(addprefix $(objdir)/nest/,proto-build.c) - src-o-files = $(patsubst %.c,$(o)%.o,$(src)) tests-target-files = $(patsubst %.c,$(o)%,$(tests_src)) @@ -98,13 +96,6 @@ else o = $(patsubst $(srcdir)%,$(objdir)%,$(s)) endif -define proto-build_in = -PROTO_BUILD += $(1) -$(proto-build-c): $(lastword $(MAKEFILE_LIST)) -endef - -proto-build = $(eval $(call proto-build_in,$(1))) - define clean_in = clean:: rm -f $(addprefix $(o),$(1)) diff --git a/nest/Makefile b/nest/Makefile index 7d451ba4..a2e30ee2 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -2,11 +2,10 @@ src := a-path.c a-set.c cli.c cmds.c iface.c locks.c neighbor.c password.c proto obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,dev_build) -$(proto-build-c): $(lastword $(MAKEFILE_LIST)) +$(objdir)/nest/proto-build.c: $(lastword $(MAKEFILE_LIST)) $(E)echo GEN $@ - $(Q)echo "$(patsubst %,void %(void); ,$(PROTO_BUILD)) void protos_build_gen(void) { $(patsubst %, %(); ,$(PROTO_BUILD))}" > $@ + $(Q)echo "$(patsubst %,void %_build(void); ,$(PROTO_BUILD)) void protos_build_gen(void) { $(patsubst %, %_build(); ,$(PROTO_BUILD))}" > $@ tests_src := a-set_test.c a-path_test.c tests_targets := $(tests_targets) 
diff --git a/proto/babel/Makefile b/proto/babel/Makefile
index ae6aeaf2..06b58e95 100644
--- a/proto/babel/Makefile
+++ b/proto/babel/Makefile
@@ -2,6 +2,5 @@ src := babel.c packets.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,babel_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile
index dbdc0a09..d9aecfa9 100644
--- a/proto/bfd/Makefile
+++ b/proto/bfd/Makefile
@@ -2,6 +2,5 @@ src := bfd.c io.c packets.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,bfd_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile
index 2a4cc99c..f6a38678 100644
--- a/proto/bgp/Makefile
+++ b/proto/bgp/Makefile
@@ -2,6 +2,5 @@ src := attrs.c bgp.c packets.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,bgp_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/mrt/Makefile b/proto/mrt/Makefile
index 000e1c1c..8cd44ac1 100644
--- a/proto/mrt/Makefile
+++ b/proto/mrt/Makefile
@@ -2,6 +2,5 @@ src := mrt.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,mrt_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/ospf/Makefile b/proto/ospf/Makefile
index 85664543..015f394a 100644
--- a/proto/ospf/Makefile
+++ b/proto/ospf/Makefile
@@ -2,6 +2,5 @@ src := dbdes.c hello.c iface.c lsack.c lsalib.c lsreq.c lsupd.c neighbor.c ospf.
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,ospf_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/perf/Makefile b/proto/perf/Makefile
index 42051f43..7877fb19 100644
--- a/proto/perf/Makefile
+++ b/proto/perf/Makefile
@@ -2,6 +2,5 @@ src := perf.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,perf_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/pipe/Makefile b/proto/pipe/Makefile
index ba66027f..0d68db4c 100644
--- a/proto/pipe/Makefile
+++ b/proto/pipe/Makefile
@@ -2,6 +2,5 @@ src := pipe.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,pipe_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/radv/Makefile b/proto/radv/Makefile
index 4780bee3..5c56fbf3 100644
--- a/proto/radv/Makefile
+++ b/proto/radv/Makefile
@@ -2,6 +2,5 @@ src := packets.c radv.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,radv_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/rip/Makefile b/proto/rip/Makefile
index b9ff62d6..f4a6fa72 100644
--- a/proto/rip/Makefile
+++ b/proto/rip/Makefile
@@ -2,6 +2,5 @@ src := packets.c rip.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,rip_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/rpki/Makefile b/proto/rpki/Makefile
index 8e3a2761..0f60b2a0 100644
--- a/proto/rpki/Makefile
+++ b/proto/rpki/Makefile
@@ -2,6 +2,5 @@ src := rpki.c packets.c tcp_transport.c ssh_transport.c transport.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,rpki_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/static/Makefile b/proto/static/Makefile
index 26aed31f..de6e819b 100644
--- a/proto/static/Makefile
+++ b/proto/static/Makefile
@@ -2,6 +2,5 @@ src := static.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,static_build)
 
 tests_objs := $(tests_objs) $(src-o-files)
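After these removals, a protocol Makefile is reduced to pure boilerplate. For instance, proto/static/Makefile now reads, in full (reconstructed from the hunk above):

    src := static.c
    obj := $(src-o-files)
    $(all-daemon)
    $(cf-local)

    tests_objs := $(tests_objs) $(src-o-files)

A new protocol therefore hooks itself up simply by being listed in $(protocols) (or among the literal dev/kif/krt extras in Makefile.in) and defining its foo_build() constructor; only kif and krt, which live under sysdep/unix rather than proto/, previously needed explicit registration, as the final hunk below shows.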
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile
index 51ab98a9..d0d36b5f 100644
--- a/sysdep/unix/Makefile
+++ b/sysdep/unix/Makefile
@@ -2,8 +2,6 @@ src := alloc.c io.c krt.c log.c main.c random.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
-$(call proto-build,kif_build)
-$(call proto-build,krt_build)
 
 $(conf-y-targets): $(s)krt.Y
 
 src := $(filter-out main.c, $(src))

From 71b3456eede17be6646b7deebff84f34ee5755f7 Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Fri, 22 Oct 2021 19:43:55 +0200
Subject: [PATCH 15/42] Better prophylaxis against recursive route loops

In some specific configurations, it was possible to send BIRD into an
infinite loop of recursive next hop resolution. This was caused by route
priority inversion.

To prevent priority inversions from affecting other next hops, we simply
refuse to resolve any next hop if the best route for the matching prefix
is recursive, or if any other route with the same preference is
recursive.

Next hop resolution doesn't change route priority, so it is perfectly OK
to resolve BGP next hops e.g. by an OSPF route; yet if the same (or a
covering) prefix is also announced by iBGP, retraction of the OSPF route
could cause a priority inversion.
---
 nest/rt-table.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/nest/rt-table.c b/nest/rt-table.c
index abb29fe1..4127912c 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -3529,9 +3529,10 @@ rt_update_hostentry(rtable *tab, struct hostentry *he)
     {
       rte *e = n->routes;
       rta *a = e->attrs;
-      pxlen = n->n.addr->pxlen;
+      word pref = a->pref;
 
-      if (a->hostentry)
+      for (rte *ee = n->routes; ee; ee = ee->next)
+	if ((ee->attrs->pref >= pref) && ee->attrs->hostentry)
 	{
 	  /* Recursive route should not depend on another recursive route */
 	  log(L_WARN "Next hop address %I resolvable through recursive route for %N",
@@ -3539,6 +3540,8 @@ rt_update_hostentry(rtable *tab, struct hostentry *he)
 	  goto done;
 	}
 
+      pxlen = n->n.addr->pxlen;
+
       if (a->dest == RTD_UNICAST)
 	{
 	  for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)

From 974f16b1f70ae8b7fa4efa6a217988e1811069e7 Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Fri, 9 Sep 2022 15:04:36 +0200
Subject: [PATCH 16/42] Created a dedicated settle timer structure

---
 conf/confbase.Y | 10 ++++++++
 doc/bird.sgml   | 17 ++++++-------
 lib/settle.h    | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 nest/config.Y   |  6 ++---
 nest/proto.c    | 53 +++++++++++++++++-----------------
 nest/protocol.h |  7 +++---
 6 files changed, 110 insertions(+), 47 deletions(-)
 create mode 100644 lib/settle.h

diff --git a/conf/confbase.Y b/conf/confbase.Y
index 241c332d..8e5da9e3 100644
--- a/conf/confbase.Y
+++ b/conf/confbase.Y
@@ -14,6 +14,7 @@ CF_HDR
 #include "conf/conf.h"
 #include "lib/resource.h"
 #include "lib/socket.h"
+#include "lib/settle.h"
 #include "lib/timer.h"
 #include "lib/string.h"
 #include "nest/protocol.h"
@@ -93,6 +94,7 @@ CF_DECLS
   struct proto_spec ps;
   struct channel_limit cl;
   struct timeformat *tf;
+  struct settle_config settle;
   struct adata *ad;
   struct bytestring *bs;
 }
@@ -111,6 +113,7 @@ CF_DECLS
 %type <i> expr bool pxlen4
 %type