From 487b21b491b14e7a5919a22d13b785de2c8eb04f Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 4 Jun 2024 08:11:30 +0200 Subject: [PATCH] BGP: Freeing prefixes deferred to save rcu synchronization --- lib/defer.c | 2 + lib/rcu.c | 2 +- proto/bgp/attrs.c | 244 +++++++++++++++++++++++++------------------- proto/bgp/bgp.c | 7 +- proto/bgp/bgp.h | 55 +++++++--- proto/bgp/packets.c | 48 +++++---- 6 files changed, 206 insertions(+), 152 deletions(-) diff --git a/lib/defer.c b/lib/defer.c index 3d51534c..789673ba 100644 --- a/lib/defer.c +++ b/lib/defer.c @@ -13,6 +13,8 @@ _Thread_local struct deferred local_deferred = {}; static void defer_execute(void *_ld) { + synchronize_rcu(); + ASSERT_DIE(_ld == &local_deferred); /* Run */ diff --git a/lib/rcu.c b/lib/rcu.c index bd442c74..212d166a 100644 --- a/lib/rcu.c +++ b/lib/rcu.c @@ -36,7 +36,7 @@ rcu_critical(struct rcu_thread *t, u64 phase) void synchronize_rcu(void) { - if (!rcu_blocked && last_locked) + if (!rcu_blocked && (last_locked > &locking_stack.meta)) bug("Forbidden to synchronize RCU unless an appropriate lock is taken"); /* Increment phase */ diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index b8b1c1e0..ec6cb508 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1586,7 +1586,7 @@ bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to) HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket) static void -bgp_init_bucket_table(struct bgp_channel *c) +bgp_init_bucket_table(struct bgp_ptx_private *c) { HASH_INIT(c->bucket_hash, c->pool, 8); c->bucket_slab = sl_new(c->pool, sizeof(struct bgp_bucket)); @@ -1596,7 +1596,7 @@ bgp_init_bucket_table(struct bgp_channel *c) } static struct bgp_bucket * -bgp_get_bucket(struct bgp_channel *c, ea_list *new) +bgp_get_bucket(struct bgp_ptx_private *c, ea_list *new) { /* Hash and lookup */ ea_list *ns = ea_lookup(new, 0, EALS_CUSTOM); @@ -1622,7 +1622,7 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new) } static struct bgp_bucket * -bgp_get_withdraw_bucket(struct bgp_channel *c) +bgp_get_withdraw_bucket(struct bgp_ptx_private *c) { if (!c->withdraw_bucket) { @@ -1634,7 +1634,7 @@ bgp_get_withdraw_bucket(struct bgp_channel *c) } static void -bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b) +bgp_free_bucket(struct bgp_ptx_private *c, struct bgp_bucket *b) { HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b); ea_free(b->attrs); @@ -1642,7 +1642,7 @@ bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b) } int -bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b) +bgp_done_bucket(struct bgp_ptx_private *c, struct bgp_bucket *b) { /* Won't free the withdraw bucket */ if (b == c->withdraw_bucket) @@ -1659,19 +1659,12 @@ bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b) } void -bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b) -{ - rem_node(&b->send_node); - add_tail(&c->bucket_queue, &b->send_node); -} - -void -bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) +bgp_withdraw_bucket(struct bgp_ptx_private *c, struct bgp_bucket *b) { if (b->bmp) return; - SKIP_BACK_DECLARE(struct bgp_proto, p, p, c->c.proto); + SKIP_BACK_DECLARE(struct bgp_proto, p, p, c->c->c.proto); struct bgp_bucket *wb = bgp_get_withdraw_bucket(c); log(L_ERR "%s: Attribute list too long", p->p.name); @@ -1691,25 +1684,27 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) */ static void -bgp_init_prefix_table(struct bgp_channel *c) +bgp_init_prefix_table(struct bgp_ptx_private *c) { ASSERT_DIE(!c->prefix_slab); c->prefix_slab = sl_new(c->pool, sizeof(struct bgp_prefix)); - ASSERT_DIE(!c->tx_netindex); - c->tx_netindex = netindex_hash_new(c->pool, proto_event_list(c->c.proto)); + /* Netindex must be allocated from the main BGP pool + * as its cleanup routines are expecting to be allocated from something + * locked while entering a loop. That's kinda stupid but i'm lazy now + * to rework it. */ + ASSERT_DIE(!c->netindex); + c->netindex = netindex_hash_new(c->c->pool, proto_event_list(c->c->c.proto)); u32 len = 64; struct bgp_prefix * _Atomic * block = mb_allocz(c->pool, len * sizeof *block); atomic_store_explicit(&c->prefixes_len, len, memory_order_relaxed); atomic_store_explicit(&c->prefixes, block, memory_order_relaxed); - - c->tx_lock = DOMAIN_NEW_RCU_SYNC(rtable); } static struct bgp_prefix * -bgp_find_prefix(struct bgp_channel *c, struct netindex *ni, struct rte_src *src, int add_path_tx) +bgp_find_prefix(struct bgp_ptx_private *c, struct netindex *ni, struct rte_src *src, int add_path_tx) { u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed); struct bgp_prefix * _Atomic * block = atomic_load_explicit(&c->prefixes, memory_order_relaxed); @@ -1723,12 +1718,10 @@ bgp_find_prefix(struct bgp_channel *c, struct netindex *ni, struct rte_src *src, return px; return NULL; - - c->tx_lock = DOMAIN_NEW(rtable); } static struct bgp_prefix * -bgp_get_prefix(struct bgp_channel *c, struct netindex *ni, struct rte_src *src, int add_path_tx) +bgp_get_prefix(struct bgp_ptx_private *c, struct netindex *ni, struct rte_src *src, int add_path_tx) { /* Find existing */ struct bgp_prefix *px = bgp_find_prefix(c, ni, src, add_path_tx); @@ -1753,7 +1746,7 @@ bgp_get_prefix(struct bgp_channel *c, struct netindex *ni, struct rte_src *src, atomic_store_explicit(&c->prefixes, nb, memory_order_release); atomic_store_explicit(&c->prefixes_len, nlen, memory_order_release); - atomic_store_explicit(&c->prefix_exporter.max_feed_index, nlen, memory_order_release); + atomic_store_explicit(&c->exporter.max_feed_index, nlen, memory_order_release); synchronize_rcu(); @@ -1771,7 +1764,7 @@ bgp_get_prefix(struct bgp_channel *c, struct netindex *ni, struct rte_src *src, .next = atomic_load_explicit(&block[ni->index], memory_order_relaxed), }; - net_lock_index(c->tx_netindex, ni); + net_lock_index(c->netindex, ni); rt_lock_source(src); atomic_store_explicit(&block[ni->index], px, memory_order_release); @@ -1779,15 +1772,15 @@ bgp_get_prefix(struct bgp_channel *c, struct netindex *ni, struct rte_src *src, return px; } -static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px); +static void bgp_free_prefix(struct bgp_ptx_private *c, struct bgp_prefix *px); static inline int -bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b) +bgp_update_prefix(struct bgp_ptx_private *c, struct bgp_prefix *px, struct bgp_bucket *b) { #define IS_WITHDRAW_BUCKET(b) ((b) == c->withdraw_bucket) #define BPX_TRACE(what) do { \ - if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \ - c->c.proto->name, c->c.name, what, \ + if (c->c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \ + c->c->c.proto->name, c->c->c.name, what, \ px->ni->addr, px->src->global_id, IS_WITHDRAW_BUCKET(b) ? "withdraw" : "update"); } while (0) px->lastmod = current_time(); @@ -1806,7 +1799,7 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke } /* The new bucket is the same as we sent before */ - if ((px->last == b) || c->tx_keep && !px->last && IS_WITHDRAW_BUCKET(b)) + if ((px->last == b) || c->c->tx_keep && !px->last && IS_WITHDRAW_BUCKET(b)) { if (px->cur) BPX_TRACE("reverted"); @@ -1835,8 +1828,29 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke #undef BPX_TRACE } +struct bgp_free_prefix_deferred_item { + struct deferred_call dc; + union bgp_ptx *tx; + struct bgp_prefix *px; +}; + static void -bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) +bgp_free_prefix_deferred(struct deferred_call *dc) +{ + SKIP_BACK_DECLARE(struct bgp_free_prefix_deferred_item, bfpdi, dc, dc); + union bgp_ptx *tx = bfpdi->tx; + struct bgp_prefix *px = bfpdi->px; + + BGP_PTX_LOCK(tx, ptx); + + net_unlock_index(ptx->netindex, px->ni); + rt_unlock_source(px->src); + + sl_free(px); +} + +static void +bgp_free_prefix(struct bgp_ptx_private *c, struct bgp_prefix *px) { u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed); struct bgp_prefix * _Atomic * block = @@ -1853,16 +1867,17 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) break; } - synchronize_rcu(); + struct bgp_free_prefix_deferred_item bfpdi = { + .dc.hook = bgp_free_prefix_deferred, + .tx = BGP_PTX_PUB(c), + .px = px, + }; - net_unlock_index(c->tx_netindex, px->ni); - rt_unlock_source(px->src); - - sl_free(px); + defer_call(&bfpdi.dc, sizeof bfpdi); } void -bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck) +bgp_done_prefix(struct bgp_ptx_private *c, struct bgp_prefix *px, struct bgp_bucket *buck) { /* BMP hack */ if (buck->bmp) @@ -1873,7 +1888,7 @@ bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket rem_node(&px->buck_node); /* We may want to store the updates */ - if (c->tx_keep) + if (c->c->tx_keep) { /* Nothing to be sent right now */ px->cur = NULL; @@ -1898,11 +1913,11 @@ bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket } void -bgp_tx_resend(struct bgp_proto *p, struct bgp_channel *c) +bgp_tx_resend(struct bgp_proto *p, struct bgp_channel *bc) { - LOCK_DOMAIN(rtable, c->tx_lock); + BGP_PTX_LOCK(bc->tx, c); - ASSERT_DIE(c->tx_keep); + ASSERT_DIE(bc->tx_keep); uint seen = 0; u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed); @@ -1926,14 +1941,12 @@ bgp_tx_resend(struct bgp_proto *p, struct bgp_channel *c) seen += bgp_update_prefix(c, px, last); } - if (c->c.debug & D_EVENTS) + if (bc->c.debug & D_EVENTS) log(L_TRACE "%s.%s: TX resending %u routes", - c->c.proto->name, c->c.name, seen); - - UNLOCK_DOMAIN(rtable, c->tx_lock); + bc->c.proto->name, bc->c.name, seen); if (seen) - bgp_schedule_packet(p->conn, c, PKT_UPDATE); + bgp_schedule_packet(p->conn, bc, PKT_UPDATE); } /* @@ -1948,7 +1961,7 @@ static struct rt_export_feed * bgp_out_feed_net(struct rt_exporter *e, struct rcu_unwinder *u, struct netindex *ni, const struct rt_export_item *_first) { struct rt_export_feed *feed = NULL; - SKIP_BACK_DECLARE(struct bgp_channel, c, prefix_exporter, e); + SKIP_BACK_DECLARE(union bgp_ptx, c, exporter, e); u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed); if (ni->index >= len) @@ -2012,11 +2025,22 @@ void bgp_init_pending_tx(struct bgp_channel *c) { ASSERT_DIE(c->c.out_table == NULL); + ASSERT_DIE(c->tx == NULL); - bgp_init_bucket_table(c); - bgp_init_prefix_table(c); + DOMAIN(rtable) dom = DOMAIN_NEW_RCU_SYNC(rtable); + LOCK_DOMAIN(rtable, dom); + pool *p = rp_newf(c->pool, dom.rtable, "%s.%s TX", c->c.proto->name, c->c.name); - c->prefix_exporter = (struct rt_exporter) { + struct bgp_ptx_private *bpp = mb_allocz(p, sizeof *bpp); + + bpp->lock = dom; + bpp->pool = p; + bpp->c = c; + + bgp_init_bucket_table(bpp); + bgp_init_prefix_table(bpp); + + bpp->exporter = (struct rt_exporter) { .journal = { .loop = c->c.proto->loop, .item_size = sizeof(struct rt_export_item), @@ -2024,49 +2048,75 @@ bgp_init_pending_tx(struct bgp_channel *c) }, .name = mb_sprintf(c->c.proto->pool, "%s.%s.export", c->c.proto->name, c->c.name), .net_type = c->c.net_type, - .max_feed_index = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed), - .netindex = c->tx_netindex, + .max_feed_index = atomic_load_explicit(&bpp->prefixes_len, memory_order_relaxed), + .netindex = bpp->netindex, .trace_routes = c->c.debug, .feed_net = bgp_out_feed_net, }; - rt_exporter_init(&c->prefix_exporter, &c->cf->ptx_exporter_settle); + rt_exporter_init(&bpp->exporter, &c->cf->ptx_exporter_settle); + c->c.out_table = &bpp->exporter; - c->c.out_table = &c->prefix_exporter; + c->tx = BGP_PTX_PUB(bpp); + + UNLOCK_DOMAIN(rtable, dom); } struct bgp_pending_tx_finisher { event e; - struct bgp_channel *c; + union bgp_ptx *ptx; }; static void bgp_finish_pending_tx(void *_bptf) { struct bgp_pending_tx_finisher *bptf = _bptf; - struct bgp_channel *c = bptf->c; + union bgp_ptx *ptx = bptf->ptx; + struct bgp_ptx_private *c = &ptx->priv; + struct bgp_channel *bc = c->c; + + DOMAIN(rtable) dom = c->lock; + LOCK_DOMAIN(rtable, dom); mb_free(bptf); - channel_del_obstacle(&c->c); + + mb_free(atomic_load_explicit(&c->prefixes, memory_order_relaxed)); + sl_delete(c->prefix_slab); + c->prefix_slab = NULL; + + HASH_WALK(c->bucket_hash, next, n) + bug("Stray bucket after cleanup"); + HASH_WALK_END; + + HASH_FREE(c->bucket_hash); + sl_delete(c->bucket_slab); + c->bucket_slab = NULL; + + rp_free(ptx->priv.pool); + + UNLOCK_DOMAIN(rtable, dom); + DOMAIN_FREE(rtable, dom); + + channel_del_obstacle(&bc->c); } void -bgp_free_pending_tx(struct bgp_channel *c) +bgp_free_pending_tx(struct bgp_channel *bc) { - if (!c->bucket_hash.data) + if (!bc->tx) return; - LOCK_DOMAIN(rtable, c->tx_lock); + BGP_PTX_LOCK(bc->tx, c); - c->c.out_table = NULL; - rt_exporter_shutdown(&c->prefix_exporter, NULL); + bc->c.out_table = NULL; + rt_exporter_shutdown(&c->exporter, NULL); struct bgp_prefix *px; u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed); struct bgp_prefix * _Atomic * block = atomic_load_explicit(&c->prefixes, memory_order_relaxed); - if (c->tx_keep) + if (bc->tx_keep) { /* Move all kept prefixes to the withdraw bucket */ struct bgp_bucket *b = bgp_get_withdraw_bucket(c); @@ -2098,39 +2148,28 @@ bgp_free_pending_tx(struct bgp_channel *c) atomic_store_explicit(&c->prefixes, NULL, memory_order_release); atomic_store_explicit(&c->prefixes_len, 0, memory_order_release); - atomic_store_explicit(&c->prefix_exporter.max_feed_index, 0, memory_order_release); + atomic_store_explicit(&c->exporter.max_feed_index, 0, memory_order_release); - synchronize_rcu(); - mb_free(block); - sl_delete(c->prefix_slab); - c->prefix_slab = NULL; - - HASH_WALK(c->bucket_hash, next, n) - bug("Stray bucket after cleanup"); - HASH_WALK_END; - - HASH_FREE(c->bucket_hash); - sl_delete(c->bucket_slab); - c->bucket_slab = NULL; - - struct bgp_pending_tx_finisher *bptf = mb_alloc(c->c.proto->pool, sizeof *bptf); + struct bgp_pending_tx_finisher *bptf = mb_alloc(c->pool, sizeof *bptf); *bptf = (struct bgp_pending_tx_finisher) { .e = { .hook = bgp_finish_pending_tx, .data = bptf, }, - .c = c, + .ptx = bc->tx, }; - channel_add_obstacle(&c->c); - netindex_hash_delete(c->tx_netindex, &bptf->e, proto_event_list(c->c.proto)); - c->tx_netindex = NULL; - c->prefix_exporter.netindex = NULL; - - UNLOCK_DOMAIN(rtable, c->tx_lock); - DOMAIN_FREE(rtable, c->tx_lock); + channel_add_obstacle(&bc->c); + netindex_hash_delete(c->netindex, &bptf->e, proto_event_list(c->c->c.proto)); + /* We can't null this, bgp_free_prefix_deferred expects + * this to be set: + * c->netindex = NULL; + */ + c->exporter.netindex = NULL; + bc->tx = NULL; } + /* * BGP protocol glue */ @@ -2349,7 +2388,7 @@ void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old) { struct bgp_proto *p = (void *) P; - struct bgp_channel *c = (void *) C; + struct bgp_channel *bc = (void *) C; struct bgp_bucket *buck; struct rte_src *path; @@ -2357,30 +2396,21 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c if (C->class != &channel_bgp) return; - LOCK_DOMAIN(rtable, c->tx_lock); + struct ea_list *attrs = new ? bgp_update_attrs(p, bc, new, new->attrs, tmp_linpool) : NULL; - if (new) - { - struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs, tmp_linpool); + BGP_PTX_LOCK(bc->tx, c); - /* Error during attribute processing */ - if (!attrs) - log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n); + /* Error during attribute processing */ + if (new && !attrs) + log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n); - /* If attributes are invalid, we fail back to withdraw */ - buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); - path = new->src; - } - else - { - buck = bgp_get_withdraw_bucket(c); - path = old->src; - } + /* If attributes are invalid, we fail back to withdraw */ + buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); + path = (new ?: old)->src; - if (bgp_update_prefix(c, bgp_get_prefix(c, net_get_index(c->tx_netindex, n), path, c->add_path_tx), buck)) - bgp_schedule_packet(p->conn, c, PKT_UPDATE); - - UNLOCK_DOMAIN(rtable, c->tx_lock); + /* And queue the notification */ + if (bgp_update_prefix(c, bgp_get_prefix(c, net_get_index(c->netindex, n), path, bc->add_path_tx), buck)) + bgp_schedule_packet(p->conn, bc, PKT_UPDATE); } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index f9169a7c..437201e9 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -2434,7 +2434,8 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor *export_changed = 1; /* Update prefix exporter settle timer */ - c->prefix_exporter.journal.announce_timer.cf = c->cf->ptx_exporter_settle; + if (c->tx) + c->tx->exporter.journal.announce_timer.cf = c->cf->ptx_exporter_settle; c->cf = new; return 1; @@ -2854,11 +2855,13 @@ bgp_show_proto_info(struct proto *P) if (c->base_table) cli_msg(-1006, " Base table: %s", c->base_table->name); + BGP_PTX_LOCK(c->tx, tx); + uint bucket_cnt = 0; uint prefix_cnt = 0; struct bgp_bucket *buck; struct bgp_prefix *px; - WALK_LIST(buck, c->bucket_queue) + WALK_LIST(buck, tx->bucket_queue) { bucket_cnt++; WALK_LIST(px, buck->prefixes) diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 3b689317..e61873b9 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -397,19 +397,7 @@ struct bgp_channel { /* Rest are zeroed when down */ pool *pool; - HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */ - struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ - list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */ - - /* Prefixes to be sent */ - struct bgp_prefix * _Atomic * _Atomic prefixes; - u32 _Atomic prefixes_len; /* Block size of prefixes array */ - netindex_hash *tx_netindex; /* Netindex indexing the prefixes to be sent */ - DOMAIN(rtable) tx_lock; /* Domain to be locked for prefix access */ - - slab *prefix_slab; /* Slab holding prefix nodes */ - slab *bucket_slab; /* Slab holding buckets to send */ - struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */ + union bgp_ptx *tx; /* TX encapsulation */ ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ ip_addr link_addr; /* Link-local version of next_hop_addr */ @@ -435,6 +423,39 @@ struct bgp_channel { u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */ }; +struct bgp_ptx_private { +#define BGP_PTX_PUBLIC \ + DOMAIN(rtable) lock; /* Domain to be locked for prefix access */ \ + struct bgp_prefix * _Atomic * _Atomic prefixes; \ + u32 _Atomic prefixes_len; /* Block size of prefixes array */ \ + struct rt_exporter exporter; /* Table-like exporter for ptx */ \ + struct bgp_channel *c; /* Backlink to the channel */ \ + + struct { BGP_PTX_PUBLIC; }; + struct bgp_ptx_private **locked_at; + + pool *pool; /* Resource pool for TX related allocations */ + + HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */ + struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ + list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */ + + /* Prefixes to be sent */ + netindex_hash *netindex; /* Netindex indexing the prefixes to be sent */ + + slab *prefix_slab; /* Slab holding prefix nodes */ + slab *bucket_slab; /* Slab holding buckets to send */ +}; + +typedef union bgp_ptx { + struct { BGP_PTX_PUBLIC; }; + struct bgp_ptx_private priv; +} bgp_ptx; + +LOBJ_UNLOCK_CLEANUP(bgp_ptx, rtable); +#define BGP_PTX_LOCK(_c, _tx) LOBJ_LOCK(_c, _tx, bgp_ptx, rtable) +#define BGP_PTX_PUB(_tx) SKIP_BACK(union bgp_ptx, priv, (_tx)) + struct bgp_prefix { node buck_node; /* Node in per-bucket list */ struct bgp_prefix * _Atomic next; /* Node in prefix block table */ @@ -470,7 +491,7 @@ struct bgp_export_state { struct bgp_write_state { struct bgp_proto *proto; - struct bgp_channel *channel; + struct bgp_ptx_private *ptx; struct linpool *pool; int mp_reach; @@ -641,10 +662,10 @@ void bgp_init_pending_tx(struct bgp_channel *c); void bgp_free_pending_tx(struct bgp_channel *c); void bgp_tx_resend(struct bgp_proto *p, struct bgp_channel *c); -void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b); -int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b); +void bgp_withdraw_bucket(struct bgp_ptx_private *c, struct bgp_bucket *b); +int bgp_done_bucket(struct bgp_ptx_private *c, struct bgp_bucket *b); -void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck); +void bgp_done_prefix(struct bgp_ptx_private *c, struct bgp_prefix *px, struct bgp_bucket *buck); int bgp_rte_better(const rte *, const rte *); int bgp_rte_mergable(const rte *pri, const rte *sec); diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 23d2f3e9..35e59e1a 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -1337,7 +1337,7 @@ bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size * IPv6 address with IPv6 NLRI. */ - if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0])) + if (bgp_channel_is_ipv4(s->ptx->c) && ipa_is_ip4(nh[0])) { put_ip4(buf, ipa_to_ip4(nh[0])); return 4; @@ -1412,7 +1412,7 @@ bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint siz * IPv6 address with VPNv6 NLRI. */ - if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0])) + if (bgp_channel_is_ipv4(s->ptx->c) && ipa_is_ip4(nh[0])) { put_u64(buf, 0); /* VPN RD is 0 */ put_ip4(buf+8, ipa_to_ip4(nh[0])); @@ -1623,7 +1623,7 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_done_prefix(s->channel, px, buck); + bgp_done_prefix(s->ptx, px, buck); } return pos - buf; @@ -1709,7 +1709,7 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_done_prefix(s->channel, px, buck); + bgp_done_prefix(s->ptx, px, buck); } return pos - buf; @@ -1798,7 +1798,7 @@ bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_done_prefix(s->channel, px, buck); + bgp_done_prefix(s->ptx, px, buck); } return pos - buf; @@ -1896,7 +1896,7 @@ bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_done_prefix(s->channel, px, buck); + bgp_done_prefix(s->ptx, px, buck); } return pos - buf; @@ -1984,7 +1984,7 @@ bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_done_prefix(s->channel, px, buck); + bgp_done_prefix(s->ptx, px, buck); } return pos - buf; @@ -2072,7 +2072,7 @@ bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_done_prefix(s->channel, px, buck); + bgp_done_prefix(s->ptx, px, buck); } return pos - buf; @@ -2278,13 +2278,13 @@ bgp_get_af_desc(u32 afi) static inline uint bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - return s->channel->desc->encode_nlri(s, buck, buf, end - buf); + return s->ptx->c->desc->encode_nlri(s, buck, buf, end - buf); } static inline uint bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf) { - return s->channel->desc->encode_next_hop(s, nh, buf, 255); + return s->ptx->c->desc->encode_next_hop(s, nh, buf, 255); } void @@ -2306,7 +2306,7 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu * var IPv4 Network Layer Reachability Information */ - ASSERT_DIE(s->channel->withdraw_bucket != buck); + ASSERT_DIE(s->ptx->withdraw_bucket != buck); int lr, la; @@ -2314,7 +2314,7 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu if (la < 0) { /* Attribute list too long */ - bgp_withdraw_bucket(s->channel, buck); + bgp_withdraw_bucket(s->ptx, buck); return NULL; } @@ -2329,7 +2329,7 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu static byte * bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - ASSERT_DIE(s->channel->withdraw_bucket != buck); + ASSERT_DIE(s->ptx->withdraw_bucket != buck); /* * 2 B IPv4 Withdrawn Routes Length (zero) @@ -2354,7 +2354,7 @@ bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu buf[4] = BAF_OPTIONAL | BAF_EXT_LEN; buf[5] = BA_MP_REACH_NLRI; put_u16(buf+6, 0); /* Will be fixed later */ - put_af3(buf+8, s->channel->afi); + put_af3(buf+8, s->ptx->c->afi); byte *pos = buf+11; /* Encode attributes to temporary buffer */ @@ -2363,7 +2363,7 @@ bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu if (la < 0) { /* Attribute list too long */ - bgp_withdraw_bucket(s->channel, buck); + bgp_withdraw_bucket(s->ptx, buck); return NULL; } @@ -2439,7 +2439,7 @@ bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte * buf[4] = BAF_OPTIONAL | BAF_EXT_LEN; buf[5] = BA_MP_UNREACH_NLRI; put_u16(buf+6, 3+len); - put_af3(buf+8, s->channel->afi); + put_af3(buf+8, s->ptx->c->afi); return buf+11+len; } @@ -2544,7 +2544,7 @@ bgp_create_update(struct bgp_channel *c, byte *buf) byte *res = NULL; struct lp_state *tmpp = NULL; - LOCK_DOMAIN(rtable, c->tx_lock); + BGP_PTX_LOCK(c->tx, ptx); again: if (tmpp) @@ -2555,7 +2555,7 @@ again: /* Initialize write state */ struct bgp_write_state s = { .proto = p, - .channel = c, + .ptx = ptx, .pool = tmp_linpool, .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop, .as4_session = p->as4_session, @@ -2564,7 +2564,7 @@ again: }; /* Try unreachable bucket */ - if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) + if ((buck = ptx->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ? bgp_create_ip_unreach(&s, buck, buf, end): @@ -2574,19 +2574,19 @@ again: } /* Try reachable buckets */ - if (!EMPTY_LIST(c->bucket_queue)) + if (!EMPTY_LIST(ptx->bucket_queue)) { - buck = HEAD(c->bucket_queue); + buck = HEAD(ptx->bucket_queue); /* Cleanup empty buckets */ - if (bgp_done_bucket(c, buck)) + if (bgp_done_bucket(ptx, buck)) goto again; res = !s.mp_reach ? bgp_create_ip_reach(&s, buck, buf, end): bgp_create_mp_reach(&s, buck, buf, end); - bgp_done_bucket(c, buck); + bgp_done_bucket(ptx, buck); if (!res) goto again; @@ -2595,12 +2595,10 @@ again: } /* No more prefixes to send */ - UNLOCK_DOMAIN(rtable, c->tx_lock); lp_restore(tmp_linpool, tmpp); return NULL; done: - UNLOCK_DOMAIN(rtable, c->tx_lock); BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); p->stats.tx_updates++; lp_restore(tmp_linpool, tmpp);