mirror of https://gitlab.nic.cz/labs/bird.git synced 2025-01-03 07:31:54 +00:00

BGP: Dropping the netindex experiment, prefix hash is faster

Maria Matejka 2024-06-06 13:09:39 +02:00
parent 7c59ef3faa
commit d78448dd7c
4 changed files with 172 additions and 260 deletions
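In short: the BGP TX path previously kept a channel-private netindex hash plus an atomic block of prefix chains sized to the index space; this commit drops that in favour of the table's shared netindex and stores pending prefixes in BIRD's ordinary chained hash keyed by (netindex, rte_src). The exporter also carries the table domain, so feeds run under that lock instead of an RCU read section. Below is a minimal, self-contained sketch of the chained-hash lookup the commit adopts — the types and names are invented for illustration; the real code uses the PXH_* macros from lib/hash.h shown in the diff further down:

/* Illustrative only: a tiny chained hash keyed by (net index, path source),
 * mirroring the lookup this commit switches to. The real code uses BIRD's
 * lib/hash.h macros (PXH_KEY/PXH_NEXT/PXH_EQ/PXH_FN, HASH_FIND/HASH_INSERT2);
 * everything below is made up for the sketch. */
#include <stdint.h>
#include <stdlib.h>

struct src;                            /* stands in for struct rte_src */

struct px {
  struct px *next;                     /* chain link, like bgp_prefix.next */
  uint32_t index;                      /* stands in for netindex->index */
  struct src *src;
};

#define PX_ORDER 8
static struct px *px_hash[1u << PX_ORDER];

static inline uint32_t px_hash_fn(uint32_t index)
{
  /* simple multiplicative mix; the real code calls BIRD's u32_hash(ni->index) */
  return (index * 2902958803u) >> (32 - PX_ORDER);
}

/* Lookup keyed by index, and by src only when add-path TX is active,
 * matching the PXH_EQ semantics in the diff. */
static struct px *px_find(uint32_t index, struct src *src, int add_path_tx)
{
  for (struct px *p = px_hash[px_hash_fn(index)]; p; p = p->next)
    if ((p->index == index) && (!add_path_tx || (p->src == src)))
      return p;
  return NULL;
}

static struct px *px_get(uint32_t index, struct src *src, int add_path_tx)
{
  struct px *p = px_find(index, src, add_path_tx);
  if (p)
    return p;

  p = calloc(1, sizeof *p);
  p->index = index;
  p->src = src;

  /* Insert at the head of the chain, as HASH_INSERT2 does
   * (rehashing on growth is omitted from this sketch). */
  struct px **head = &px_hash[px_hash_fn(index)];
  p->next = *head;
  *head = p;
  return p;
}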

View File

@@ -121,6 +121,7 @@ struct rt_export_request {
/* Enlisting */
struct rt_exporter * _Atomic exporter;
DOMAIN(rtable) domain; /* Lock this instead of RCU */
/* Prefiltering, useful for more scenarios */
struct rt_prefilter {
@@ -210,6 +211,7 @@ struct rt_exporter {
_Bool _Atomic feeders_lock; /* Spinlock for the above list */
u8 trace_routes; /* Debugging flags (D_*) */
u8 net_type; /* Which net this exporter provides */
DOMAIN(rtable) domain; /* Lock this instead of RCU */
u32 _Atomic max_feed_index; /* Stop feeding at this index */
const char *name; /* Name for logging */
netindex_hash *netindex; /* Table for net <-> id conversion */

View File

@@ -141,8 +141,17 @@ rt_export_get(struct rt_export_request *r)
{
/* But this net shall get a feed first! */
rtex_trace(r, D_ROUTES, "Expediting %N feed due to pending update %lu", n, update->seq);
if (r->feeder.domain.rtable)
{
LOCK_DOMAIN(rtable, r->feeder.domain);
feed = e->feed_net(e, NULL, ni, update);
UNLOCK_DOMAIN(rtable, r->feeder.domain);
}
else
{
RCU_ANCHOR(u);
feed = e->feed_net(e, u, ni, update);
}
bmap_set(&r->feed_map, ni->index);
ASSERT_DIE(feed);
@@ -234,13 +243,12 @@ rt_alloc_feed(uint routes, uint exports)
return feed;
}
struct rt_export_feed *
rt_export_next_feed(struct rt_export_feeder *f)
static struct rt_export_feed *
rt_export_get_next_feed(struct rt_export_feeder *f, struct rcu_unwinder *u)
{
ASSERT_DIE(f);
while (1)
{
RCU_ANCHOR(u);
ASSERT_DIE(u || DOMAIN_IS_LOCKED(rtable, f->domain));
struct rt_exporter *e = atomic_load_explicit(&f->exporter, memory_order_acquire);
if (!e)
@@ -257,18 +265,24 @@ rt_export_next_feed(struct rt_export_feeder *f)
if (!ni)
{
f->feed_index = ~0;
break;
return NULL;
}
if (!rt_prefilter_net(&f->prefilter, ni->addr))
{
rtex_trace(f, D_ROUTES, "Not feeding %N due to prefilter", ni->addr);
if (u)
RCU_RETRY_FAST(u);
else
continue;
}
if (f->feeding && !rt_net_is_feeding_feeder(f, ni->addr))
{
rtex_trace(f, D_ROUTES, "Not feeding %N, not requested", ni->addr);
if (u)
RCU_RETRY_FAST(u);
else
continue;
}
@@ -279,6 +293,28 @@ rt_export_next_feed(struct rt_export_feeder *f)
return feed;
}
}
}
struct rt_export_feed *
rt_export_next_feed(struct rt_export_feeder *f)
{
ASSERT_DIE(f);
struct rt_export_feed *feed = NULL;
if (f->domain.rtable)
{
LOCK_DOMAIN(rtable, f->domain);
feed = rt_export_get_next_feed(f, NULL);
UNLOCK_DOMAIN(rtable, f->domain);
}
else
{
RCU_ANCHOR(u);
feed = rt_export_get_next_feed(f, u);
}
if (feed)
return feed;
/* Feeding done */
while (f->feeding)
@@ -445,6 +481,7 @@ rt_feeder_subscribe(struct rt_exporter *e, struct rt_export_feeder *f)
f->feed_index = 0;
atomic_store_explicit(&f->exporter, e, memory_order_relaxed);
f->domain = e->domain;
RTEX_FEEDERS_LOCK(e);
rt_export_feeder_add_tail(&e->feeders, f);
@@ -452,10 +489,9 @@ rt_feeder_subscribe(struct rt_exporter *e, struct rt_export_feeder *f)
rtex_trace(f, D_STATES, "Subscribed to exporter %s", e->name);
}
void
rt_feeder_unsubscribe(struct rt_export_feeder *f)
static void
rt_feeder_do_unsubscribe(struct rt_export_feeder *f)
{
RCU_ANCHOR(a);
struct rt_exporter *e = atomic_exchange_explicit(&f->exporter, NULL, memory_order_acquire);
if (e)
{
@@ -468,6 +504,22 @@ rt_feeder_unsubscribe(struct rt_export_feeder *f)
rtex_trace(f, D_STATES, "Already unsubscribed");
}
void
rt_feeder_unsubscribe(struct rt_export_feeder *f)
{
if (f->domain.rtable)
{
LOCK_DOMAIN(rtable, f->domain);
rt_feeder_do_unsubscribe(f);
UNLOCK_DOMAIN(rtable, f->domain);
}
else
{
RCU_ANCHOR(u);
rt_feeder_do_unsubscribe(f);
}
}
void
rt_exporter_shutdown(struct rt_exporter *e, void (*stopped)(struct rt_exporter *))
{

View File

@@ -1683,41 +1683,29 @@ bgp_withdraw_bucket(struct bgp_ptx_private *c, struct bgp_bucket *b)
* Prefix hash table
*/
#define PXH_KEY(px) px->ni, px->src
#define PXH_NEXT(px) px->next
#define PXH_EQ(n1,s1,n2,s2) (n1 == n2) && (!add_path_tx || (s1 == s2))
#define PXH_FN(ni, src) u32_hash(ni->index)
#define PXH_REHASH bgp_pxh_rehash
#define PXH_PARAMS /8, *2, 2, 2, 12, 20
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix);
static void
bgp_init_prefix_table(struct bgp_ptx_private *c)
{
ASSERT_DIE(!c->prefix_slab);
c->prefix_slab = sl_new(c->pool, sizeof(struct bgp_prefix));
/* Netindex must be allocated from the main BGP pool
* as its cleanup routines are expecting to be allocated from something
* locked while entering a loop. That's kinda stupid but i'm lazy now
* to rework it. */
ASSERT_DIE(!c->netindex);
c->netindex = netindex_hash_new(c->c->pool, proto_event_list(c->c->c.proto), c->c->c.net_type);
u32 len = 64;
struct bgp_prefix * _Atomic * block = mb_allocz(c->pool, len * sizeof *block);
atomic_store_explicit(&c->prefixes_len, len, memory_order_relaxed);
atomic_store_explicit(&c->prefixes, block, memory_order_relaxed);
HASH_INIT(c->prefix_hash, c->pool, 8);
}
static struct bgp_prefix *
bgp_find_prefix(struct bgp_ptx_private *c, struct netindex *ni, struct rte_src *src, int add_path_tx)
{
u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed);
struct bgp_prefix * _Atomic * block = atomic_load_explicit(&c->prefixes, memory_order_relaxed);
for (
struct bgp_prefix *px = ni->index < len ?
atomic_load_explicit(&block[ni->index], memory_order_relaxed) : NULL;
px; px = atomic_load_explicit(&px->next, memory_order_relaxed)
)
if (!add_path_tx || (px->src == src))
return px;
return NULL;
return HASH_FIND(c->prefix_hash, PXH, ni, src);
}
static struct bgp_prefix *
@@ -1728,46 +1716,20 @@ bgp_get_prefix(struct bgp_ptx_private *c, struct netindex *ni, struct rte_src *s
if (px)
return px;
/* Possibly expand the block */
u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed);
struct bgp_prefix * _Atomic * block =
atomic_load_explicit(&c->prefixes, memory_order_relaxed);
u32 nlen = len;
while (ni->index >= nlen)
nlen *= 2;
if (nlen > len)
{
/* Yes, expansion needed */
struct bgp_prefix * _Atomic * nb = mb_allocz(c->pool, nlen * sizeof *nb);
memcpy(nb, block, len * sizeof *nb);
memset(&nb[len], 0, (nlen - len) * sizeof *nb);
atomic_store_explicit(&c->prefixes, nb, memory_order_release);
atomic_store_explicit(&c->prefixes_len, nlen, memory_order_release);
atomic_store_explicit(&c->exporter.max_feed_index, nlen, memory_order_release);
synchronize_rcu();
mb_free(block);
block = nb;
len = nlen;
}
/* Allocate new prefix */
px = sl_alloc(c->prefix_slab);
*px = (struct bgp_prefix) {
.src = src,
.ni = ni,
.next = atomic_load_explicit(&block[ni->index], memory_order_relaxed),
};
net_lock_index(c->netindex, ni);
net_lock_index(c->c->c.table->netindex, ni);
rt_lock_source(src);
atomic_store_explicit(&block[ni->index], px, memory_order_release);
HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
if (ni->index >= atomic_load_explicit(&c->exporter.max_feed_index, memory_order_relaxed))
atomic_store_explicit(&c->exporter.max_feed_index, ni->index + 1, memory_order_release);
return px;
}
@@ -1828,52 +1790,15 @@ bgp_update_prefix(struct bgp_ptx_private *c, struct bgp_prefix *px, struct bgp_b
#undef BPX_TRACE
}
struct bgp_free_prefix_deferred_item {
struct deferred_call dc;
union bgp_ptx *tx;
struct bgp_prefix *px;
};
static void
bgp_free_prefix_deferred(struct deferred_call *dc)
{
SKIP_BACK_DECLARE(struct bgp_free_prefix_deferred_item, bfpdi, dc, dc);
union bgp_ptx *tx = bfpdi->tx;
struct bgp_prefix *px = bfpdi->px;
BGP_PTX_LOCK(tx, ptx);
net_unlock_index(ptx->netindex, px->ni);
rt_unlock_source(px->src);
sl_free(px);
}
static void
bgp_free_prefix(struct bgp_ptx_private *c, struct bgp_prefix *px)
{
u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed);
struct bgp_prefix * _Atomic * block =
atomic_load_explicit(&c->prefixes, memory_order_relaxed);
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
ASSERT_DIE(px->ni->index < len);
for (
struct bgp_prefix * _Atomic *ppx = &block[px->ni->index], *cpx;
cpx = atomic_load_explicit(ppx, memory_order_relaxed);
ppx = &cpx->next)
if (cpx == px)
{
atomic_store_explicit(ppx, atomic_load_explicit(&cpx->next, memory_order_relaxed), memory_order_release);
break;
}
net_unlock_index(c->c->c.table->netindex, px->ni);
rt_unlock_source(px->src);
struct bgp_free_prefix_deferred_item bfpdi = {
.dc.hook = bgp_free_prefix_deferred,
.tx = BGP_PTX_PUB(c),
.px = px,
};
defer_call(&bfpdi.dc, sizeof bfpdi);
sl_free(px);
}
void
@@ -1920,14 +1845,8 @@ bgp_tx_resend(struct bgp_proto *p, struct bgp_channel *bc)
ASSERT_DIE(bc->tx_keep);
uint seen = 0;
u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed);
struct bgp_prefix * _Atomic * block =
atomic_load_explicit(&c->prefixes, memory_order_relaxed);
for (u32 i = 0; i < len; i++)
for (struct bgp_prefix * _Atomic *ppx = &block[i], *px;
px = atomic_load_explicit(ppx, memory_order_relaxed);
ppx = &px->next)
HASH_WALK(c->prefix_hash, next, px)
{
if (!px->cur)
{
ASSERT_DIE(px->last);
@@ -1940,6 +1859,8 @@ bgp_tx_resend(struct bgp_proto *p, struct bgp_channel *bc)
/* And send it once again */
seen += bgp_update_prefix(c, px, last);
}
}
HASH_WALK_END;
if (bc->c.debug & D_EVENTS)
log(L_TRACE "%s.%s: TX resending %u routes",
@@ -1960,21 +1881,19 @@ bgp_out_item_done(struct lfjour *j, struct lfjour_item *i)
static struct rt_export_feed *
bgp_out_feed_net(struct rt_exporter *e, struct rcu_unwinder *u, struct netindex *ni, const struct rt_export_item *_first)
{
ASSERT_DIE(u == NULL);
SKIP_BACK_DECLARE(struct bgp_ptx_private, c, exporter, e);
ASSERT_DIE(DOMAIN_IS_LOCKED(rtable, c->lock));
struct rt_export_feed *feed = NULL;
SKIP_BACK_DECLARE(union bgp_ptx, c, exporter, e);
u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed);
if (ni->index >= len)
return NULL;
struct bgp_prefix * _Atomic * block =
atomic_load_explicit(&c->prefixes, memory_order_relaxed);
uint count = 0;
for (struct bgp_prefix * _Atomic *ppx = &block[ni->index], *cpx;
cpx = atomic_load_explicit(ppx, memory_order_relaxed);
ppx = &cpx->next)
count++;
struct bgp_prefix *chain = HASH_FIND_CHAIN(c->prefix_hash, PXH, ni, NULL);
for (struct bgp_prefix *px = chain; px; px = px->next)
if (px->ni == ni)
count += !!px->last + !!px->cur;
if (count)
{
@@ -1982,38 +1901,29 @@ bgp_out_feed_net(struct rt_exporter *e, struct rcu_unwinder *u, struct netindex
feed->ni = ni;
uint pos = 0;
for (struct bgp_prefix * _Atomic *ppx = &block[ni->index], *cpx;
cpx = atomic_load_explicit(ppx, memory_order_relaxed);
ppx = &cpx->next)
if (cpx->ni == ni)
for (struct bgp_prefix *px = chain; px; px = px->next)
if (px->ni == ni)
{
if (cpx->cur)
if (pos >= count)
RCU_RETRY(u);
else
if (px->cur)
feed->block[pos++] = (rte) {
.attrs = cpx->cur->attrs ? ea_ref_tmp(cpx->cur->attrs) : NULL,
.attrs = px->cur->attrs ? ea_ref_tmp(px->cur->attrs) : NULL,
.net = ni->addr,
.src = cpx->src,
.lastmod = cpx->lastmod,
.src = px->src,
.lastmod = px->lastmod,
.flags = REF_PENDING,
};
if (cpx->last)
if (pos >= count)
RCU_RETRY(u);
else
if (px->last)
feed->block[pos++] = (rte) {
.attrs = cpx->last->attrs ? ea_ref_tmp(cpx->last->attrs) : NULL,
.attrs = px->last->attrs ? ea_ref_tmp(px->last->attrs) : NULL,
.net = ni->addr,
.src = cpx->src,
.lastmod = cpx->lastmod,
.flags = REF_PENDING,
.src = px->src,
.lastmod = px->lastmod,
};
}
if (pos != count)
RCU_RETRY(u);
ASSERT_DIE(pos == count);
}
return feed;
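Because this feed routine now runs with the table domain locked (hence the ASSERT_DIE(u == NULL) and DOMAIN_IS_LOCKED checks above), the hash chain cannot change while the feed is being built, so the old RCU_RETRY fallbacks collapse into a single final ASSERT_DIE(pos == count). A self-contained sketch of that count-then-fill pattern follows, using made-up types rather than BIRD's:

/* Illustrative only: count-then-fill feed assembly. Holding the lock means
 * the chain cannot change between the two passes, so a final assert replaces
 * the old retry logic. Types and values here are invented for the sketch. */
#include <assert.h>
#include <stdlib.h>

struct entry { struct entry *next; int key; int have_cur, have_last; };

struct feed { unsigned count; int items[]; };   /* flexible array member */

static struct feed *feed_net(struct entry *chain, int key)
{
  unsigned count = 0;

  /* Pass 1: count how many items this key contributes. */
  for (struct entry *e = chain; e; e = e->next)
    if (e->key == key)
      count += !!e->have_cur + !!e->have_last;

  if (!count)
    return NULL;

  /* Pass 2: allocate exactly that much and fill it. */
  struct feed *f = malloc(sizeof *f + count * sizeof f->items[0]);
  f->count = count;

  unsigned pos = 0;
  for (struct entry *e = chain; e; e = e->next)
    if (e->key == key)
    {
      if (e->have_cur)  f->items[pos++] = 1;   /* stand-in for the "cur" route */
      if (e->have_last) f->items[pos++] = 2;   /* stand-in for the "last" route */
    }

  assert(pos == count);   /* mirrors ASSERT_DIE(pos == count) above */
  return f;
}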
@@ -2048,10 +1958,11 @@ bgp_init_pending_tx(struct bgp_channel *c)
},
.name = mb_sprintf(c->c.proto->pool, "%s.%s.export", c->c.proto->name, c->c.name),
.net_type = c->c.net_type,
.max_feed_index = atomic_load_explicit(&bpp->prefixes_len, memory_order_relaxed),
.netindex = bpp->netindex,
.max_feed_index = 0,
.netindex = c->c.table->netindex,
.trace_routes = c->c.debug,
.feed_net = bgp_out_feed_net,
.domain = dom,
};
rt_exporter_init(&bpp->exporter, &c->cf->ptx_exporter_settle);
@@ -2062,25 +1973,44 @@ bgp_init_pending_tx(struct bgp_channel *c)
UNLOCK_DOMAIN(rtable, dom);
}
struct bgp_pending_tx_finisher {
event e;
union bgp_ptx *ptx;
};
static void
bgp_finish_pending_tx(void *_bptf)
void
bgp_free_pending_tx(struct bgp_channel *bc)
{
struct bgp_pending_tx_finisher *bptf = _bptf;
union bgp_ptx *ptx = bptf->ptx;
struct bgp_ptx_private *c = &ptx->priv;
struct bgp_channel *bc = c->c;
if (!bc->tx)
return;
DOMAIN(rtable) dom = c->lock;
DOMAIN(rtable) dom = bc->tx->lock;
LOCK_DOMAIN(rtable, dom);
struct bgp_ptx_private *c = &bc->tx->priv;
mb_free(bptf);
bc->c.out_table = NULL;
rt_exporter_shutdown(&c->exporter, NULL); /* TODO: actually implement exports */
mb_free(atomic_load_explicit(&c->prefixes, memory_order_relaxed));
/* Move all prefixes to the withdraw bucket to unref the "last" prefixes */
struct bgp_bucket *b = bgp_get_withdraw_bucket(c);
HASH_WALK(c->prefix_hash, next, px)
bgp_update_prefix(c, px, b);
HASH_WALK_END;
/* Flush withdrawals */
struct bgp_prefix *px;
WALK_LIST_FIRST(px, b->prefixes)
bgp_done_prefix(c, px, b);
/* Flush pending TX */
WALK_LIST_FIRST(b, c->bucket_queue)
{
WALK_LIST_FIRST(px, b->prefixes)
bgp_done_prefix(c, px, b);
bgp_done_bucket(c, b);
}
/* Consistency and resource leak checks */
HASH_WALK(c->prefix_hash, next, n)
bug("Stray prefix after cleanup");
HASH_WALK_END;
HASH_FREE(c->prefix_hash);
sl_delete(c->prefix_slab);
c->prefix_slab = NULL;
@@ -2092,80 +2022,11 @@ bgp_finish_pending_tx(void *_bptf)
sl_delete(c->bucket_slab);
c->bucket_slab = NULL;
rp_free(ptx->priv.pool);
rp_free(c->pool);
UNLOCK_DOMAIN(rtable, dom);
DOMAIN_FREE(rtable, dom);
channel_del_obstacle(&bc->c);
}
void
bgp_free_pending_tx(struct bgp_channel *bc)
{
if (!bc->tx)
return;
BGP_PTX_LOCK(bc->tx, c);
bc->c.out_table = NULL;
rt_exporter_shutdown(&c->exporter, NULL);
struct bgp_prefix *px;
u32 len = atomic_load_explicit(&c->prefixes_len, memory_order_relaxed);
struct bgp_prefix * _Atomic * block =
atomic_load_explicit(&c->prefixes, memory_order_relaxed);
if (bc->tx_keep)
{
/* Move all kept prefixes to the withdraw bucket */
struct bgp_bucket *b = bgp_get_withdraw_bucket(c);
for (u32 i = 0; i < len; i++)
for (struct bgp_prefix * _Atomic *ppx = &block[i], *cpx;
cpx = atomic_load_explicit(ppx, memory_order_relaxed);
ppx = &cpx->next)
bgp_update_prefix(c, cpx, b);
}
/* Flush withdrawals */
if (c->withdraw_bucket)
WALK_LIST_FIRST(px, c->withdraw_bucket->prefixes)
bgp_done_prefix(c, px, c->withdraw_bucket);
/* Flush pending TX */
struct bgp_bucket *b;
WALK_LIST_FIRST(b, c->bucket_queue)
{
WALK_LIST_FIRST(px, b->prefixes)
bgp_done_prefix(c, px, b);
bgp_done_bucket(c, b);
}
/* Consistence and resource leak checks */
for (u32 i = 0; i < len; i++)
if (atomic_load_explicit(&block[i], memory_order_relaxed))
bug("Stray prefix after cleanup");
atomic_store_explicit(&c->prefixes, NULL, memory_order_release);
atomic_store_explicit(&c->prefixes_len, 0, memory_order_release);
atomic_store_explicit(&c->exporter.max_feed_index, 0, memory_order_release);
struct bgp_pending_tx_finisher *bptf = mb_alloc(c->pool, sizeof *bptf);
*bptf = (struct bgp_pending_tx_finisher) {
.e = {
.hook = bgp_finish_pending_tx,
.data = bptf,
},
.ptx = bc->tx,
};
channel_add_obstacle(&bc->c);
netindex_hash_delete(c->netindex, &bptf->e, proto_event_list(c->c->c.proto));
/* We can't null this, bgp_free_prefix_deferred expects
* this to be set:
* c->netindex = NULL;
*/
c->exporter.netindex = NULL;
bc->tx = NULL;
}
@@ -2409,7 +2270,7 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
path = (new ?: old)->src;
/* And queue the notification */
if (bgp_update_prefix(c, bgp_get_prefix(c, net_get_index(c->netindex, n), path, bc->add_path_tx), buck))
if (bgp_update_prefix(c, bgp_get_prefix(c, NET_TO_INDEX(n), path, bc->add_path_tx), buck))
bgp_schedule_packet(p->conn, bc, PKT_UPDATE);
}

View File

@@ -426,8 +426,6 @@ struct bgp_channel {
struct bgp_ptx_private {
#define BGP_PTX_PUBLIC \
DOMAIN(rtable) lock; /* Domain to be locked for prefix access */ \
struct bgp_prefix * _Atomic * _Atomic prefixes; \
u32 _Atomic prefixes_len; /* Block size of prefixes array */ \
struct rt_exporter exporter; /* Table-like exporter for ptx */ \
struct bgp_channel *c; /* Backlink to the channel */ \
@@ -440,8 +438,7 @@ struct bgp_ptx_private {
struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
/* Prefixes to be sent */
netindex_hash *netindex; /* Netindex indexing the prefixes to be sent */
HASH(struct bgp_prefix) prefix_hash; /* Hash table of pending prefixes */
slab *prefix_slab; /* Slab holding prefix nodes */
slab *bucket_slab; /* Slab holding buckets to send */
@@ -458,12 +455,12 @@ LOBJ_UNLOCK_CLEANUP(bgp_ptx, rtable);
struct bgp_prefix {
node buck_node; /* Node in per-bucket list */
struct bgp_prefix * _Atomic next; /* Node in prefix block table */
struct bgp_prefix *next; /* Node in prefix hash */
struct bgp_bucket *last; /* Last bucket sent with this prefix */
struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */
btime lastmod; /* Last modification of this prefix */
struct rte_src *src; /* Path ID encoded as rte_src */
struct netindex *ni;
struct netindex *ni; /* Shared with the table */
};
struct bgp_bucket {