0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2024-11-09 20:58:44 +00:00

Lockless hostentry resolution

Now the hostentry doesn't need to lock the table; instead, it tracks the
hostentry version and retries if the hostentry changed while updating.
This commit is contained in:
Maria Matejka 2024-04-04 11:38:52 +02:00
parent 83a409abd2
commit 182a97f5e6
3 changed files with 129 additions and 111 deletions

View File

@ -502,9 +502,10 @@ struct hostentry {
struct hostentry *next; /* Next in hash chain */ struct hostentry *next; /* Next in hash chain */
unsigned hash_key; /* Hash key */ unsigned hash_key; /* Hash key */
u32 igp_metric; /* Chosen route IGP metric */ u32 igp_metric; /* Chosen route IGP metric */
_Atomic u32 version; /* Bumped on update */
byte nexthop_linkable; /* Nexthop list is completely non-device */
ea_list *src; /* Source attributes */ ea_list *src; /* Source attributes */
struct lfuc uc; /* Use count */ struct lfuc uc; /* Use count */
byte nexthop_linkable; /* Nexthop list is completely non-device */
}; };
struct hostcache { struct hostcache {
@ -669,6 +670,7 @@ struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t
/* Host entry: Resolve hook for recursive nexthops */ /* Host entry: Resolve hook for recursive nexthops */
extern struct ea_class ea_gen_hostentry; extern struct ea_class ea_gen_hostentry;
extern struct ea_class ea_gen_hostentry_version;
struct hostentry_adata { struct hostentry_adata {
adata ad; adata ad;
struct hostentry *he; struct hostentry *he;

View File

@ -161,6 +161,13 @@ struct ea_class ea_gen_hostentry = {
.freed = ea_gen_hostentry_freed, .freed = ea_gen_hostentry_freed,
}; };
/*
 * Route attribute class recording the hostentry version a route was
 * resolved against. rta_apply_hostentry() stores the version it observed
 * into this attribute, and rt_next_hop_update_rte() compares the stored
 * value with the hostentry's current version to decide whether the next
 * hop has to be resolved again.
 */
struct ea_class ea_gen_hostentry_version = {
.name = "hostentry version",
.type = T_INT,
.readonly = 1,
/* NOTE(review): presumably keeps the attribute out of user-visible output; confirm against the ea_class definition */
.hidden = 1,
};
const char * rta_dest_names[RTD_MAX] = { const char * rta_dest_names[RTD_MAX] = {
[RTD_NONE] = "", [RTD_NONE] = "",
[RTD_UNICAST] = "unicast", [RTD_UNICAST] = "unicast",
@ -1821,6 +1828,7 @@ rta_init(void)
/* These attributes are required to be first for nice "show route" output */ /* These attributes are required to be first for nice "show route" output */
ea_register_init(&ea_gen_nexthop); ea_register_init(&ea_gen_nexthop);
ea_register_init(&ea_gen_hostentry); ea_register_init(&ea_gen_hostentry);
ea_register_init(&ea_gen_hostentry_version);
/* Other generic route attributes */ /* Other generic route attributes */
ea_register_init(&ea_gen_preference); ea_register_init(&ea_gen_preference);

View File

@ -3001,18 +3001,32 @@ ea_set_hostentry(ea_list **to, rtable *dep, rtable *src, ip_addr gw, ip_addr ll,
static void static void
rta_apply_hostentry(struct rtable_private *tab UNUSED, ea_list **to, struct hostentry_adata *head) rta_apply_hostentry(ea_list **to, struct hostentry_adata *head)
{ {
struct hostentry *he = head->he;
u32 *labels = head->labels; u32 *labels = head->labels;
u32 lnum = (u32 *) (head->ad.data + head->ad.length) - labels; u32 lnum = (u32 *) (head->ad.data + head->ad.length) - labels;
struct hostentry *he = head->he;
rcu_read_lock();
u32 version = atomic_load_explicit(&he->version, memory_order_acquire);
while (1)
{
if (version & 1)
{
rcu_read_unlock();
birdloop_yield();
rcu_read_lock();
version = atomic_load_explicit(&he->version, memory_order_acquire);
continue;
}
ea_set_attr_u32(to, &ea_gen_igp_metric, 0, he->igp_metric); ea_set_attr_u32(to, &ea_gen_igp_metric, 0, he->igp_metric);
if (!he->src) if (!he->src)
{ {
ea_set_dest(to, 0, RTD_UNREACHABLE); ea_set_dest(to, 0, RTD_UNREACHABLE);
return; break;
} }
eattr *he_nh_ea = ea_find(he->src, &ea_gen_nexthop); eattr *he_nh_ea = ea_find(he->src, &ea_gen_nexthop);
@ -3023,9 +3037,10 @@ rta_apply_hostentry(struct rtable_private *tab UNUSED, ea_list **to, struct host
if ((idest != RTD_UNICAST) || if ((idest != RTD_UNICAST) ||
!lnum && he->nexthop_linkable) !lnum && he->nexthop_linkable)
{ /* Just link the nexthop chain, no label append happens. */ {
/* Just link the nexthop chain, no label append happens. */
ea_copy_attr(to, he->src, &ea_gen_nexthop); ea_copy_attr(to, he->src, &ea_gen_nexthop);
return; break;
} }
uint total_size = OFFSETOF(struct nexthop_adata, nh); uint total_size = OFFSETOF(struct nexthop_adata, nh);
@ -3052,7 +3067,7 @@ rta_apply_hostentry(struct rtable_private *tab UNUSED, ea_list **to, struct host
}; };
ea_set_attr_data(to, &ea_gen_nexthop, 0, &nha.ad.data, nha.ad.length); ea_set_attr_data(to, &ea_gen_nexthop, 0, &nha.ad.data, nha.ad.length);
return; break;
} }
struct nexthop_adata *new = (struct nexthop_adata *) tmp_alloc_adata(total_size); struct nexthop_adata *new = (struct nexthop_adata *) tmp_alloc_adata(total_size);
@ -3087,53 +3102,42 @@ rta_apply_hostentry(struct rtable_private *tab UNUSED, ea_list **to, struct host
new->ad.length = (void *) dest - (void *) new->ad.data; new->ad.length = (void *) dest - (void *) new->ad.data;
ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( ea_set_attr(to, EA_LITERAL_DIRECT_ADATA(
&ea_gen_nexthop, 0, &new->ad)); &ea_gen_nexthop, 0, &new->ad));
/* Has the HE version changed? */
u32 end_version = atomic_load_explicit(&he->version, memory_order_acquire);
/* Stayed stable, we can finalize the route */
if (end_version == version)
break;
/* No, retry once again */
version = end_version;
} }
static inline struct hostentry_adata * rcu_read_unlock();
rta_next_hop_outdated(ea_list *a)
{
/* First retrieve the hostentry */
eattr *heea = ea_find(a, &ea_gen_hostentry);
if (!heea)
return NULL;
struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; ea_set_attr_u32(to, &ea_gen_hostentry_version, 0, version);
/* If no nexthop is present, we have to create one */
eattr *a_nh_ea = ea_find(a, &ea_gen_nexthop);
if (!a_nh_ea)
return head;
struct nexthop_adata *nhad = (struct nexthop_adata *) a_nh_ea->u.ptr;
/* Shortcut for unresolvable hostentry */
if (!head->he->src)
return NEXTHOP_IS_REACHABLE(nhad) ? head : NULL;
/* Comparing our nexthop with the hostentry nexthop */
eattr *he_nh_ea = ea_find(head->he->src, &ea_gen_nexthop);
return (
(ea_get_int(a, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != head->he->igp_metric) ||
(!head->he->nexthop_linkable) ||
(!he_nh_ea != !a_nh_ea) ||
(he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr)))
? head : NULL;
} }
static inline int static inline int
rt_next_hop_update_rte(const rte *old, rte *new) rt_next_hop_update_rte(const rte *old, rte *new)
{ {
struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); eattr *hev = ea_find(old->attrs, &ea_gen_hostentry_version);
if (!head) if (!hev)
return 0;
u32 last_version = hev->u.data;
eattr *heea = ea_find(old->attrs, &ea_gen_hostentry);
ASSERT_DIE(heea);
struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr;
u32 current_version = atomic_load_explicit(&head->he->version, memory_order_acquire);
if (current_version == last_version)
return 0; return 0;
/* Get the state of the route just before nexthop was resolved */
*new = *old; *new = *old;
new->attrs = ea_strip_to(new->attrs, BIT32_ALL(EALS_PREIMPORT, EALS_FILTERED)); new->attrs = ea_strip_to(new->attrs, BIT32_ALL(EALS_PREIMPORT, EALS_FILTERED));
rta_apply_hostentry(&new->attrs, head);
RT_LOCKED(head->he->owner, tab)
rta_apply_hostentry(tab, &new->attrs, head);
return 1; return 1;
} }
@ -3144,10 +3148,7 @@ rt_next_hop_resolve_rte(rte *r)
if (!heea) if (!heea)
return; return;
struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; rta_apply_hostentry(&r->attrs, (struct hostentry_adata *) heea->u.ptr);
RT_LOCKED(head->he->owner, tab)
rta_apply_hostentry(tab, &r->attrs, head);
} }
#ifdef CONFIG_BGP #ifdef CONFIG_BGP
@ -4374,6 +4375,9 @@ rt_update_hostentry(struct rtable_private *tab, struct hostentry *he)
int direct = 0; int direct = 0;
int pxlen = 0; int pxlen = 0;
/* Signalize work in progress */
ASSERT_DIE((atomic_fetch_add_explicit(&he->version, 1, memory_order_acq_rel) & 1) == 0);
/* Reset the hostentry */ /* Reset the hostentry */
he->src = NULL; he->src = NULL;
he->nexthop_linkable = 0; he->nexthop_linkable = 0;
@ -4426,6 +4430,10 @@ rt_update_hostentry(struct rtable_private *tab, struct hostentry *he)
} }
done: done:
/* Signalize work done and wait for readers */
ASSERT_DIE((atomic_fetch_add_explicit(&he->version, 1, memory_order_acq_rel) & 1) == 1);
synchronize_rcu();
/* Add a prefix range to the trie */ /* Add a prefix range to the trie */
trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen); trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);