diff --git a/filter/f-inst.c b/filter/f-inst.c index fff93517..60042476 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -692,7 +692,7 @@ switch (sa.sa_code) { case SA_NET: RESULT(sa.type, net, fs->rte->net); break; - case SA_PROTO: RESULT(sa.type, s, fs->rte->src->proto->name); break; + case SA_PROTO: RESULT(sa.type, s, fs->rte->src->owner->name); break; default: { struct eattr *nhea = ea_find(*fs->eattrs, &ea_gen_nexthop); @@ -771,7 +771,8 @@ struct iface *ifa = (ipa_is_link_local(ip) && nh_ea) ? ((struct nexthop_adata *) nh_ea->u.ptr)->nh.iface : NULL; - neighbor *n = neigh_find(fs->rte->src->proto, ip, ifa, 0); + /* XXX this code supposes that every owner is a protocol XXX */ + neighbor *n = neigh_find(SKIP_BACK(struct proto, sources, fs->rte->src->owner), ip, ifa, 0); if (!n || (n->scope == SCOPE_HOST)) runtime( "Invalid gw address" ); diff --git a/lib/locking.h b/lib/locking.h index a9a8aa9b..1df30063 100644 --- a/lib/locking.h +++ b/lib/locking.h @@ -16,6 +16,7 @@ struct lock_order { struct domain_generic *the_bird; struct domain_generic *proto; struct domain_generic *rtable; + struct domain_generic *attrs; struct domain_generic *resource; }; diff --git a/lib/route.h b/lib/route.h index 2d691215..cf3c70ba 100644 --- a/lib/route.h +++ b/lib/route.h @@ -11,11 +11,14 @@ #define _BIRD_LIB_ROUTE_H_ #include "lib/type.h" +#include "lib/rcu.h" +#include "lib/hash.h" +#include "lib/event.h" struct network; struct proto; struct cli; - +struct rtable; typedef struct rte { struct ea_list *attrs; /* Attributes of this route */ @@ -43,19 +46,101 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); struct rte_src { struct rte_src *next; /* Hash chain */ - struct proto *proto; /* Protocol the source is based on */ + struct rte_owner *owner; /* Route source owner */ u32 private_id; /* Private ID, assigned by the protocol */ u32 global_id; /* Globally unique ID of the source */ - unsigned uc; /* Use count */ + _Atomic u64 uc; /* Use count 
*/ }; +struct rte_owner_class { + void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ + int (*rte_better)(struct rte *, struct rte *); + int (*rte_mergable)(struct rte *, struct rte *); + u32 (*rte_igp_metric)(const rte *); +}; + +struct rte_owner { + struct rte_owner_class *class; + int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); + HASH(struct rte_src) hash; + const char *name; + u32 hash_key; + u32 uc; + event_list *list; + event *prune; + event *stop; +}; + +DEFINE_DOMAIN(attrs); +extern DOMAIN(attrs) attrs_domain; + +#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain) +#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain) + +#define RTE_SRC_PU_SHIFT 44 +#define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT) + +/* Get a route source. This also locks the source, therefore the caller has to + * unlock the source after the route has been propagated. */ +struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id); +#define rt_get_source(p, id) rt_get_source_o(&(p)->sources, (id)) -struct rte_src *rt_find_source(struct proto *p, u32 id); -struct rte_src *rt_get_source(struct proto *p, u32 id); struct rte_src *rt_find_source_global(u32 id); -static inline void rt_lock_source(struct rte_src *src) { src->uc++; } -static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } -void rt_prune_sources(void); + +static inline void rt_lock_source(struct rte_src *src) +{ + /* Locking a source is trivial; somebody already holds it so we just increase + * the use count. Nothing can be freed underneath our hands. */ + u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel); + ASSERT_DIE(uc > 0); +} + +static inline void rt_unlock_source(struct rte_src *src) +{ + /* Unlocking is tricky. 
We do it lockless so at the same time, the prune + * event may be running, therefore if the unlock gets us to zero, it must be + * the last thing in this routine, otherwise the prune routine may find the + * source's usecount zeroed, freeing it prematurely. + * + * The usecount is split into two parts: + * the top 20 bits are an in-progress indicator + * the bottom 44 bits keep the actual usecount. + * + * Therefore at most 1 million writers can simultaneously unlock the same + * source, while at most ~17T different routes can reference it. Both limits + * are insanely high from the 2022 point of view. Let's suppose that when 17T + * routes or 1M writers get real, we also get 128-bit atomic variables in the + * C standard. */ + + /* First, we push the in-progress indicator */ + u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel); + + /* Then we split the indicator into its parts. Remember, we got the value before the operation happened. */ + u64 pending = (uc >> RTE_SRC_PU_SHIFT) + 1; + uc &= RTE_SRC_IN_PROGRESS - 1; + + /* We repurpose the RCU critical section indicator to make the prune event wait + * until we finish here in the rare case we get preempted. */ + rcu_read_lock(); + + /* Obviously, there can't be more pending unlocks than the usecount itself */ + if (uc == pending) + /* If we're the last unlocker, schedule the owner's prune event */ + ev_send(src->owner->list, src->owner->prune); + else + ASSERT_DIE(uc > pending); + + /* And now, finally, simultaneously pop the in-progress indicator and the + * usecount, possibly allowing the source pruning routine to free this structure */ + atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel); + + /* ... and to reduce the load a bit, the source pruning routine will better wait for + * RCU synchronization instead of a busy loop. 
*/ rcu_read_unlock(); +} + +void rt_init_sources(struct rte_owner *, const char *name, event_list *list); +void rt_destroy_sources(struct rte_owner *, event *); /* * Route Attributes diff --git a/nest/proto.c b/nest/proto.c index 1d480ba2..e64cef28 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1963,10 +1963,17 @@ channel_reset_limit(struct channel *c, struct limit *l, int dir) c->limit_active &= ~(1 << dir); } +static struct rte_owner_class default_rte_owner_class; + static inline void proto_do_start(struct proto *p) { p->active = 1; + + rt_init_sources(&p->sources, p->name, proto_event_list(p)); + if (!p->sources.class) + p->sources.class = &default_rte_owner_class; + if (!p->cf->late_if_feed) if_feed_baby(p); } @@ -1975,10 +1982,8 @@ static void proto_do_up(struct proto *p) { if (!p->main_source) - { p->main_source = rt_get_source(p, 0); - rt_lock_source(p->main_source); - } + // Locked automatically by rt_get_source() proto_start_channels(p); @@ -2005,6 +2010,7 @@ proto_do_stop(struct proto *p) } proto_stop_channels(p); + rt_destroy_sources(&p->sources, p->event); p->do_stop = 1; proto_send_event(p); diff --git a/nest/protocol.h b/nest/protocol.h index 3c823ae1..709d6415 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -60,7 +60,6 @@ struct protocol { int (*start)(struct proto *); /* Start the instance */ int (*shutdown)(struct proto *); /* Stop the instance */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ - void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ // int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */ void (*copy_config)(struct proto_config *, struct proto_config *); /* Copy config from given protocol instance */ @@ -128,6 +127,7 @@ struct proto { list channels; /* List of channels to rtables 
(struct channel) */ struct channel *main_channel; /* Primary channel */ struct rte_src *main_source; /* Primary route source */ + struct rte_owner sources; /* Route source owner structure */ struct iface *vrf; /* Related VRF instance, NULL if global */ const char *name; /* Name of this instance (== cf->name) */ @@ -342,7 +342,7 @@ void proto_notify_state(struct proto *p, unsigned state); */ static inline int proto_is_inactive(struct proto *p) -{ return (p->active_channels == 0) && (p->active_loops == 0); } +{ return (p->active_channels == 0) && (p->active_loops == 0) && (p->sources.uc == 0); } /* diff --git a/nest/rt-attr.c b/nest/rt-attr.c index b31bc5cc..b3a4c7a1 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -178,6 +178,8 @@ const char * flowspec_valid_names[FLOWSPEC__MAX] = { [FLOWSPEC_INVALID] = "invalid", }; +DOMAIN(attrs) attrs_domain; + pool *rta_pool; static slab *rte_src_slab; @@ -187,16 +189,14 @@ static struct idm src_ids; /* rte source hash */ -#define RSH_KEY(n) n->proto, n->private_id +#define RSH_KEY(n) n->private_id #define RSH_NEXT(n) n->next -#define RSH_EQ(p1,n1,p2,n2) p1 == p2 && n1 == n2 -#define RSH_FN(p,n) p->hash_key ^ u32_hash(n) +#define RSH_EQ(n1,n2) n1 == n2 +#define RSH_FN(n) u32_hash(n) #define RSH_REHASH rte_src_rehash #define RSH_PARAMS /2, *2, 1, 1, 8, 20 -#define RSH_INIT_ORDER 6 - -static HASH(struct rte_src) src_hash; +#define RSH_INIT_ORDER 2 static struct rte_src **rte_src_global; static uint rte_src_global_max = SRC_ID_INIT_SIZE; @@ -207,34 +207,44 @@ rte_src_init(void) rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max); idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE); - - HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER); } - HASH_DEFINE_REHASH_FN(RSH, struct rte_src) -struct rte_src * -rt_find_source(struct proto *p, u32 id) +static struct rte_src * +rt_find_source(struct rte_owner *p, u32 id) { - return HASH_FIND(src_hash, RSH, p, id); + return HASH_FIND(p->hash, RSH, id); } struct rte_src 
* -rt_get_source(struct proto *p, u32 id) +rt_get_source_o(struct rte_owner *p, u32 id) { + if (p->stop) + bug("Stopping route owner asked for another source."); + struct rte_src *src = rt_find_source(p, id); if (src) + { + UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel); return src; + } + RTA_LOCK; src = sl_allocz(rte_src_slab); - src->proto = p; + src->owner = p; src->private_id = id; src->global_id = idm_alloc(&src_ids); - src->uc = 0; - HASH_INSERT2(src_hash, RSH, rta_pool, src); + atomic_store_explicit(&src->uc, 1, memory_order_release); + p->uc++; + + HASH_INSERT2(p->hash, RSH, rta_pool, src); + if (config->table_debug) + log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now", + p->name, src->private_id, src->global_id, p->uc); + if (src->global_id >= rte_src_global_max) { rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2)); @@ -243,6 +253,7 @@ rt_get_source(struct proto *p, u32 id) } rte_src_global[src->global_id] = src; + RTA_UNLOCK; return src; } @@ -256,23 +267,89 @@ rt_find_source_global(u32 id) return rte_src_global[id]; } -void -rt_prune_sources(void) +static inline void +rt_done_sources(struct rte_owner *o) { - HASH_WALK_FILTER(src_hash, next, src, sp) + ev_send(o->list, o->stop); +} + +void +rt_prune_sources(void *data) +{ + struct rte_owner *o = data; + + HASH_WALK_FILTER(o->hash, next, src, sp) { - if (src->uc == 0) + u64 uc; + while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT) + synchronize_rcu(); + + if (uc == 0) { - HASH_DO_REMOVE(src_hash, RSH, sp); + o->uc--; + + HASH_DO_REMOVE(o->hash, RSH, sp); + + RTA_LOCK; + rte_src_global[src->global_id] = NULL; idm_free(&src_ids, src->global_id); sl_free(src); + RTA_UNLOCK; } } HASH_WALK_FILTER_END; - HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool); + RTA_LOCK; + HASH_MAY_RESIZE_DOWN(o->hash, RSH, rta_pool); + + if (o->stop && !o->uc) + { + rfree(o->prune); + RTA_UNLOCK; + + 
if (config->table_debug) + log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name); + + rt_done_sources(o); + } + else + RTA_UNLOCK; } +void +rt_init_sources(struct rte_owner *o, const char *name, event_list *list) +{ + RTA_LOCK; + HASH_INIT(o->hash, rta_pool, RSH_INIT_ORDER); + o->hash_key = random_u32(); + o->uc = 0; + o->name = name; + o->prune = ev_new_init(rta_pool, rt_prune_sources, o); + o->stop = NULL; + o->list = list; + RTA_UNLOCK; +} + +void +rt_destroy_sources(struct rte_owner *o, event *done) +{ + o->stop = done; + + if (!o->uc) + { + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name); + + RTA_LOCK; + rfree(o->prune); + RTA_UNLOCK; + + rt_done_sources(o); + } + else + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. Remaining %u rte_src's to prune.", o->name, o->uc); +} /* * Multipath Next Hop @@ -1466,6 +1543,8 @@ ea_show_list(struct cli *c, ea_list *eal) void rta_init(void) { + attrs_domain = DOMAIN_NEW(attrs, "Attributes"); + rta_pool = rp_new(&root_pool, "Attributes"); rta_alloc_hash(); diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 7f45985f..4199e17c 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -68,6 +68,7 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); rte_update(c, net, NULL, src); + rt_unlock_source(src); } else if (flags & IF_CHANGE_UP) { @@ -95,6 +96,7 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) }; rte_update(c, net, &e0, src); + rt_unlock_source(src); } } diff --git a/nest/rt-show.c b/nest/rt-show.c index b784bf83..17400029 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -58,7 +58,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary if (d->verbose && !rta_is_cached(a) && a) a = ea_normalize(a, 0); - get_route_info = 
e->src->proto->proto->get_route_info; + get_route_info = e->src->owner->class ? e->src->owner->class->get_route_info : NULL; if (get_route_info) get_route_info(e, info); else @@ -74,7 +74,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : had ? "recursive" : rta_dest_name(dest), - e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + e->src->owner->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); if (d->verbose) { @@ -178,7 +178,7 @@ rt_show_net(struct rt_show_data *d, const net_addr *n, rte **feed, uint count) } } - if (d->show_protocol && (d->show_protocol != e.src->proto)) + if (d->show_protocol && (&d->show_protocol->sources != e.src->owner)) goto skip; if (f_run(d->filter, &e, 0) > F_ACCEPT) diff --git a/nest/rt-table.c b/nest/rt-table.c index 15dbc371..b8f0e61d 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -673,16 +673,16 @@ rte_better(rte *new, rte *old) return 1; if (np < op) return 0; - if (new->src->proto->proto != old->src->proto->proto) + if (new->src->owner->class != old->src->owner->class) { /* * If the user has configured protocol preferences, so that two different protocols * have the same preference, try to break the tie by comparing addresses. Not too * useful, but keeps the ordering of routes unambiguous. 
*/ - return new->src->proto->proto > old->src->proto->proto; + return new->src->owner->class > old->src->owner->class; } - if (better = new->src->proto->rte_better) + if (better = new->src->owner->class->rte_better) return better(new, old); return 0; } @@ -698,10 +698,10 @@ rte_mergable(rte *pri, rte *sec) if (rt_get_preference(pri) != rt_get_preference(sec)) return 0; - if (pri->src->proto->proto != sec->src->proto->proto) + if (pri->src->owner->class != sec->src->owner->class) return 0; - if (mergable = pri->src->proto->rte_mergable) + if (mergable = pri->src->owner->class->rte_mergable) return mergable(pri, sec); return 0; @@ -1596,10 +1596,10 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr { if (!old->generation && !new->generation) bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", - c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", - c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); } if (new && rte_same(old, &new_stored->rte)) @@ -1675,8 +1675,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr /* If routes are not sorted, find the best route and move it on the first position. There are several optimized cases. */ - if (src->proto->rte_recalculate && - src->proto->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best)) + if (src->owner->rte_recalculate && + src->owner->rte_recalculate(table, net, new_stored ? 
&new_stored->rte : NULL, old, old_best)) goto do_recalculate; if (new_stored && rte_better(&new_stored->rte, old_best)) @@ -2876,8 +2876,6 @@ again: } } - rt_prune_sources(); - uint flushed_channels = 0; /* Close flushed channels */ @@ -3048,7 +3046,6 @@ rt_export_cleanup(rtable *tab) done:; struct rt_import_hook *ih; node *x; - _Bool imports_stopped = 0; WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) if (ih->import_state == TIS_WAITING) if (!first || (first->seq >= ih->flush_seq)) @@ -3058,19 +3055,11 @@ done:; rem_node(&ih->n); mb_free(ih); rt_unlock_table(tab); - imports_stopped = 1; } if (tab->export_used) ev_schedule(tab->rt_event); - if (imports_stopped) - { - if (config->table_debug) - log(L_TRACE "%s: Sources pruning routine requested", tab->name); - - rt_prune_sources(); - } if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer)) tm_stop(tab->exporter.export_timer); @@ -3554,8 +3543,8 @@ rt_next_hop_update_net(rtable *tab, net *n) /* Call a pre-comparison hook */ /* Not really an efficient way to compute this */ - if (e->rte.src->proto->rte_recalculate) - e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + if (e->rte.src->owner->rte_recalculate) + e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); updates[pos++] = (struct rte_multiupdate) { .old = e, @@ -4170,8 +4159,8 @@ rt_get_igp_metric(const rte *rt) if (rt_get_source_attr(rt) == RTS_DEVICE) return 0; - if (rt->src->proto->rte_igp_metric) - return rt->src->proto->rte_igp_metric(rt); + if (rt->src->owner->class->rte_igp_metric) + return rt->src->owner->class->rte_igp_metric(rt); return IGP_METRIC_UNKNOWN; } diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 00b9aa79..4d024e3a 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -2259,7 +2259,7 @@ babel_kick_timer(struct babel_proto *p) static int babel_preexport(struct channel *C, struct rte *new) { - if (new->src->proto != C->proto) + if 
(new->src->owner != &C->proto->sources) return 0; /* Reject our own unreachable routes */ @@ -2289,7 +2289,7 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net, uint rt_metric = ea_get_int(new->attrs, &ea_babel_metric, 0); u64 rt_router_id = 0; - if (new->src->proto == P) + if (new->src->owner == &P->sources) { rt_seqno = ea_get_int(new->attrs, &ea_babel_seqno, 0); eattr *e = ea_find(new->attrs, &ea_babel_router_id); @@ -2373,6 +2373,12 @@ babel_postconfig(struct proto_config *CF) cf->ip6_channel = ip6 ?: ip6_sadr; } +static struct rte_owner_class babel_rte_owner_class = { + .get_route_info = babel_get_route_info, + .rte_better = babel_rte_better, + .rte_igp_metric = babel_rte_igp_metric, +}; + static struct proto * babel_init(struct proto_config *CF) { @@ -2386,8 +2392,8 @@ babel_init(struct proto_config *CF) P->if_notify = babel_if_notify; P->rt_notify = babel_rt_notify; P->preexport = babel_preexport; - P->rte_better = babel_rte_better; - P->rte_igp_metric = babel_rte_igp_metric; + + P->sources.class = &babel_rte_owner_class; return P; } @@ -2498,7 +2504,6 @@ struct protocol proto_babel = { .start = babel_start, .shutdown = babel_shutdown, .reconfigure = babel_reconfigure, - .get_route_info = babel_get_route_info, }; void diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index a7b1a7ed..e96b175d 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1671,8 +1671,9 @@ bgp_free_prefix_table(struct bgp_channel *c) } static struct bgp_prefix * -bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) +bgp_get_prefix(struct bgp_channel *c, const net_addr *net, struct rte_src *src) { + u32 path_id = src->global_id; u32 path_id_hash = c->add_path_tx ? 
path_id : 0; /* We must use a different hash function than the rtable */ u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash)); @@ -1690,6 +1691,7 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) px->hash = hash; px->path_id = path_id; net_copy(px->net, net); + rt_lock_source(src); HASH_INSERT2(c->prefix_hash, PXH, c->pool, px); @@ -1756,6 +1758,8 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) { HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); + rt_unlock_source(rt_find_source_global(px->path_id)); + if (c->prefix_slab) sl_free(px); else @@ -1931,9 +1935,8 @@ bgp_setup_out_table(struct bgp_channel *c) int bgp_preexport(struct channel *C, rte *e) { - struct proto *SRC = e->src->proto; struct bgp_proto *p = (struct bgp_proto *) C->proto; - struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL; + struct bgp_proto *src = bgp_rte_proto(e); /* Reject our routes */ if (src == p) @@ -2002,8 +2005,7 @@ bgp_preexport(struct channel *C, rte *e) static ea_list * bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool) { - struct proto *SRC = e->src->proto; - struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL; + struct bgp_proto *src = bgp_rte_proto(e); struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls }; ea_list *attrs = attrs0; eattr *a; @@ -2121,7 +2123,7 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c struct bgp_proto *p = (void *) P; struct bgp_channel *c = (void *) C; struct bgp_bucket *buck; - u32 path; + struct rte_src *path; if (new) { @@ -2133,12 +2135,12 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c /* If attributes are invalid, we fail back to withdraw */ buck = attrs ? 
bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); - path = new->src->global_id; + path = new->src; } else { buck = bgp_get_withdraw_bucket(c); - path = old->src->global_id; + path = old->src; } if (bgp_update_prefix(c, bgp_get_prefix(c, n, path), buck)) @@ -2156,7 +2158,7 @@ bgp_get_neighbor(rte *r) return as; /* If AS_PATH is not defined, we treat rte as locally originated */ - struct bgp_proto *p = (void *) r->src->proto; + struct bgp_proto *p = bgp_rte_proto(r); return p->cf->confederation ?: p->local_as; } @@ -2186,8 +2188,8 @@ rte_stale(rte *r) int bgp_rte_better(rte *new, rte *old) { - struct bgp_proto *new_bgp = (struct bgp_proto *) new->src->proto; - struct bgp_proto *old_bgp = (struct bgp_proto *) old->src->proto; + struct bgp_proto *new_bgp = bgp_rte_proto(new); + struct bgp_proto *old_bgp = bgp_rte_proto(old); eattr *x, *y; u32 n, o; @@ -2331,8 +2333,8 @@ bgp_rte_better(rte *new, rte *old) int bgp_rte_mergable(rte *pri, rte *sec) { - struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->src->proto; - struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->src->proto; + struct bgp_proto *pri_bgp = bgp_rte_proto(pri); + struct bgp_proto *sec_bgp = bgp_rte_proto(sec); eattr *x, *y; u32 p, s; @@ -2416,8 +2418,8 @@ same_group(rte *r, u32 lpref, u32 lasn) static inline int use_deterministic_med(struct rte_storage *r) { - struct proto *P = r->rte.src->proto; - return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med; + struct bgp_proto *p = bgp_rte_proto(&r->rte); + return p && p->cf->deterministic_med; } int diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 33849b0b..d240112c 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -1710,6 +1710,13 @@ done: return p->p.proto_state; } +struct rte_owner_class bgp_rte_owner_class = { + .get_route_info = bgp_get_route_info, + .rte_better = bgp_rte_better, + .rte_mergable = bgp_rte_mergable, + .rte_igp_metric = bgp_rte_igp_metric, +}; + static struct proto * bgp_init(struct proto_config 
*CF) { @@ -1723,10 +1730,9 @@ bgp_init(struct proto_config *CF) P->reload_routes = bgp_reload_routes; P->feed_begin = bgp_feed_begin; P->feed_end = bgp_feed_end; - P->rte_better = bgp_rte_better; - P->rte_mergable = bgp_rte_mergable; - P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; - P->rte_igp_metric = bgp_rte_igp_metric; + + P->sources.class = &bgp_rte_owner_class; + P->sources.rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; p->cf = cf; p->is_internal = (cf->local_as == cf->remote_as); @@ -2638,7 +2644,6 @@ struct protocol proto_bgp = { .reconfigure = bgp_reconfigure, .copy_config = bgp_copy_config, .get_status = bgp_get_status, - .get_route_info = bgp_get_route_info, .show_proto_info = bgp_show_proto_info }; diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 469f0cb9..81382099 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -535,6 +535,7 @@ rte_resolvable(const rte *rt) return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE); } +extern struct rte_owner_class bgp_rte_owner_class; #ifdef LOCAL_DEBUG #define BGP_FORCE_DEBUG 1 @@ -585,6 +586,12 @@ int bgp_preexport(struct channel *, struct rte *); void bgp_get_route_info(struct rte *, byte *); int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad); +static inline struct bgp_proto *bgp_rte_proto(struct rte *rte) +{ + return (rte->src->owner->class == &bgp_rte_owner_class) ? 
+ SKIP_BACK(struct bgp_proto, p.sources, rte->src->owner) : NULL; +} + #define BGP_AIGP_METRIC 1 #define BGP_AIGP_MAX U64(0xffffffffffffffff) diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index de976588..867be75f 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -1372,6 +1372,8 @@ bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_lis { if (path_id != s->last_id) { + rt_unlock_source(s->last_src); + s->last_src = rt_get_source(&s->proto->p, path_id); s->last_id = path_id; @@ -2449,6 +2451,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis s->last_id = 0; s->last_src = s->proto->p.main_source; + rt_lock_source(s->last_src); /* * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not @@ -2475,6 +2478,8 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis rta_free(s->cached_ea); s->cached_ea = NULL; + + rt_unlock_source(s->last_src); } static void diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index 5ef4cd44..f4c09ab1 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -472,9 +472,9 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) #ifdef CONFIG_BGP /* Find peer index */ - if (r->src->proto->proto == &proto_bgp) + struct bgp_proto *p = bgp_rte_proto(r); + if (p) { - struct bgp_proto *p = (void *) r->src->proto; struct mrt_peer_entry *n = HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip); diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 9c25f0f0..4e29f960 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -377,8 +377,8 @@ ospf_init(struct proto_config *CF) P->reload_routes = ospf_reload_routes; P->feed_begin = ospf_feed_begin; P->feed_end = ospf_feed_end; - P->rte_better = ospf_rte_better; - P->rte_igp_metric = ospf_rte_igp_metric; + + P->sources.class = &ospf_rte_owner_class; return P; } @@ -492,7 +492,7 @@ ospf_preexport(struct channel *C, rte *e) struct ospf_area *oa = 
ospf_main_area(p); /* Reject our own routes */ - if (e->src->proto == &p->p) + if (e->sender == C->in_req.hook) return -1; /* Do not export routes to stub areas */ @@ -1506,6 +1506,12 @@ ospf_sh_lsadb(struct lsadb_show_data *ld) } +struct rte_owner_class ospf_rte_owner_class = { + .get_route_info = ospf_get_route_info, + .rte_better = ospf_rte_better, + .rte_igp_metric = ospf_rte_igp_metric, +}; + struct protocol proto_ospf = { .name = "OSPF", .template = "ospf%d", @@ -1519,7 +1525,6 @@ struct protocol proto_ospf = { .shutdown = ospf_shutdown, .reconfigure = ospf_reconfigure, .get_status = ospf_get_status, - .get_route_info = ospf_get_route_info }; struct ea_class ea_ospf_metric1 = { diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 7bed5c85..3477ba5a 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -1002,6 +1002,8 @@ void ospf_sh_state(struct proto *P, int verbose, int reachable); void ospf_sh_lsadb(struct lsadb_show_data *ld); +extern struct rte_owner_class ospf_rte_owner_class; + /* iface.c */ void ospf_iface_chstate(struct ospf_iface *ifa, u8 state); void ospf_iface_sm(struct ospf_iface *ifa, int event); diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 8af6de81..b3b50a0d 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -87,7 +87,7 @@ pipe_preexport(struct channel *C, rte *e) { log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u", e->generation, max_generation, C->proto->name, - C->table->name, e->net, e->src->proto->name, e->src->private_id, e->src->global_id); + C->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id); return -1; } diff --git a/proto/rip/rip.c b/proto/rip/rip.c index f5c01380..ab0e3f4b 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -377,7 +377,7 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, s en->valid = RIP_ENTRY_VALID; en->metric = rt_metric; en->tag = rt_tag; - en->from = (new->src->proto == 
P) ? rt_from : NULL; + en->from = (new->src->owner == &P->sources) ? rt_from : NULL; eattr *nhea = ea_find(new->attrs, &ea_gen_nexthop); if (nhea) @@ -1112,11 +1112,20 @@ rip_reload_routes(struct channel *C) rip_kick_timer(p); } +static struct rte_owner_class rip_rte_owner_class; + +static inline struct rip_proto * +rip_rte_proto(struct rte *rte) +{ + return (rte->src->owner->class == &rip_rte_owner_class) ? + SKIP_BACK(struct rip_proto, p.sources, rte->src->owner) : NULL; +} + static int rip_rte_better(struct rte *new, struct rte *old) { ASSERT_DIE(new->src == old->src); - struct rip_proto *p = (struct rip_proto *) new->src->proto; + struct rip_proto *p = rip_rte_proto(new); u32 new_metric = ea_get_int(new->attrs, &ea_rip_metric, p->infinity); u32 old_metric = ea_get_int(old->attrs, &ea_rip_metric, p->infinity); @@ -1151,8 +1160,7 @@ rip_init(struct proto_config *CF) P->rt_notify = rip_rt_notify; P->neigh_notify = rip_neigh_notify; P->reload_routes = rip_reload_routes; - P->rte_better = rip_rte_better; - P->rte_igp_metric = rip_rte_igp_metric; + P->sources.class = &rip_rte_owner_class; return P; } @@ -1227,7 +1235,7 @@ rip_reconfigure(struct proto *P, struct proto_config *CF) static void rip_get_route_info(rte *rte, byte *buf) { - struct rip_proto *p = (struct rip_proto *) rte->src->proto; + struct rip_proto *p = rip_rte_proto(rte); u32 rt_metric = ea_get_int(rte->attrs, &ea_rip_metric, p->infinity); u32 rt_tag = ea_get_int(rte->attrs, &ea_rip_tag, 0); @@ -1359,6 +1367,12 @@ rip_dump(struct proto *P) } +static struct rte_owner_class rip_rte_owner_class = { + .get_route_info = rip_get_route_info, + .rte_better = rip_rte_better, + .rte_igp_metric = rip_rte_igp_metric, +}; + struct protocol proto_rip = { .name = "RIP", .template = "rip%d", @@ -1372,7 +1386,6 @@ struct protocol proto_rip = { .start = rip_start, .shutdown = rip_shutdown, .reconfigure = rip_reconfigure, - .get_route_info = rip_get_route_info, }; void diff --git a/proto/static/static.c 
b/proto/static/static.c index f0a514f7..65f3eccc 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -50,11 +50,14 @@ static inline struct rte_src * static_get_source(struct static_proto *p, uint i) { return i ? rt_get_source(&p->p, i) : p->p.main_source; } +static inline void static_free_source(struct rte_src *src, uint i) +{ if (i) rt_unlock_source(src); } + static void static_announce_rte(struct static_proto *p, struct static_route *r) { + struct rte_src *src; ea_list *ea = NULL; - struct rte_src *src = static_get_source(p, r->index); ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference); ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_STATIC); @@ -114,6 +117,7 @@ static_announce_rte(struct static_proto *p, struct static_route *r) return; /* We skip rta_lookup() here */ + src = static_get_source(p, r->index); rte e0 = { .attrs = ea, .src = src, .net = r->net, }, *e = &e0; /* Evaluate the filter */ @@ -121,6 +125,8 @@ static_announce_rte(struct static_proto *p, struct static_route *r) f_eval_rte(r->cmds, e); rte_update(p->p.main_channel, r->net, e, src); + static_free_source(src, r->index); + r->state = SRS_CLEAN; return; @@ -128,7 +134,9 @@ withdraw: if (r->state == SRS_DOWN) return; + src = static_get_source(p, r->index); rte_update(p->p.main_channel, r->net, NULL, src); + static_free_source(src, r->index); r->state = SRS_DOWN; } @@ -294,7 +302,11 @@ static void static_remove_rte(struct static_proto *p, struct static_route *r) { if (r->state) - rte_update(p->p.main_channel, r->net, NULL, static_get_source(p, r->index)); + { + struct rte_src *src = static_get_source(p, r->index); + rte_update(p->p.main_channel, r->net, NULL, src); + static_free_source(src, r->index); + } static_reset_rte(p, r); } @@ -437,6 +449,8 @@ static_postconfig(struct proto_config *CF) static_index_routes(cf); } +static struct rte_owner_class static_rte_owner_class; + static struct proto * static_init(struct proto_config *CF) { @@ -448,8 +462,7 @@ 
static_init(struct proto_config *CF) P->neigh_notify = static_neigh_notify; P->reload_routes = static_reload_routes; - P->rte_better = static_rte_better; - P->rte_mergable = static_rte_mergable; + P->sources.class = &static_rte_owner_class; if (cf->igp_table_ip4) p->igp_table_ip4 = cf->igp_table_ip4->table; @@ -748,6 +761,11 @@ static_show(struct proto *P) static_show_rt(r); } +static struct rte_owner_class static_rte_owner_class = { + .get_route_info = static_get_route_info, + .rte_better = static_rte_better, + .rte_mergable = static_rte_mergable, +}; struct protocol proto_static = { .name = "Static", @@ -763,7 +781,6 @@ struct protocol proto_static = { .shutdown = static_shutdown, .reconfigure = static_reconfigure, .copy_config = static_copy_config, - .get_route_info = static_get_route_info, }; void diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 46b5a51d..f796a159 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -314,6 +314,7 @@ krt_learn_scan(struct krt_proto *p, rte *e) ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference); rte_update(p->p.main_channel, e->net, &e0, e0.src); + rt_unlock_source(e0.src); } static void @@ -322,9 +323,9 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) if (new) return krt_learn_scan(p, e); - struct rte_src *src = rt_find_source(&p->p, krt_metric(e)); - if (src) - rte_update(p->p.main_channel, e->net, NULL, src); + struct rte_src *src = rt_get_source(&p->p, krt_metric(e)); + rte_update(p->p.main_channel, e->net, NULL, src); + rt_unlock_source(src); } #endif @@ -683,7 +684,7 @@ krt_scan_timer_kick(struct krt_proto *p) static int krt_preexport(struct channel *C, rte *e) { - if (e->src->proto == C->proto) + if (e->src->owner == &C->proto->sources) return -1; if (!krt_capable(e))