From 54d94f4b1a5b9d8f2943236323d789290bb7bb2f Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 16 Jun 2022 12:39:08 +0200 Subject: [PATCH 01/20] Showing the nexthop resolution target in import tables --- nest/rt-show.c | 16 ++++++++++++++-- proto/bgp/bgp.h | 3 +++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/nest/rt-show.c b/nest/rt-show.c index f3852d17..35036fe6 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -71,8 +71,13 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary if (d->last_table != d->tab) rt_show_table(c, d); - cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, - net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : rta_dest_name(dest), + eattr *heea; + struct hostentry_adata *had = NULL; + if (!net_is_flow(e->net) && (dest == RTD_NONE) && (heea = ea_find(a, &ea_gen_hostentry))) + had = (struct hostentry_adata *) heea->u.ptr; + + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, + net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : had ? "recursive" : rta_dest_name(dest), e->src->proto->name, tm, from, primary ? (sync_error ? " !" 
: " *") : "", info); if (dest == RTD_UNICAST) @@ -100,6 +105,13 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary cli_printf(c, -1007, "\tdev %s%s%s", nh->iface->name, mpls, onlink, weight); } + else if (had) + { + if (ipa_nonzero(had->he->link) && !ipa_equal(had->he->link, had->he->addr)) + cli_printf(c, -1007, "\tvia %I %I table %s", had->he->addr, had->he->link, had->he->tab->name); + else + cli_printf(c, -1007, "\tvia %I table %s", had->he->addr, had->he->tab->name); + } if (d->verbose) ea_show_list(c, a); diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index b3966bc3..cca7873a 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -520,6 +520,9 @@ static inline int rte_resolvable(const rte *rt) { eattr *nhea = ea_find(rt->attrs, &ea_gen_nexthop); + if (!nhea) + return 0; + struct nexthop_adata *nhad = (void *) nhea->u.ptr; return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE); } From 8c92f47ac77f267368b6d6bd161689a0c0bc5e5a Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 16 Jun 2022 23:24:53 +0200 Subject: [PATCH 02/20] Route attribute storage keeps the previous layers --- lib/attrs.h | 2 +- lib/route.h | 4 +-- nest/rt-attr.c | 74 ++++++++++++++++++++++++++++++----------------- nest/rt-show.c | 2 +- proto/bgp/attrs.c | 4 +-- proto/mrt/mrt.c | 2 +- 6 files changed, 54 insertions(+), 34 deletions(-) diff --git a/lib/attrs.h b/lib/attrs.h index 79b7b14a..a75abcd3 100644 --- a/lib/attrs.h +++ b/lib/attrs.h @@ -42,7 +42,7 @@ lp_store_adata(struct linpool *pool, const void *buf, uint len) #define tmp_copy_adata(ad) tmp_store_adata((ad)->data, (ad)->length) static inline int adata_same(const struct adata *a, const struct adata *b) -{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } +{ return (!a && !b) || (a->length == b->length && !memcmp(a->data, b->data, a->length)); } diff --git a/lib/route.h b/lib/route.h index 1b2f4de6..7e28b91e 100644 --- a/lib/route.h +++ b/lib/route.h @@ -237,7 
+237,7 @@ ea_list *ea_append(ea_list *to, ea_list *what); void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); /* Normalize ea_list; allocates the result from tmp_linpool */ -ea_list *ea_normalize(const ea_list *e); +ea_list *ea_normalize(ea_list *e, int overlay); uint ea_list_size(ea_list *); void ea_list_copy(ea_list *dest, ea_list *src, uint size); @@ -414,7 +414,7 @@ static inline int rte_dest(const rte *r) void rta_init(void); ea_list *ea_lookup(ea_list *); /* Get a cached (and normalized) variant of this attribute list */ -static inline int ea_is_cached(ea_list *r) { return r->flags & EALF_CACHED; } +static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; } static inline ea_list *ea_clone(ea_list *r) { r->uc++; return r; } void ea__free(ea_list *r); static inline void ea_free(ea_list *r) { if (r && !--r->uc) ea__free(r); } diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 31e2057e..c0f81b9d 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -726,8 +726,8 @@ ea_do_prune(ea_list *e) s++; /* Now s0 is the most recent version, s[-1] the oldest one */ - /* Drop undefs */ - if (s0->undef) + /* Drop undefs unless this is a true overlay */ + if (s0->undef && !e->next) continue; /* Copy the newest version to destination */ @@ -760,18 +760,15 @@ ea_do_prune(ea_list *e) static void ea_sort(ea_list *e) { - while (e) - { - if (!(e->flags & EALF_SORTED)) - { - ea_do_sort(e); - ea_do_prune(e); - e->flags |= EALF_SORTED; - } - if (e->count > 5) - e->flags |= EALF_BISECT; - e = e->next; - } + if (!(e->flags & EALF_SORTED)) + { + ea_do_sort(e); + ea_do_prune(e); + e->flags |= EALF_SORTED; + } + + if (e->count > 5) + e->flags |= EALF_BISECT; } /** @@ -782,7 +779,7 @@ ea_sort(ea_list *e) * a given &ea_list after merging with ea_merge(). 
*/ static unsigned -ea_scan(const ea_list *e) +ea_scan(const ea_list *e, int overlay) { unsigned cnt = 0; @@ -790,6 +787,8 @@ ea_scan(const ea_list *e) { cnt += e->count; e = e->next; + if (e && overlay && ea_is_cached(e)) + break; } return sizeof(ea_list) + sizeof(eattr)*cnt; } @@ -809,27 +808,32 @@ ea_scan(const ea_list *e) * by calling ea_sort(). */ static void -ea_merge(const ea_list *e, ea_list *t) +ea_merge(ea_list *e, ea_list *t, int overlay) { eattr *d = t->attrs; t->flags = 0; t->count = 0; - t->next = NULL; + while (e) { memcpy(d, e->attrs, sizeof(eattr)*e->count); t->count += e->count; d += e->count; e = e->next; + + if (e && overlay && ea_is_cached(e)) + break; } + + t->next = e; } ea_list * -ea_normalize(const ea_list *e) +ea_normalize(ea_list *e, int overlay) { - ea_list *t = tmp_alloc(ea_scan(e)); - ea_merge(e, t); + ea_list *t = tmp_alloc(ea_scan(e, overlay)); + ea_merge(e, t, overlay); ea_sort(t); return t->count ? t : NULL; @@ -850,7 +854,8 @@ ea_same(ea_list *x, ea_list *y) if (!x || !y) return x == y; - ASSERT(!x->next && !y->next); + if (x->next != y->next) + return 0; if (x->count != y->count) return 0; for(c=0; c<x->count; c++) @@ -876,13 +881,12 @@ ea_list_size(ea_list *o) unsigned i, elen; ASSERT_DIE(o); - ASSERT_DIE(!o->next); elen = BIRD_CPU_ALIGN(sizeof(ea_list) + sizeof(eattr) * o->count); for(i=0; i<o->count; i++) { eattr *a = &o->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) + if (!a->undef && !(a->type & EAF_EMBEDDED)) elen += ADATA_SIZE(a->u.ptr->length); } @@ -899,7 +903,7 @@ ea_list_copy(ea_list *n, ea_list *o, uint elen) for(uint i=0; i<o->count; i++) { eattr *a = &n->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) + if (!a->undef && !(a->type & EAF_EMBEDDED)) { unsigned size = ADATA_SIZE(a->u.ptr->length); ASSERT_DIE(adpos + size <= elen); @@ -923,12 +927,21 @@ ea_list_ref(ea_list *l) eattr *a = &l->attrs[i]; ASSERT_DIE(a->id < ea_class_max); + if (a->undef) + continue; + struct ea_class *cl = ea_class_global[a->id]; ASSERT_DIE(cl && cl->uc); 
CALL(cl->stored, a); cl->uc++; } + + if (l->next) + { + ASSERT_DIE(ea_is_cached(l->next)); + ea_clone(l->next); + } } static void @@ -939,6 +952,9 @@ ea_list_unref(ea_list *l) eattr *a = &l->attrs[i]; ASSERT_DIE(a->id < ea_class_max); + if (a->undef) + continue; + struct ea_class *cl = ea_class_global[a->id]; ASSERT_DIE(cl && cl->uc); @@ -946,6 +962,9 @@ ea_list_unref(ea_list *l) if (!--cl->uc) ea_class_free(cl); } + + if (l->next) + ea_free(l->next); } void @@ -1183,11 +1202,13 @@ ea_hash(ea_list *e) if (e) /* Assuming chain of length 1 */ { - ASSERT_DIE(!e->next); + h ^= mem_hash(&e->next, sizeof(e->next)); for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; h ^= a->id; h *= mul; + if (a->undef) + continue; if (a->type & EAF_EMBEDDED) h ^= a->u.data; else @@ -1295,7 +1316,7 @@ ea_lookup(ea_list *o) uint h; ASSERT(!ea_is_cached(o)); - o = ea_normalize(o); + o = ea_normalize(o, 1); h = ea_hash(o); for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash) @@ -1328,7 +1349,6 @@ ea__free(ea_list *a) if (a->next_hash) a->next_hash->pprev_hash = a->pprev_hash; - ASSERT(!a->next); ea_list_unref(a); mb_free(a); } diff --git a/nest/rt-show.c b/nest/rt-show.c index 35036fe6..8bf74754 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -60,7 +60,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary /* Need to normalize the extended attributes */ if (d->verbose && !rta_is_cached(a) && a) - a = ea_normalize(a); + a = ea_normalize(a, 0); get_route_info = e->src->proto->proto->get_route_info; if (get_route_info) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 6d33ef2e..46e949bf 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1253,10 +1253,10 @@ bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) * Result: one sorted attribute list segment, or NULL if attributes are unsuitable. 
*/ static inline ea_list * -bgp_export_attrs(struct bgp_export_state *s, const ea_list *a) +bgp_export_attrs(struct bgp_export_state *s, ea_list *a) { /* Merge the attribute list */ - ea_list *new = ea_normalize(a); + ea_list *new = ea_normalize(a, 0); ASSERT_DIE(new); uint i, count; diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index fcc1dcfe..fcbe317b 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -431,7 +431,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) /* Attribute list must be normalized for bgp_encode_attrs() */ if (!rta_is_cached(r->attrs)) - eattrs = ea_normalize(eattrs); + eattrs = ea_normalize(eattrs, 0); mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE); byte *pos = b->pos; From becab5072d6d84d6f9c9402387a9e1c14dcc384d Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 16 Jun 2022 23:24:56 +0200 Subject: [PATCH 03/20] Import tables are stored as an attribute layer inside the main tables. The separate import tables were too memory-greedy, there is no need for them being stored as full-sized tables. 
--- lib/route.h | 3 +- nest/config.Y | 13 +++- nest/proto.c | 77 ++++++++++--------- nest/protocol.h | 16 ++-- nest/rt-attr.c | 4 +- nest/rt-show.c | 12 ++- nest/rt-table.c | 181 ++++++-------------------------------------- nest/rt.h | 1 + proto/bgp/bgp.c | 12 +-- proto/bgp/packets.c | 2 +- proto/ospf/rt.c | 2 +- proto/perf/perf.c | 2 +- proto/pipe/pipe.c | 2 +- 13 files changed, 101 insertions(+), 226 deletions(-) diff --git a/lib/route.h b/lib/route.h index 7e28b91e..f7b089d9 100644 --- a/lib/route.h +++ b/lib/route.h @@ -161,6 +161,7 @@ typedef struct ea_list { #define EALF_SORTED 1 /* Attributes are sorted by code */ #define EALF_BISECT 2 /* Use interval bisection for searching */ #define EALF_CACHED 4 /* List is cached */ +#define EALF_OVERLAY 8 /* List is an overlay in the same table */ struct ea_class { #define EA_CLASS_INSIDE \ @@ -413,7 +414,7 @@ static inline int rte_dest(const rte *r) } void rta_init(void); -ea_list *ea_lookup(ea_list *); /* Get a cached (and normalized) variant of this attribute list */ +ea_list *ea_lookup(ea_list *, int overlay); /* Get a cached (and normalized) variant of this attribute list */ static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; } static inline ea_list *ea_clone(ea_list *r) { r->uc++; return r; } void ea__free(ea_list *r); diff --git a/nest/config.Y b/nest/config.Y index 2d73b4c7..ea7e1266 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -311,7 +311,14 @@ channel_item_: | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } - | IMPORT KEEP FILTERED bool { this_channel->in_keep_filtered = $4; } + | IMPORT KEEP FILTERED bool { + if ($4) + this_channel->in_keep |= RIK_REJECTED; + else if ((this_channel->in_keep & RIK_PREFILTER) == RIK_PREFILTER) + cf_error("Import keep filtered is implied by the import table."); + else + this_channel->in_keep &= ~RIK_REJECTED; 
+ } | RPKI RELOAD bool { this_channel->rpki_reload = $3; } ; @@ -674,8 +681,8 @@ r_args: $$->tables_defined_by = RSD_TDB_ALL; } | r_args IMPORT TABLE channel_arg { - if (!$4->in_table) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->in_table); + if (!($4->in_keep & RIK_PREFILTER)) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); + rt_show_add_table($$, $4->table)->prefilter = $4; $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args EXPORT TABLE channel_arg { diff --git a/nest/proto.c b/nest/proto.c index 77817888..5e67d940 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -224,7 +224,7 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->preference = cf->preference; c->debug = cf->debug; c->merge_limit = cf->merge_limit; - c->in_keep_filtered = cf->in_keep_filtered; + c->in_keep = cf->in_keep; c->rpki_reload = cf->rpki_reload; c->channel_state = CS_DOWN; @@ -294,7 +294,7 @@ static void channel_roa_in_changed(struct rt_subscription *s) { struct channel *c = s->data; - int active = c->reload_event && ev_active(c->reload_event); + int active = !!c->reload_req.hook; CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : ""); @@ -379,7 +379,7 @@ channel_roa_subscribe_filter(struct channel *c, int dir) #ifdef CONFIG_BGP /* No automatic reload for BGP channels without in_table / out_table */ if (c->channel == &channel_bgp) - valid = dir ? !!c->in_table : !!c->out_table; + valid = dir ? 
((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) : !!c->out_table; #endif struct filter_iterator fit; @@ -534,9 +534,6 @@ channel_import_stopped(struct rt_import_request *req) req->hook = NULL; - if (c->in_table) - rt_prune_sync(c->in_table, 1); - mb_free(c->in_req.name); c->in_req.name = NULL; @@ -603,43 +600,48 @@ channel_schedule_reload(struct channel *c) { ASSERT(c->in_req.hook); - rt_reload_channel_abort(c); - ev_schedule_work(c->reload_event); + rt_request_export(c->table, &c->reload_req); } static void -channel_reload_loop(void *ptr) +channel_reload_stopped(struct rt_export_request *req) { - struct channel *c = ptr; - - /* Start reload */ - if (!c->reload_active) - c->reload_pending = 0; - - if (!rt_reload_channel(c)) - { - ev_schedule_work(c->reload_event); - return; - } + struct channel *c = SKIP_BACK(struct channel, reload_req, req); /* Restart reload */ if (c->reload_pending) channel_request_reload(c); } +static void +channel_reload_log_state_change(struct rt_export_request *req, u8 state) +{ + if (state == TES_READY) + rt_stop_export(req, channel_reload_stopped); +} + +static void +channel_reload_dump_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, reload_req, req); + debug(" Channel %s.%s import reload request %p\n", c->proto->name, c->name, req); +} + +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + /* Called by protocol to activate in_table */ void channel_setup_in_table(struct channel *c) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); + c->reload_req = (struct rt_export_request) { + .name = mb_sprintf(c->proto->pool, "%s.%s.import", c->proto->name, c->name), + .trace_routes = c->debug | c->proto->debug, + .export_bulk = channel_reload_export_bulk, + .dump_req = channel_reload_dump_req, + .log_state_change = channel_reload_log_state_change, + }; - cf->name = "import"; - 
cf->addr_type = c->net_type; - cf->internal = 1; - - c->in_table = rt_setup(c->proto->pool, cf); - - c->reload_event = ev_new_init(c->proto->pool, channel_reload_loop, c); + c->in_keep |= RIK_PREFILTER; } /* Called by protocol to activate out_table */ @@ -680,10 +682,10 @@ static void channel_do_pause(struct channel *c) { /* Need to abort feeding */ - if (c->reload_event) + if (c->reload_req.hook) { - ev_postpone(c->reload_event); - rt_reload_channel_abort(c); + c->reload_pending = 0; + rt_stop_export(&c->reload_req, channel_reload_stopped); } /* Stop export */ @@ -710,15 +712,13 @@ channel_do_stop(struct channel *c) CALL(c->channel->shutdown, c); /* This have to be done in here, as channel pool is freed before channel_do_down() */ - c->in_table = NULL; - c->reload_event = NULL; c->out_table = NULL; } static void channel_do_down(struct channel *c) { - ASSERT(!c->reload_active); + ASSERT(!c->reload_req.hook); c->proto->active_channels--; @@ -726,8 +726,6 @@ channel_do_down(struct channel *c) memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); - c->in_table = NULL; - c->reload_event = NULL; c->out_table = NULL; /* The in_table and out_table are going to be freed by freeing their resource pools. 
*/ @@ -922,7 +920,9 @@ int channel_reconfigure(struct channel *c, struct channel_config *cf) { /* FIXME: better handle these changes, also handle in_keep_filtered */ - if ((c->table != cf->table->table) || (cf->ra_mode && (c->ra_mode != cf->ra_mode))) + if ((c->table != cf->table->table) || + (cf->ra_mode && (c->ra_mode != cf->ra_mode)) || + (cf->in_keep != c->in_keep)) return 0; /* Note that filter_same() requires arguments in (new, old) order */ @@ -949,7 +949,6 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) c->preference = cf->preference; c->debug = cf->debug; c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; - c->in_keep_filtered = cf->in_keep_filtered; c->rpki_reload = cf->rpki_reload; /* Execute channel-specific reconfigure hook */ @@ -2099,7 +2098,7 @@ channel_show_stats(struct channel *c) u32 in_routes = c->in_limit.count; u32 out_routes = c->out_limit.count; - if (c->in_keep_filtered) + if (c->in_keep) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", in_routes, (rx_routes - in_routes), out_routes, SRI(pref)); else diff --git a/nest/protocol.h b/nest/protocol.h index aeb60ac6..b482ed99 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -454,7 +454,7 @@ struct channel_config { const struct filter *in_filter, *out_filter; /* Attached filters */ struct channel_limit rx_limit; /* Limit for receiving routes from protocol - (relevant when in_keep_filtered is active) */ + (relevant when in_keep & RIK_REJECTED) */ struct channel_limit in_limit; /* Limit for importing routes from protocol */ struct channel_limit out_limit; /* Limit for exporting routes to protocol */ @@ -463,7 +463,7 @@ struct channel_config { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ 
u8 rpki_reload; /* RPKI changes trigger channel reload */ }; @@ -480,7 +480,7 @@ struct channel { struct bmap export_map; /* Keeps track which routes were really exported */ struct bmap export_reject_map; /* Keeps track which routes were rejected by export filter */ - struct limit rx_limit; /* Receive limit (for in_keep_filtered) */ + struct limit rx_limit; /* Receive limit (for in_keep & RIK_REJECTED) */ struct limit in_limit; /* Input limit */ struct limit out_limit; /* Output limit */ @@ -517,7 +517,7 @@ struct channel { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ u8 disabled; u8 stale; /* Used in reconfiguration */ @@ -529,11 +529,7 @@ struct channel { btime last_state_change; /* Time of last state transition */ - struct rtable *in_table; /* Internal table for received routes */ - struct event *reload_event; /* Event responsible for reloading from in_table */ - struct fib_iterator reload_fit; /* FIB iterator in in_table used during reloading */ - struct rte_storage *reload_next_rte; /* Route iterator in in_table used during reloading */ - u8 reload_active; /* Iterator reload_fit is linked */ + struct rt_export_request reload_req; /* Feeder for import reload */ u8 reload_pending; /* Reloading and another reload is scheduled */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ @@ -544,6 +540,8 @@ struct channel { list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */ }; +#define RIK_REJECTED 1 /* Routes rejected in import filter are kept */ +#define RIK_PREFILTER (2 | RIK_REJECTED) /* All routes' attribute state before import filter is kept */ /* * Channel states diff --git a/nest/rt-attr.c b/nest/rt-attr.c index c0f81b9d..35f85274 100644 --- a/nest/rt-attr.c +++ 
b/nest/rt-attr.c @@ -1310,13 +1310,13 @@ rta_rehash(void) * converted to the normalized form. */ ea_list * -ea_lookup(ea_list *o) +ea_lookup(ea_list *o, int overlay) { ea_list *r; uint h; ASSERT(!ea_is_cached(o)); - o = ea_normalize(o, 1); + o = ea_normalize(o, overlay); h = ea_hash(o); for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash) diff --git a/nest/rt-show.c b/nest/rt-show.c index 8bf74754..12ddc816 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -26,7 +26,8 @@ rt_show_table(struct cli *c, struct rt_show_data *d) return; if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, -1007, "Table %s:", d->tab->table->name); + cli_printf(c, -1007, "Table %s:", + d->tab->prefilter ? "import" : d->tab->table->name); d->last_table = d->tab; } @@ -156,7 +157,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) for (struct rte_storage *er = n->routes; er; er = er->next) { - if (rte_is_filtered(&er->rte) != d->filtered) + if (!d->tab->prefilter && (rte_is_filtered(&er->rte) != d->filtered)) continue; d->rt_counter++; @@ -167,6 +168,11 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) continue; struct rte e = er->rte; + if (d->tab->prefilter) + if (e.sender != d->tab->prefilter->in_req.hook) + continue; + else while (e.attrs->next) + e.attrs = e.attrs->next; /* Export channel is down, do not try to export routes to it */ if (ec && !ec->out_req.hook) @@ -239,7 +245,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) else ia[0] = 0; - rt_show_rte(c, ia, &e, d, (n->routes == er)); + rt_show_rte(c, ia, &e, d, !d->tab->prefilter && (n->routes == er)); first_show = 0; } diff --git a/nest/rt-table.c b/nest/rt-table.c index 1631e00f..975da363 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -606,7 +606,7 @@ rte_store(const rte *r, net *net, rtable *tab) if (ea_is_cached(e->rte.attrs)) e->rte.attrs = rta_clone(e->rte.attrs); else - e->rte.attrs = rta_lookup(e->rte.attrs); + e->rte.attrs = rta_lookup(e->rte.attrs, 1); return 
e; } @@ -1513,7 +1513,7 @@ channel_preimport(struct rt_import_request *req, rte *new, rte *old) if (new_in && !old_in) if (CHANNEL_LIMIT_PUSH(c, IN)) - if (c->in_keep_filtered) + if (c->in_keep & RIK_REJECTED) { new->flags |= REF_FILTERED; return new; @@ -1527,8 +1527,6 @@ channel_preimport(struct rt_import_request *req, rte *new, rte *old) return new; } -static void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); - void rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { @@ -1537,15 +1535,13 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) ASSERT(c->channel_state == CS_UP); - if (c->in_table && !rte_update_in(c, n, new, src)) - return; + /* The import reloader requires prefilter routes to be the first layer */ + if (new && (c->in_keep & RIK_PREFILTER)) + if (ea_is_cached(new->attrs) && !new->attrs->next) + new->attrs = ea_clone(new->attrs); + else + new->attrs = ea_lookup(new->attrs, 0); - return rte_update_direct(c, n, new, src); -} - -static void -rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ const struct filter *filter = c->in_filter; struct channel_import_stats *stats = &c->import_stats; @@ -1563,7 +1559,7 @@ rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src stats->updates_filtered++; channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); - if (c->in_keep_filtered) + if (c->in_keep & RIK_REJECTED) new->flags |= REF_FILTERED; else new = NULL; @@ -1588,6 +1584,11 @@ rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src rte_import(&c->in_req, n, new, src); + /* Now the route attributes are kept by the in-table cached version + * and we may drop the local handle */ + if (new && (c->in_keep & RIK_PREFILTER)) + ea_free(new->attrs); + rte_update_unlock(); } @@ -3183,154 +3184,22 @@ done: * Import table */ -int -rte_update_in(struct channel *c, const net_addr *n, 
rte *new, struct rte_src *src) + +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - struct rtable *tab = c->in_table; - net *net; + struct channel *c = SKIP_BACK(struct channel, reload_req, req); - if (new) - net = net_get(tab, n); - else - { - net = net_find(tab, n); - - if (!net) - goto drop_withdraw; - } - - /* Find the old rte */ - struct rte_storage **pos = rte_find(net, src); - if (*pos) + for (uint i=0; i<count; i++) + if (feed[i]->sender == c->in_req.hook) { - rte *old = &(*pos)->rte; - if (new && rte_same(old, new)) - { - /* Refresh the old rte, continue with update to main rtable */ - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } + /* Strip the later attribute layers */ + rte new = *feed[i]; + while (new.attrs->next) + new.attrs = new.attrs->next; - goto drop_update; - } - - if (!new) - CHANNEL_LIMIT_POP(c, RX); - - /* Move iterator if needed */ - if (*pos == c->reload_next_rte) - c->reload_next_rte = (*pos)->next; - - /* Remove the old rte */ - struct rte_storage *del = *pos; - *pos = (*pos)->next; - rte_free(del); - tab->rt_count--; + /* And reload the route */ + rte_update(c, net, &new, new.src); } - else if (new) - { - if (CHANNEL_LIMIT_PUSH(c, RX)) - { - /* Required by rte_trace_in() */ - new->net = n; - - channel_rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } - else - goto drop_withdraw; - - if (!new) - { - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - struct rte_storage *e = rte_store(new, net, tab); - e->rte.lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop_update: - c->import_stats.updates_received++; - c->in_req.hook->stats.updates_ignored++; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 0; - -drop_withdraw: - 
c->import_stats.withdraws_received++; - c->in_req.hook->stats.withdraws_ignored++; - return 0; -} - -int -rt_reload_channel(struct channel *c) -{ - struct rtable *tab = c->in_table; - struct fib_iterator *fit = &c->reload_fit; - int max_feed = 64; - - ASSERT(c->channel_state == CS_UP); - - if (!c->reload_active) - { - FIB_ITERATE_INIT(fit, &tab->fib); - c->reload_active = 1; - } - - do { - for (struct rte_storage *e = c->reload_next_rte; e; e = e->next) - { - if (max_feed-- <= 0) - { - c->reload_next_rte = e; - debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed); - return 0; - } - - rte r = e->rte; - rte_update_direct(c, r.net, &r, r.src); - } - - c->reload_next_rte = NULL; - - FIB_ITERATE_START(&tab->fib, fit, net, n) - { - if (c->reload_next_rte = n->routes) - { - FIB_ITERATE_PUT_NEXT(fit, &tab->fib); - break; - } - } - FIB_ITERATE_END; - } - while (c->reload_next_rte); - - c->reload_active = 0; - return 1; -} - -void -rt_reload_channel_abort(struct channel *c) -{ - if (c->reload_active) - { - /* Unlink the iterator */ - fit_get(&c->in_table->fib, &c->reload_fit); - c->reload_next_rte = NULL; - c->reload_active = 0; - } } void diff --git a/nest/rt.h b/nest/rt.h index 32bba6a6..d5e28cb6 100644 --- a/nest/rt.h +++ b/nest/rt.h @@ -387,6 +387,7 @@ struct rt_show_data_rtable { node n; rtable *table; struct channel *export_channel; + struct channel *prefilter; }; struct rt_show_data { diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 84430287..8df99420 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -900,9 +900,6 @@ bgp_refresh_begin(struct bgp_channel *c) c->load_state = BFS_REFRESHING; rt_refresh_begin(c->c.table, &c->c.in_req); - - if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c.in_req); } /** @@ -924,9 +921,6 @@ bgp_refresh_end(struct bgp_channel *c) c->load_state = BFS_NONE; rt_refresh_end(c->c.table, &c->c.in_req); - - if (c->c.in_table) - rt_prune_sync(c->c.in_table, 0); } @@ -1393,9 +1387,9 @@ 
bgp_reload_routes(struct channel *C) struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ASSERT(p->conn && (p->route_refresh || c->c.in_table)); + ASSERT(p->conn && (p->route_refresh || (C->in_keep & RIK_PREFILTER))); - if (c->c.in_table) + if (C->in_keep & RIK_PREFILTER) channel_schedule_reload(C); else bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); @@ -2153,7 +2147,7 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor (new->cost != old->cost)) { /* import_changed itself does not force ROUTE_REFRESH when import_table is active */ - if (c->c.in_table && (c->c.channel_state == CS_UP)) + if ((c->c.in_keep & RIK_PREFILTER) && (c->c.channel_state == CS_UP)) bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); *import_changed = 1; diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 45ee4ed2..c2e98340 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -1392,7 +1392,7 @@ bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_lis /* Prepare cached route attributes */ if (s->cached_ea == NULL) - s->cached_ea = ea_lookup(a0); + s->cached_ea = ea_lookup(a0, 0); rte e0 = { .attrs = s->cached_ea, diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index aedf3df6..69c2907d 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -2086,7 +2086,7 @@ again1: ASSERT_DIE(ARRAY_SIZE(eattrs.a) >= eattrs.l.count); - ea_list *eal = ea_lookup(&eattrs.l); + ea_list *eal = ea_lookup(&eattrs.l, 0); ea_free(nf->old_ea); nf->old_ea = eal; diff --git a/proto/perf/perf.c b/proto/perf/perf.c index 67ad2ada..d82ac8aa 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -156,7 +156,7 @@ perf_loop(void *data) ea_set_attr_data(&ea, &ea_gen_nexthop, 0, &nhad.ad.data, sizeof nhad - sizeof nhad.ad); - p->data[i].a = rta_lookup(ea); + p->data[i].a = rta_lookup(ea, 0); } else p->data[i].a = rta_clone(p->data[i-1].a); diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 99d4b737..d12e6731 100644 --- 
a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -126,7 +126,7 @@ pipe_postconfig(struct proto_config *CF) if (cc->rx_limit.action) cf_error("Pipe protocol does not support receive limits"); - if (cc->in_keep_filtered) + if (cc->in_keep) cf_error("Pipe protocol prohibits keeping filtered routes"); cc->debug = cf->c.debug; From 050b4b4e5e6d4c05345bed2ec62c865258b4e7ee Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Mon, 20 Jun 2022 21:29:10 +0200 Subject: [PATCH 04/20] Table export generalized to allow for exporting from non-tables --- nest/proto.c | 6 ++-- nest/rt-table.c | 90 ++++++++++++++++++++++++++++++++++--------------- nest/rt.h | 17 +++++++--- 3 files changed, 79 insertions(+), 34 deletions(-) diff --git a/nest/proto.c b/nest/proto.c index 5e67d940..8513b9cb 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -498,7 +498,7 @@ channel_start_export(struct channel *c) } DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req); - rt_request_export(c->table, &c->out_req); + rt_request_export(&c->table->exporter, &c->out_req); } static void @@ -552,7 +552,7 @@ channel_export_stopped(struct rt_export_request *req) { c->refeeding = 1; c->refeed_pending = 0; - rt_request_export(c->table, req); + rt_request_export(&c->table->exporter, req); return; } @@ -600,7 +600,7 @@ channel_schedule_reload(struct channel *c) { ASSERT(c->in_req.hook); - rt_request_export(c->table, &c->reload_req); + rt_request_export(&c->table->exporter, &c->reload_req); } static void diff --git a/nest/rt-table.c b/nest/rt-table.c index 975da363..14c80138 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -139,7 +139,6 @@ const char *rt_import_state_name_array[TIS_MAX] = { const char *rt_export_state_name_array[TES_MAX] = { [TES_DOWN] = "DOWN", - [TES_HUNGRY] = "HUNGRY", [TES_FEEDING] = "FEEDING", [TES_READY] = "READY", [TES_STOP] = "STOP" @@ -1183,7 +1182,7 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage } struct rt_export_hook *eh; - 
WALK_LIST(eh, tab->exports) + WALK_LIST(eh, tab->exporter.hooks) { if (eh->export_state == TES_STOP) continue; @@ -1677,11 +1676,21 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte return v > 0; } +static void +rt_table_export_done(struct rt_export_hook *hook) +{ + struct rt_exporter *re = hook->table; + struct rtable *tab = SKIP_BACK(struct rtable, exporter, re); + + rt_unlock_table(tab); + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); +} + static void rt_export_stopped(void *data) { struct rt_export_hook *hook = data; - rtable *tab = hook->table; + struct rt_exporter *tab = hook->table; /* Unlist */ rem_node(&hook->n); @@ -1689,14 +1698,13 @@ rt_export_stopped(void *data) /* Reporting the channel as stopped. */ hook->stopped(hook->req); + /* Reporting the hook as finished. */ + tab->done(hook); + /* Freeing the hook together with its coroutine. */ rfree(hook->pool); - rt_unlock_table(tab); - - DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); } - static inline void rt_set_import_state(struct rt_import_hook *hook, u8 state) { @@ -1748,54 +1756,73 @@ rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_r hook->stopped = stopped; } -void -rt_request_export(rtable *tab, struct rt_export_request *req) +static struct rt_export_hook * +rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) { + rtable *tab = SKIP_BACK(rtable, exporter, re); rt_lock_table(tab); pool *p = rp_new(tab->rp, "Export hook"); - struct rt_export_hook *hook = req->hook = mb_allocz(p, sizeof(struct rt_export_hook)); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); hook->pool = p; hook->lp = lp_new_default(p); - - hook->req = req; - hook->table = tab; /* stats zeroed by mb_allocz */ - rt_set_export_state(hook, TES_HUNGRY); - - hook->n = (node) {}; - add_tail(&tab->exports, &hook->n); - 
FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); + hook->event = ev_new_init(p, rt_feed_channel, hook); + + return hook; +} + +void +rt_request_export(struct rt_exporter *re, struct rt_export_request *req) +{ + struct rt_export_hook *hook = req->hook = re->start(re, req); + + hook->req = req; + hook->table = re; + + hook->n = (node) {}; + add_tail(&re->hooks, &hook->n); + rt_set_export_state(hook, TES_FEEDING); - hook->event = ev_new_init(p, rt_feed_channel, hook); ev_schedule_work(hook->event); } +static void +rt_table_export_stop(struct rt_export_hook *hook) +{ + rtable *tab = SKIP_BACK(rtable, exporter, hook->table); + + if (hook->export_state == TES_FEEDING) + fit_get(&tab->fib, &hook->feed_fit); +} + void rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) { ASSERT_DIE(req->hook); struct rt_export_hook *hook = req->hook; - rtable *tab = hook->table; - - /* Stop feeding */ + /* Cancel the feeder event */ ev_postpone(hook->event); - if (hook->export_state == TES_FEEDING) - fit_get(&tab->fib, &hook->feed_fit); + /* Stop feeding from the exporter */ + hook->table->stop(hook); + /* Reset the event as the stopped event */ hook->event->hook = rt_export_stopped; hook->stopped = stopped; + /* Update export state */ rt_set_export_state(hook, TES_STOP); + + /* Run the stopped event */ ev_schedule(hook->event); } @@ -1948,7 +1975,7 @@ rt_dump_hooks(rtable *tab) } struct rt_export_hook *eh; - WALK_LIST(eh, tab->exports) + WALK_LIST(eh, tab->exporter.hooks) { eh->req->dump_req(eh->req); debug(" Export hook %p requested by %p:" @@ -2252,10 +2279,17 @@ rt_setup(pool *pp, struct rtable_config *cf) init_list(&t->flowspec_links); + t->exporter = (struct rt_exporter) { + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, + }; + init_list(&t->exporter.hooks); + if (!(t->internal = cf->internal)) { 
init_list(&t->imports); - init_list(&t->exports); + hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); @@ -3138,7 +3172,9 @@ rt_feed_channel(void *data) ASSERT(c->export_state == TES_FEEDING); - FIB_ITERATE_START(&c->table->fib, fit, net, n) + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + + FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { diff --git a/nest/rt.h b/nest/rt.h index d5e28cb6..0f5a5ba7 100644 --- a/nest/rt.h +++ b/nest/rt.h @@ -56,6 +56,16 @@ struct rtable_config { btime max_settle_time; /* Maximum settle time for notifications */ }; +struct rt_export_hook; +struct rt_export_request; + +struct rt_exporter { + list hooks; /* Registered route export hooks */ + struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *); + void (*stop)(struct rt_export_hook *); + void (*done)(struct rt_export_hook *); +}; + typedef struct rtable { resource r; node n; /* Node in list of all tables */ @@ -69,7 +79,7 @@ typedef struct rtable { u32 rt_count; /* Number of routes in the table */ list imports; /* Registered route importers */ - list exports; /* Registered route exporters */ + struct rt_exporter exporter; /* Exporter API structure */ struct hmap id_map; struct hostcache *hostcache; @@ -221,7 +231,7 @@ struct rt_export_request { struct rt_export_hook { node n; - rtable *table; /* The connected table */ + struct rt_exporter *table; /* The connected table */ pool *pool; linpool *lp; @@ -255,14 +265,13 @@ struct rt_export_hook { #define TIS_MAX 6 #define TES_DOWN 0 -#define TES_HUNGRY 1 #define TES_FEEDING 2 #define TES_READY 3 #define TES_STOP 4 #define TES_MAX 5 void rt_request_import(rtable *tab, struct rt_import_request *req); -void rt_request_export(rtable *tab, struct rt_export_request *req); +void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req); void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); void rt_stop_export(struct rt_export_request 
*, void (*stopped)(struct rt_export_request *)); From 9c9059fd172dcc2f8805529de4b3174f280c109c Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 22 Jun 2022 12:45:42 +0200 Subject: [PATCH 05/20] Allowed optimized exporting of a subprefix tree Added an option for export filter to allow for prefiltering based on the prefix. Routes outside the given prefix are completely ignored. Config is simple: export in <net> <filter>; --- doc/bird.sgml | 8 ++-- nest/config.Y | 6 +++ nest/proto.c | 8 +++- nest/protocol.h | 2 + nest/rt-table.c | 89 +++++++++++++++++++++++++++++++++++++-------- nest/rt.h | 3 ++ proto/pipe/config.Y | 6 +++ proto/pipe/pipe.c | 8 ++++ proto/pipe/pipe.h | 1 + 9 files changed, 111 insertions(+), 20 deletions(-) diff --git a/doc/bird.sgml b/doc/bird.sgml index d17de23f..c9ce670b 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -914,10 +914,12 @@ inherited from templates can be updated by new definitions.