/* * BIRD -- UNIX Kernel Synchronization * * (c) 1998--2000 Martin Mares * * Can be freely distributed and used under the terms of the GNU GPL. */ /** * DOC: Kernel synchronization * * This system dependent module implements the Kernel and Device protocol, * that is synchronization of interface lists and routing tables with the * OS kernel. * * The whole kernel synchronization is a bit messy and touches some internals * of the routing table engine, because routing table maintenance is a typical * example of the proverbial compatibility between different Unices and we want * to keep the overhead of our KRT business as low as possible and avoid maintaining * a local routing table copy. * * The kernel syncer can work in three different modes (according to system config header): * Either with a single routing table and single KRT protocol [traditional UNIX] * or with many routing tables and separate KRT protocols for all of them * or with many routing tables, but every scan including all tables, so we start * separate KRT protocols which cooperate with each other [Linux]. * In this case, we keep only a single scan timer. * * We use FIB node flags in the routing table to keep track of route * synchronization status. We also attach temporary &rte's to the routing table, * but it cannot do any harm to the rest of BIRD since table synchronization is * an atomic process. * * When starting up, we cheat by looking if there is another * KRT instance to be initialized later and performing table scan * only once for all the instances. * * The code uses OS-dependent parts for kernel updates and scans. These parts are * in more specific sysdep directories (e.g. sysdep/linux) in functions krt_sys_* * and kif_sys_* (and some others like krt_replace_rte()) and krt-sys.h header file. * This is also used for platform specific protocol options and route attributes. * * There was also an old code that used traditional UNIX ioctls for these tasks. * It was unmaintained and later removed. For reference, see sysdep/krt-* files * in commit 396dfa9042305f62da1f56589c4b98fac57fc2f6 */ /* * If you are brave enough, continue now. You cannot say you haven't been warned. */ #undef LOCAL_DEBUG #include "nest/bird.h" #include "nest/iface.h" #include "nest/rt.h" #include "nest/protocol.h" #include "filter/filter.h" #include "conf/conf.h" #include "lib/string.h" #include "lib/timer.h" #include "unix.h" #include "krt.h" /* * Global resources */ pool *krt_pool; static linpool *krt_filter_lp; static list krt_proto_list; void krt_io_init(void) { krt_pool = rp_new(&root_pool, the_bird_domain.the_bird, "Kernel Syncer"); krt_filter_lp = lp_new_default(krt_pool); init_list(&krt_proto_list); krt_sys_io_init(); } /* * Interfaces */ struct kif_proto *kif_proto; static struct kif_config *kif_cf; static timer *kif_scan_timer; static btime kif_last_shot; static struct kif_iface_config kif_default_iface = {}; struct kif_iface_config * kif_get_iface_config(struct iface *iface) { struct kif_config *cf = (void *) (kif_proto->p.cf); struct kif_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL); return ic ?: &kif_default_iface; } static void kif_scan(timer *t) { struct kif_proto *p = t->data; KRT_TRACE(p, D_EVENTS, "Scanning interfaces"); kif_last_shot = current_time(); kif_do_scan(p); } static void kif_force_scan(void) { if (kif_proto && ((kif_last_shot + 2 S) < current_time())) { kif_scan(kif_scan_timer); tm_start(kif_scan_timer, ((struct kif_config *) kif_proto->p.cf)->scan_time); } } void kif_request_scan(void) { if (kif_proto && (kif_scan_timer->expires > (current_time() + 1 S))) tm_start(kif_scan_timer, 1 S); } static struct proto * kif_init(struct proto_config *c) { struct kif_proto *p = proto_new(c); kif_sys_init(p); return &p->p; } static int kif_start(struct proto *P) { struct kif_proto *p = (struct kif_proto *) P; kif_proto = p; kif_sys_start(p); /* Start periodic interface scanning */ kif_scan_timer = tm_new_init(P->pool, kif_scan, p, KIF_CF->scan_time, 0); kif_scan(kif_scan_timer); tm_start(kif_scan_timer, KIF_CF->scan_time); return PS_UP; } static int kif_shutdown(struct proto *P) { struct kif_proto *p = (struct kif_proto *) P; tm_stop(kif_scan_timer); kif_sys_shutdown(p); kif_proto = NULL; return PS_DOWN; } static void kif_cleanup(struct proto *p) { if (p->debug & D_EVENTS) log(L_TRACE "%s: Flushing interfaces", p->name); if_start_update(); if_end_update(); } static int kif_reconfigure(struct proto *p, struct proto_config *new) { struct kif_config *o = (struct kif_config *) p->cf; struct kif_config *n = (struct kif_config *) new; if (!kif_sys_reconfigure((struct kif_proto *) p, n, o)) return 0; if (o->scan_time != n->scan_time) { tm_stop(kif_scan_timer); kif_scan_timer->recurrent = n->scan_time; kif_scan(kif_scan_timer); tm_start(kif_scan_timer, n->scan_time); } if (!EMPTY_LIST(o->iface_list) || !EMPTY_LIST(n->iface_list)) { /* This is hack, we have to update a configuration * to the new value just now, because it is used * for recalculation of preferred addresses. */ p->cf = new; if_recalc_all_preferred_addresses(); } return 1; } static void kif_preconfig(struct protocol *P UNUSED, struct config *c) { kif_cf = NULL; kif_sys_preconfig(c); } struct proto_config * kif_init_config(int class) { if (kif_cf) cf_error("Kernel device protocol already defined"); kif_cf = (struct kif_config *) proto_config_new(&proto_unix_iface, class); kif_cf->scan_time = 60 S; init_list(&kif_cf->iface_list); kif_sys_init_config(kif_cf); return (struct proto_config *) kif_cf; } static void kif_copy_config(struct proto_config *dest, struct proto_config *src) { struct kif_config *d = (struct kif_config *) dest; struct kif_config *s = (struct kif_config *) src; /* Copy interface config list */ cfg_copy_list(&d->iface_list, &s->iface_list, sizeof(struct kif_iface_config)); /* Fix sysdep parts */ kif_sys_copy_config(d, s); } struct protocol proto_unix_iface = { .name = "Device", .template = "device%d", .proto_size = sizeof(struct kif_proto), .config_size = sizeof(struct kif_config), .startup = PROTOCOL_STARTUP_NECESSARY, .preconfig = kif_preconfig, .init = kif_init, .start = kif_start, .shutdown = kif_shutdown, .cleanup = kif_cleanup, .reconfigure = kif_reconfigure, .copy_config = kif_copy_config }; void kif_build(void) { proto_build(&proto_unix_iface); } /* * Tracing of routes */ static inline void krt_trace_in(struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) log(L_TRACE "%s: %N: %s", p->p.name, e->net, msg); } static inline void krt_trace_in_rl(struct tbf *f, struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net, msg); } /* * Inherited Routes */ #ifdef KRT_ALLOW_LEARN static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS; /* * krt_same_key() specifies what (aside from the net) is the key in * kernel routing tables. It should be OS-dependent, this is for * Linux. It is important for asynchronous alien updates, because a * positive update is implicitly a negative one for any old route with * the same key. */ static inline u32 krt_metric(rte *a) { eattr *ea = ea_find(a->attrs, &ea_krt_metric); return ea ? ea->u.data : 0; } static void krt_learn_alien_attr(struct channel *c, rte *e) { ea_set_attr_u32(&e->attrs, &ea_gen_preference, 0, c->preference); } /* Called when alien route is discovered during scan */ static void krt_learn_scan(struct krt_proto *p, rte *e) { rte e0 = { .attrs = e->attrs, .src = rt_get_source(&p->p, krt_metric(e)), }; krt_learn_alien_attr(p->p.main_channel, &e0); rte_update(p->p.main_channel, e->net, &e0, e0.src); rt_unlock_source(e0.src); } static void krt_learn_async(struct krt_proto *p, rte *e, int new) { if (new) return krt_learn_scan(p, e); struct rte_src *src = rt_get_source(&p->p, krt_metric(e)); rte_update(p->p.main_channel, e->net, NULL, src); rt_unlock_source(src); } #endif /* * Routes */ static inline int krt_is_installed(struct krt_proto *p, net *n) { return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->rte.id); } static uint rte_feed_count_valid(net *n) { uint count = 0; for (struct rte_storage *e = n->routes; e; e = e->next) if (rte_is_valid(RTE_OR_NULL(e))) count++; return count; } static void rte_feed_obtain_valid(net *n, const rte **feed, uint count) { uint i = 0; for (struct rte_storage *e = n->routes; e; e = e->next) if (rte_is_valid(RTE_OR_NULL(e))) { ASSERT_DIE(i < count); feed[i++] = &e->rte; } ASSERT_DIE(i == count); } static struct rte * krt_export_net(struct krt_proto *p, net *net) { /* FIXME: Here we are calling filters in table-locked context when exporting * to kernel. Here BIRD can crash if the user requested ROA check in kernel * export filter. It doesn't make much sense to write the filters like this, * therefore we may keep this unfinished piece of work here for later as it * won't really affect anybody. */ ASSERT_DIE(RT_IS_LOCKED(p->p.main_channel->table)); struct channel *c = p->p.main_channel; const struct filter *filter = c->out_filter; if (c->ra_mode == RA_MERGED) { uint count = rte_feed_count_valid(net); if (!count) return NULL; const rte **feed = alloca(count * sizeof(rte *)); rte_feed_obtain_valid(net, feed, count); return rt_export_merged(c, net->n.addr, feed, count, krt_filter_lp, 1); } static _Thread_local rte rt; rt = net->routes->rte; if (!rte_is_valid(&rt)) return NULL; if (filter == FILTER_REJECT) return NULL; /* We could run krt_preexport() here, but it is already handled by krt_is_installed() */ if (filter == FILTER_ACCEPT) goto accept; if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT) goto reject; accept: return &rt; reject: return NULL; } static int krt_same_dest(rte *k, rte *e) { ea_list *ka = k->attrs, *ea = e->attrs; eattr *nhea_k = ea_find(ka, &ea_gen_nexthop); eattr *nhea_e = ea_find(ea, &ea_gen_nexthop); return (!nhea_k == !nhea_e) && adata_same(nhea_k->u.ptr, nhea_e->u.ptr); } /* * This gets called back when the low-level scanning code discovers a route. * We expect that the route is a temporary rte and its attributes are uncached. */ void krt_got_route(struct krt_proto *p, rte *e, s8 src) { /* Ignore when flushing from table */ if (p->flush_routes == 1) return; rte *new = NULL; e->pflags = 0; #ifdef KRT_ALLOW_LEARN switch (src) { case KRT_SRC_KERNEL: krt_trace_in(p, e, "ignored"); return; case KRT_SRC_REDIRECT: krt_trace_in(p, e, "deleting"); krt_replace_rte(p, e->net, NULL, e); return; case KRT_SRC_ALIEN: if (KRT_CF->learn) krt_learn_scan(p, e); else krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored"); return; } #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ RT_LOCKED(p->p.main_channel->table, tab) { /* Deleting all routes if final flush is requested */ if (p->flush_routes == 2) goto delete; /* We wait for the initial feed to have correct installed state */ if (!p->ready) goto ignore; net *net = net_find(tab, e->net); if (!net || !krt_is_installed(p, net)) goto delete; new = krt_export_net(p, net); /* Rejected by filters */ if (!new) goto delete; /* Route to this destination was already seen. Strange, but it happens... */ if (bmap_test(&p->seen_map, new->id)) goto aseen; /* Mark route as seen */ bmap_set(&p->seen_map, new->id); /* TODO: There also may be changes in route eattrs, we ignore that for now. */ if (!bmap_test(&p->sync_map, new->id) || !krt_same_dest(e, new)) goto update; goto seen; seen: krt_trace_in(p, e, "seen"); goto done; aseen: krt_trace_in(p, e, "already seen"); goto done; ignore: krt_trace_in(p, e, "ignored"); goto done; update: krt_trace_in(p, new, "updating"); krt_replace_rte(p, e->net, new, e); goto done; delete: krt_trace_in(p, e, "deleting"); krt_replace_rte(p, e->net, NULL, e); goto done; done:; } lp_flush(krt_filter_lp); } static void krt_init_scan(struct krt_proto *p) { bmap_reset(&p->seen_map, 1024); } static void krt_prune(struct krt_proto *p) { RT_LOCKED(p->p.main_channel->table, t) { KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name); FIB_WALK(&t->fib, net, n) { if (p->ready && krt_is_installed(p, n) && !bmap_test(&p->seen_map, n->routes->rte.id)) { rte *new = krt_export_net(p, n); if (new) { krt_trace_in(p, new, "installing"); krt_replace_rte(p, n->n.addr, new, NULL); } lp_flush(krt_filter_lp); } } FIB_WALK_END; if (p->ready) p->initialized = 1; } } void krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) { e->pflags = 0; switch (src) { case KRT_SRC_BIRD: /* Should be filtered by the back end */ bug("BIRD originated routes should not get here."); case KRT_SRC_REDIRECT: if (new) { krt_trace_in(p, e, "[redirect] deleting"); krt_replace_rte(p, e->net, NULL, e); } /* If !new, it is probably echo of our deletion */ break; #ifdef KRT_ALLOW_LEARN case KRT_SRC_ALIEN: if (KRT_CF->learn) { krt_learn_async(p, e, new); return; } #endif } } /* * Periodic scanning */ static timer *krt_scan_all_timer; static int krt_scan_all_count; static _Bool krt_scan_all_tables; static void krt_scan_all(timer *t UNUSED) { struct krt_proto *p; node *n; kif_force_scan(); /* We need some node to decide whether to print the debug messages or not */ p = SKIP_BACK(struct krt_proto, krt_node, HEAD(krt_proto_list)); KRT_TRACE(p, D_EVENTS, "Scanning routing table"); WALK_LIST2(p, n, krt_proto_list, krt_node) krt_init_scan(p); krt_do_scan(NULL); WALK_LIST2(p, n, krt_proto_list, krt_node) krt_prune(p); } static void krt_scan_all_timer_start(struct krt_proto *p) { if (!krt_scan_all_count) krt_scan_all_timer = tm_new_init(krt_pool, krt_scan_all, NULL, KRT_CF->scan_time, 0); krt_scan_all_count++; tm_start(krt_scan_all_timer, 1 S); } static void krt_scan_all_timer_stop(void) { ASSERT(krt_scan_all_count > 0); krt_scan_all_count--; if (!krt_scan_all_count) { rfree(krt_scan_all_timer); krt_scan_all_timer = NULL; } } static void krt_scan_all_timer_kick(void) { tm_start(krt_scan_all_timer, 0); } void krt_use_shared_scan(void) { krt_scan_all_tables = 1; } static void krt_scan(timer *t) { struct krt_proto *p = t->data; kif_force_scan(); KRT_TRACE(p, D_EVENTS, "Scanning routing table"); krt_init_scan(p); krt_do_scan(p); krt_prune(p); } static void krt_scan_timer_start(struct krt_proto *p) { if (krt_scan_all_tables) krt_scan_all_timer_start(p); else { p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0); tm_start(p->scan_timer, 1 S); } } static void krt_scan_timer_stop(struct krt_proto *p) { if (krt_scan_all_tables) krt_scan_all_timer_stop(); else tm_stop(p->scan_timer); } static void krt_scan_timer_kick(struct krt_proto *p) { if (krt_scan_all_tables) krt_scan_all_timer_kick(); else tm_start(p->scan_timer, 0); } /* * Updates */ static int krt_preexport(struct channel *C, rte *e) { if (e->src->owner == &C->proto->sources) #ifdef CONFIG_SINGLE_ROUTE return 1; #else return -1; #endif if (!krt_capable(e)) return -1; return 0; } static void krt_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, rte *new, const rte *old) { struct krt_proto *p = (struct krt_proto *) P; if (p->flush_routes) { krt_replace_rte(p, net, NULL, old ?: new); return; } #ifdef CONFIG_SINGLE_ROUTE /* Got the same route as we imported. Keep it, do nothing. */ if (new && new->src->owner == &P->sources) return; #endif if (p->initialized) /* Before first scan we don't touch the routes */ krt_replace_rte(p, net, new, old); } static void krt_if_notify(struct proto *P, uint flags, struct iface *iface UNUSED) { struct krt_proto *p = (struct krt_proto *) P; /* * When interface went down, we should remove routes to it. In the ideal world, * OS kernel would send us route removal notifications in such cases, but we * cannot rely on it as it is often not true. E.g. Linux kernel removes related * routes when an interface went down, but it does not notify userspace about * that. To be sure, we just schedule a scan to ensure synchronization. */ if ((flags & IF_CHANGE_DOWN) && KRT_CF->learn) krt_scan_timer_kick(p); } static int krt_reload_routes(struct channel *C, struct channel_import_request *UNUSED) { struct krt_proto *p = (void *) C->proto; /* Although we keep learned routes in krt_table, we rather schedule a scan */ if (KRT_CF->learn) { p->reload = 1; krt_scan_timer_kick(p); } return 1; } static void krt_cleanup(struct krt_proto *p); static void krt_feed_end(struct channel *C) { struct krt_proto *p = (void *) C->proto; if (C->refeeding && C->refeed_req.hook) return; if (p->flush_routes) { p->flush_routes = 2; krt_init_scan(p); krt_do_scan(p); krt_cleanup(p); proto_notify_state(&p->p, PS_DOWN); return; } p->ready = 1; krt_scan_timer_kick(p); } static int krt_rte_better(const rte *new, const rte *old) { u32 n = ea_get_int(new->attrs, &ea_krt_metric, IGP_METRIC_UNKNOWN); u32 o = ea_get_int(old->attrs, &ea_krt_metric, IGP_METRIC_UNKNOWN); return (n < o); } /* * Protocol glue */ struct krt_config *krt_cf; static void krt_preconfig(struct protocol *P UNUSED, struct config *c) { krt_cf = NULL; krt_sys_preconfig(c); } static void krt_postconfig(struct proto_config *CF) { struct krt_config *cf = (void *) CF; /* Do not check templates at all */ if (cf->c.class == SYM_TEMPLATE) return; if (! proto_cf_main_channel(CF)) cf_error("Channel not specified"); struct channel_config *cc = proto_cf_main_channel(CF); struct rtable_config *tab = cc->table; if (tab->krt_attached) cf_error("Kernel syncer (%s) already attached to table %s", tab->krt_attached->name, tab->name); tab->krt_attached = CF; if (cf->merge_paths) { cc->ra_mode = RA_MERGED; cc->merge_limit = cf->merge_paths; } krt_sys_postconfig(cf); } struct rte_owner_class krt_rte_owner_class = { .rte_better = krt_rte_better, }; static struct proto * krt_init(struct proto_config *CF) { struct krt_proto *p = proto_new(CF); // struct krt_config *cf = (void *) CF; p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(CF)); p->p.preexport = krt_preexport; p->p.rt_notify = krt_rt_notify; p->p.iface_sub.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.feed_end = krt_feed_end; p->p.sources.class = &krt_rte_owner_class; krt_sys_init(p); return &p->p; } static int krt_start(struct proto *P) { struct krt_proto *p = (struct krt_proto *) P; switch (p->p.net_type) { case NET_IP4: p->af = AF_INET; break; case NET_IP6: p->af = AF_INET6; break; case NET_IP6_SADR: p->af = AF_INET6; break; #ifdef AF_MPLS case NET_MPLS: p->af = AF_MPLS; break; #endif default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break; } bmap_init(&p->sync_map, p->p.pool, 1024); bmap_init(&p->seen_map, p->p.pool, 1024); add_tail(&krt_proto_list, &p->krt_node); if (!krt_sys_start(p)) { rem_node(&p->krt_node); return PS_START; } krt_scan_timer_start(p); if (p->p.gr_recovery && KRT_CF->graceful_restart) p->p.main_channel->gr_wait = 1; return PS_UP; } static int krt_shutdown(struct proto *P) { struct krt_proto *p = (struct krt_proto *) P; krt_scan_timer_stop(p); if (p->p.proto_state == PS_START) return PS_DOWN; /* FIXME we should flush routes even when persist during reconfiguration */ if (p->initialized && !KRT_CF->persist && (P->down_code != PDC_CMD_GR_DOWN)) { p->flush_routes = 1; channel_request_feeding_dynamic(p->p.main_channel, CFRT_AUXILIARY); return PS_UP; } else { krt_cleanup(p); return PS_DOWN; } } static void krt_cleanup(struct krt_proto *p) { p->ready = 0; p->initialized = 0; krt_sys_shutdown(p); rem_node(&p->krt_node); bmap_free(&p->sync_map); } static int krt_reconfigure(struct proto *p, struct proto_config *CF) { struct krt_config *o = (void *) p->cf; struct krt_config *n = (void *) CF; if (!proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF))) return 0; if (!krt_sys_reconfigure((struct krt_proto *) p, n, o)) return 0; /* persist, graceful restart need not be the same */ return o->scan_time == n->scan_time && o->learn == n->learn; } struct proto_config * krt_init_config(int class) { #ifndef CONFIG_MULTIPLE_TABLES if (krt_cf) cf_error("Kernel protocol already defined"); #endif krt_cf = (struct krt_config *) proto_config_new(&proto_unix_kernel, class); krt_cf->scan_time = 60 S; krt_sys_init_config(krt_cf); return (struct proto_config *) krt_cf; } static void krt_copy_config(struct proto_config *dest, struct proto_config *src) { struct krt_config *d = (struct krt_config *) dest; struct krt_config *s = (struct krt_config *) src; /* Fix sysdep parts */ krt_sys_copy_config(d, s); } struct ea_class ea_krt_source = { .name = "krt_source", .type = T_INT, }; struct ea_class ea_krt_metric = { .name = "krt_metric", .type = T_INT, }; #ifdef CONFIG_IP6_SADR_KERNEL #define MAYBE_IP6_SADR NB_IP6_SADR #else #define MAYBE_IP6_SADR 0 #endif #ifdef HAVE_MPLS_KERNEL #define MAYBE_MPLS NB_MPLS #else #define MAYBE_MPLS 0 #endif struct protocol proto_unix_kernel = { .name = "Kernel", .template = "kernel%d", .preference = DEF_PREF_INHERITED, .channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS, .proto_size = sizeof(struct krt_proto), .config_size = sizeof(struct krt_config), .startup = PROTOCOL_STARTUP_CONNECTOR, .preconfig = krt_preconfig, .postconfig = krt_postconfig, .init = krt_init, .start = krt_start, .shutdown = krt_shutdown, .reconfigure = krt_reconfigure, .copy_config = krt_copy_config, }; void krt_build(void) { proto_build(&proto_unix_kernel); EA_REGISTER_ALL( &ea_krt_source, &ea_krt_metric, ); }