/* * BIRD -- UNIX Kernel Synchronization * * (c) 1998--2000 Martin Mares * * Can be freely distributed and used under the terms of the GNU GPL. */ /** * DOC: Kernel synchronization * * This system dependent module implements the Kernel and Device protocol, * that is synchronization of interface lists and routing tables with the * OS kernel. * * The whole kernel synchronization is a bit messy and touches some internals * of the routing table engine, because routing table maintenance is a typical * example of the proverbial compatibility between different Unices and we want * to keep the overhead of our KRT business as low as possible and avoid maintaining * a local routing table copy. * * The kernel syncer can work in three different modes (according to system config header): * Either with a single routing table and single KRT protocol [traditional UNIX] * or with many routing tables and separate KRT protocols for all of them * or with many routing tables, but every scan including all tables, so we start * separate KRT protocols which cooperate with each other [Linux]. * In this case, we keep only a single scan timer. * * We use FIB node flags in the routing table to keep track of route * synchronization status. We also attach temporary &rte's to the routing table, * but it cannot do any harm to the rest of BIRD since table synchronization is * an atomic process. * * When starting up, we cheat by looking if there is another * KRT instance to be initialized later and performing table scan * only once for all the instances. * * The code uses OS-dependent parts for kernel updates and scans. These parts are * in more specific sysdep directories (e.g. sysdep/linux) in functions krt_sys_* * and kif_sys_* (and some others like krt_replace_rte()) and krt-sys.h header file. * This is also used for platform specific protocol options and route attributes. * * There was also an old code that used traditional UNIX ioctls for these tasks. * It was unmaintained and later removed. For reference, see sysdep/krt-* files * in commit 396dfa9042305f62da1f56589c4b98fac57fc2f6 */ /* * If you are brave enough, continue now. You cannot say you haven't been warned. */ #undef LOCAL_DEBUG #include "nest/bird.h" #include "nest/iface.h" #include "nest/route.h" #include "nest/protocol.h" #include "filter/filter.h" #include "conf/conf.h" #include "lib/string.h" #include "lib/timer.h" #include "unix.h" #include "krt.h" /* * Global resources */ pool *krt_pool; static linpool *krt_filter_lp; static list krt_proto_list; void krt_io_init(void) { krt_pool = rp_new(&root_pool, the_bird_domain.the_bird, "Kernel Syncer"); krt_filter_lp = lp_new_default(krt_pool); init_list(&krt_proto_list); krt_sys_io_init(); } /* * Interfaces */ struct kif_proto *kif_proto; static struct kif_config *kif_cf; static timer *kif_scan_timer; static btime kif_last_shot; static struct kif_iface_config kif_default_iface = {}; struct kif_iface_config * kif_get_iface_config(struct iface *iface) { struct kif_config *cf = (void *) (kif_proto->p.cf); struct kif_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL); return ic ?: &kif_default_iface; } static void kif_scan(timer *t) { struct kif_proto *p = t->data; KRT_TRACE(p, D_EVENTS, "Scanning interfaces"); kif_last_shot = current_time(); kif_do_scan(p); } static void kif_force_scan(void) { if (kif_proto && ((kif_last_shot + 2 S) < current_time())) { kif_scan(kif_scan_timer); tm_start(kif_scan_timer, ((struct kif_config *) kif_proto->p.cf)->scan_time); } } void kif_request_scan(void) { if (kif_proto && (kif_scan_timer->expires > (current_time() + 1 S))) tm_start(kif_scan_timer, 1 S); } static struct proto * kif_init(struct proto_config *c) { struct kif_proto *p = proto_new(c); kif_sys_init(p); return &p->p; } static int kif_start(struct proto *P) { struct kif_proto *p = (struct kif_proto *) P; kif_proto = p; kif_sys_start(p); /* Start periodic interface scanning */ kif_scan_timer = tm_new_init(P->pool, kif_scan, p, KIF_CF->scan_time, 0); kif_scan(kif_scan_timer); tm_start(kif_scan_timer, KIF_CF->scan_time); return PS_UP; } static int kif_shutdown(struct proto *P) { struct kif_proto *p = (struct kif_proto *) P; tm_stop(kif_scan_timer); kif_sys_shutdown(p); kif_proto = NULL; return PS_DOWN; } static void kif_cleanup(struct proto *p) { if (p->debug & D_EVENTS) log(L_TRACE "%s: Flushing interfaces", p->name); if_start_update(); if_end_update(); } static int kif_reconfigure(struct proto *p, struct proto_config *new) { struct kif_config *o = (struct kif_config *) p->cf; struct kif_config *n = (struct kif_config *) new; if (!kif_sys_reconfigure((struct kif_proto *) p, n, o)) return 0; if (o->scan_time != n->scan_time) { tm_stop(kif_scan_timer); kif_scan_timer->recurrent = n->scan_time; kif_scan(kif_scan_timer); tm_start(kif_scan_timer, n->scan_time); } if (!EMPTY_LIST(o->iface_list) || !EMPTY_LIST(n->iface_list)) { /* This is hack, we have to update a configuration * to the new value just now, because it is used * for recalculation of preferred addresses. */ p->cf = new; if_recalc_all_preferred_addresses(); } return 1; } static void kif_preconfig(struct protocol *P UNUSED, struct config *c) { kif_cf = NULL; kif_sys_preconfig(c); } struct proto_config * kif_init_config(int class) { if (kif_cf) cf_error("Kernel device protocol already defined"); kif_cf = (struct kif_config *) proto_config_new(&proto_unix_iface, class); kif_cf->scan_time = 60 S; init_list(&kif_cf->iface_list); kif_sys_init_config(kif_cf); return (struct proto_config *) kif_cf; } static void kif_copy_config(struct proto_config *dest, struct proto_config *src) { struct kif_config *d = (struct kif_config *) dest; struct kif_config *s = (struct kif_config *) src; /* Copy interface config list */ cfg_copy_list(&d->iface_list, &s->iface_list, sizeof(struct kif_iface_config)); /* Fix sysdep parts */ kif_sys_copy_config(d, s); } struct protocol proto_unix_iface = { .name = "Device", .template = "device%d", .proto_size = sizeof(struct kif_proto), .config_size = sizeof(struct kif_config), .startup = PROTOCOL_STARTUP_NECESSARY, .preconfig = kif_preconfig, .init = kif_init, .start = kif_start, .shutdown = kif_shutdown, .cleanup = kif_cleanup, .reconfigure = kif_reconfigure, .copy_config = kif_copy_config }; void kif_build(void) { proto_build(&proto_unix_iface); } /* * Tracing of routes */ static inline void krt_trace_in(struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) log(L_TRACE "%s: %N: %s", p->p.name, e->net, msg); } static inline void krt_trace_in_rl(struct tbf *f, struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net, msg); } /* * Inherited Routes */ #ifdef KRT_ALLOW_LEARN static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS; /* * krt_same_key() specifies what (aside from the net) is the key in * kernel routing tables. It should be OS-dependent, this is for * Linux. It is important for asynchronous alien updates, because a * positive update is implicitly a negative one for any old route with * the same key. */ static inline u32 krt_metric(rte *a) { eattr *ea = ea_find(a->attrs, &ea_krt_metric); return ea ? ea->u.data : 0; } static void krt_learn_alien_attr(struct channel *c, rte *e) { ea_set_attr_u32(&e->attrs, &ea_gen_preference, 0, c->preference); } /* Called when alien route is discovered during scan */ static void krt_learn_scan(struct krt_proto *p, rte *e) { rte e0 = { .attrs = e->attrs, .src = rt_get_source(&p->p, krt_metric(e)), }; krt_learn_alien_attr(p->p.main_channel, &e0); rte_update(p->p.main_channel, e->net, &e0, e0.src); rt_unlock_source(e0.src); } static void krt_learn_async(struct krt_proto *p, rte *e, int new) { if (new) return krt_learn_scan(p, e); struct rte_src *src = rt_get_source(&p->p, krt_metric(e)); rte_update(p->p.main_channel, e->net, NULL, src); rt_unlock_source(src); } #endif /* * Routes */ /* Hook defined in nest/rt-table.c ... to be refactored away later */ rte *krt_export_net(struct channel *c, const net_addr *a, linpool *lp); static int krt_same_dest(rte *k, rte *e) { ea_list *ka = k->attrs, *ea = e->attrs; eattr *nhea_k = ea_find(ka, &ea_gen_nexthop); eattr *nhea_e = ea_find(ea, &ea_gen_nexthop); return (!nhea_k == !nhea_e) && adata_same(nhea_k->u.ptr, nhea_e->u.ptr); } /* * This gets called back when the low-level scanning code discovers a route. * We expect that the route is a temporary rte and its attributes are uncached. */ void krt_got_route(struct krt_proto *p, rte *e, s8 src) { rte *new = NULL; e->pflags = 0; #ifdef KRT_ALLOW_LEARN switch (src) { case KRT_SRC_REDIRECT: krt_trace_in(p, e, "deleting"); krt_replace_rte(p, e->net, NULL, e); return; case KRT_SRC_KERNEL: if (KRT_CF->learn != KRT_LEARN_ALL) { krt_trace_in(p, e, "ignored"); return; } /* fallthrough */ case KRT_SRC_ALIEN: if (KRT_CF->learn) krt_learn_scan(p, e); else krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored"); return; } #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ /* Deleting all routes if final flush is requested */ if (p->sync_state == KPS_FLUSHING) goto delete; /* We wait for the initial feed to have correct installed state */ if (!p->ready) goto ignore; /* Get the exported version */ new = krt_export_net(p->p.main_channel, e->net, krt_filter_lp); /* Rejected by filters */ if (!new) goto delete; /* Route to this destination was already seen. Strange, but it happens... */ if (bmap_test(&p->seen_map, new->id)) goto aseen; /* Mark route as seen */ bmap_set(&p->seen_map, new->id); /* TODO: There also may be changes in route eattrs, we ignore that for now. */ if (!bmap_test(&p->sync_map, new->id) || !krt_same_dest(e, new)) goto update; goto seen; seen: krt_trace_in(p, e, "seen"); goto done; aseen: krt_trace_in(p, e, "already seen"); goto done; ignore: krt_trace_in(p, e, "ignored"); goto done; update: krt_trace_in(p, new, "updating"); krt_replace_rte(p, e->net, new, e); goto done; delete: krt_trace_in(p, e, "deleting"); krt_replace_rte(p, e->net, NULL, e); goto done; done:; lp_flush(krt_filter_lp); } static bool krt_init_scan(struct krt_proto *p) { switch (p->sync_state) { case KPS_IDLE: rt_refresh_begin(&p->p.main_channel->in_req); bmap_reset(&p->seen_map, 1024); p->sync_state = KPS_SCANNING; return 1; case KPS_SCANNING: bug("Kernel scan double-init"); case KPS_PRUNING: log(L_WARN "%s: Can't scan, still pruning", p->p.name); return 0; case KPS_FLUSHING: bug("Can't scan, flushing"); } bug("Bad kernel sync state"); } static void krt_prune(struct krt_proto *p) { switch (p->sync_state) { case KPS_IDLE: bug("Kernel scan prune without scan"); case KPS_SCANNING: p->sync_state = KPS_PRUNING; KRT_TRACE(p, D_EVENTS, "Pruning table %s", p->p.main_channel->table->name); rt_refresh_end(&p->p.main_channel->in_req); channel_request_full_refeed(p->p.main_channel); break; case KPS_PRUNING: bug("Kernel scan double-prune"); case KPS_FLUSHING: bug("Attemted kernel scan prune when flushing"); } } void krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) { e->pflags = 0; switch (src) { case KRT_SRC_BIRD: /* Should be filtered by the back end */ bug("BIRD originated routes should not get here."); case KRT_SRC_REDIRECT: if (new) { krt_trace_in(p, e, "[redirect] deleting"); krt_replace_rte(p, e->net, NULL, e); } /* If !new, it is probably echo of our deletion */ break; #ifdef KRT_ALLOW_LEARN case KRT_SRC_KERNEL: if (KRT_CF->learn != KRT_LEARN_ALL) break; /* fallthrough */ case KRT_SRC_ALIEN: if (KRT_CF->learn) { krt_learn_async(p, e, new); return; } #endif } } /* * Periodic scanning */ static timer *krt_scan_all_timer; static int krt_scan_all_count; static bool krt_scan_all_tables; static void krt_scan_all(timer *t UNUSED) { struct krt_proto *p; node *n; kif_force_scan(); /* We need some node to decide whether to print the debug messages or not */ p = SKIP_BACK(struct krt_proto, krt_node, HEAD(krt_proto_list)); KRT_TRACE(p, D_EVENTS, "Scanning routing table"); WALK_LIST2(p, n, krt_proto_list, krt_node) krt_init_scan(p); krt_do_scan(NULL); WALK_LIST2(p, n, krt_proto_list, krt_node) if (p->sync_state == KPS_SCANNING) krt_prune(p); } static void krt_scan_all_timer_start(struct krt_proto *p) { if (!krt_scan_all_count) krt_scan_all_timer = tm_new_init(krt_pool, krt_scan_all, NULL, KRT_CF->scan_time, 0); krt_scan_all_count++; tm_start(krt_scan_all_timer, 1 S); } static void krt_scan_all_timer_stop(void) { ASSERT(krt_scan_all_count > 0); krt_scan_all_count--; if (!krt_scan_all_count) { rfree(krt_scan_all_timer); krt_scan_all_timer = NULL; } } static void krt_scan_all_timer_kick(void) { tm_start(krt_scan_all_timer, 0); } void krt_use_shared_scan(void) { krt_scan_all_tables = 1; } static void krt_scan(timer *t) { struct krt_proto *p = t->data; kif_force_scan(); KRT_TRACE(p, D_EVENTS, "Scanning routing table"); if (!krt_init_scan(p)) return; krt_do_scan(p); krt_prune(p); } static void krt_scan_timer_start(struct krt_proto *p) { if (krt_scan_all_tables) krt_scan_all_timer_start(p); else { p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0); tm_start(p->scan_timer, 1 S); } } static void krt_scan_timer_stop(struct krt_proto *p) { if (krt_scan_all_tables) krt_scan_all_timer_stop(); else tm_stop(p->scan_timer); } static void krt_scan_timer_kick(struct krt_proto *p) { if (krt_scan_all_tables) krt_scan_all_timer_kick(); else tm_start(p->scan_timer, 0); } /* * Updates */ static int krt_preexport(struct channel *C, rte *e) { if (e->src->owner == &C->proto->sources) #ifdef CONFIG_SINGLE_ROUTE return 1; #else return -1; #endif if (!krt_capable(e)) { if (C->debug & D_ROUTES) log(L_TRACE "%s.%s: refusing incapable route for %N", C->proto->name, C->name, e->net); return -1; } /* Before first scan we don't touch the routes */ if (!SKIP_BACK(struct krt_proto, p, C->proto)->ready) { if (C->debug & D_ROUTES) log(L_TRACE "%s.%s not ready yet to accept route for %N", C->proto->name, C->name, e->net); return -1; } return 0; } static void krt_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, rte *new, const rte *old) { struct krt_proto *p = (struct krt_proto *) P; #ifdef CONFIG_SINGLE_ROUTE /* Got the same route as we imported. Keep it, do nothing. */ if (new && new->src->owner == &P->sources) return; #endif switch (p->sync_state) { case KPS_IDLE: case KPS_PRUNING: if (new && bmap_test(&p->seen_map, new->id)) /* Already installed and seen in the kernel dump */ return; /* fall through */ case KPS_SCANNING: /* Actually replace the route */ krt_replace_rte(p, net, new, old); break; case KPS_FLUSHING: /* Drop any incoming route */ krt_replace_rte(p, net, NULL, old ?: new); } } static void krt_if_notify(struct proto *P, uint flags, struct iface *iface UNUSED) { struct krt_proto *p = (struct krt_proto *) P; /* * When interface went down, we should remove routes to it. In the ideal world, * OS kernel would send us route removal notifications in such cases, but we * cannot rely on it as it is often not true. E.g. Linux kernel removes related * routes when an interface went down, but it does not notify userspace about * that. To be sure, we just schedule a scan to ensure synchronization. */ if ((flags & IF_CHANGE_DOWN) && KRT_CF->learn) krt_scan_timer_kick(p); } static int krt_reload_routes(struct channel *C, struct rt_feeding_request *rfr) { struct krt_proto *p = (void *) C->proto; if (KRT_CF->learn) { p->reload = 1; krt_scan_timer_kick(p); } if (rfr) CALL(rfr->done, rfr); return 1; } static void krt_cleanup(struct krt_proto *p); static void krt_export_fed(struct channel *C) { struct krt_proto *p = (void *) C->proto; p->ready = 1; p->initialized = 1; switch (p->sync_state) { case KPS_IDLE: krt_scan_timer_kick(p); break; case KPS_SCANNING: break; case KPS_PRUNING: KRT_TRACE(p, D_EVENTS, "Table %s pruned", p->p.main_channel->table->name); p->sync_state = KPS_IDLE; break; case KPS_FLUSHING: krt_do_scan(p); krt_cleanup(p); proto_notify_state(&p->p, PS_DOWN); return; } } static int krt_rte_better(const rte *new, const rte *old) { u32 n = ea_get_int(new->attrs, &ea_krt_metric, IGP_METRIC_UNKNOWN); u32 o = ea_get_int(old->attrs, &ea_krt_metric, IGP_METRIC_UNKNOWN); return (n < o); } /* * Protocol glue */ struct krt_config *krt_cf; static void krt_preconfig(struct protocol *P UNUSED, struct config *c) { krt_cf = NULL; krt_sys_preconfig(c); } static void krt_postconfig(struct proto_config *CF) { struct krt_config *cf = (void *) CF; /* Do not check templates at all */ if (cf->c.class == SYM_TEMPLATE) return; if (! proto_cf_main_channel(CF)) cf_error("Channel not specified"); struct channel_config *cc = proto_cf_main_channel(CF); struct rtable_config *tab = cc->table; if (tab->krt_attached) cf_error("Kernel syncer (%s) already attached to table %s", tab->krt_attached->name, tab->name); tab->krt_attached = CF; if (cf->merge_paths) { cc->ra_mode = RA_MERGED; cc->merge_limit = cf->merge_paths; } krt_sys_postconfig(cf); } struct rte_owner_class krt_rte_owner_class = { .rte_better = krt_rte_better, }; static struct proto * krt_init(struct proto_config *CF) { struct krt_proto *p = proto_new(CF); // struct krt_config *cf = (void *) CF; proto_add_main_channel(&p->p, proto_cf_main_channel(CF)); p->p.preexport = krt_preexport; p->p.rt_notify = krt_rt_notify; p->p.iface_sub.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.export_fed = krt_export_fed; p->p.sources.class = &krt_rte_owner_class; krt_sys_init(p); return &p->p; } static int krt_start(struct proto *P) { struct krt_proto *p = (struct krt_proto *) P; switch (p->p.net_type) { case NET_IP4: p->af = AF_INET; break; case NET_IP6: p->af = AF_INET6; break; case NET_IP6_SADR: p->af = AF_INET6; break; #ifdef AF_MPLS case NET_MPLS: p->af = AF_MPLS; break; #endif default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break; } bmap_init(&p->sync_map, p->p.pool, 1024); bmap_init(&p->seen_map, p->p.pool, 1024); add_tail(&krt_proto_list, &p->krt_node); if (!krt_sys_start(p)) { rem_node(&p->krt_node); return PS_START; } krt_scan_timer_start(p); if (p->p.gr_recovery && KRT_CF->graceful_restart) p->p.main_channel->gr_wait = 1; return PS_UP; } static int krt_shutdown(struct proto *P) { struct krt_proto *p = (struct krt_proto *) P; krt_scan_timer_stop(p); if (p->p.proto_state == PS_START) return PS_DOWN; /* FIXME we should flush routes even when persist during reconfiguration */ if (p->initialized && !KRT_CF->persist && (P->down_code != PDC_CMD_GR_DOWN)) { p->sync_state = KPS_FLUSHING; channel_request_full_refeed(p->p.main_channel); /* Keeping the protocol UP until the feed-to-flush is done */ return PS_UP; } else { krt_cleanup(p); return PS_DOWN; } } static void krt_cleanup(struct krt_proto *p) { p->ready = 0; p->initialized = 0; krt_sys_shutdown(p); rem_node(&p->krt_node); bmap_free(&p->sync_map); } static int krt_reconfigure(struct proto *p, struct proto_config *CF) { struct krt_config *o = (void *) p->cf; struct krt_config *n = (void *) CF; if (!proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF))) return 0; if (!krt_sys_reconfigure((struct krt_proto *) p, n, o)) return 0; /* persist, graceful restart need not be the same */ return o->scan_time == n->scan_time && o->learn == n->learn; } struct proto_config * krt_init_config(int class) { #ifndef CONFIG_MULTIPLE_TABLES if (krt_cf) cf_error("Kernel protocol already defined"); #endif krt_cf = (struct krt_config *) proto_config_new(&proto_unix_kernel, class); krt_cf->scan_time = 60 S; krt_sys_init_config(krt_cf); return (struct proto_config *) krt_cf; } static void krt_copy_config(struct proto_config *dest, struct proto_config *src) { struct krt_config *d = (struct krt_config *) dest; struct krt_config *s = (struct krt_config *) src; /* Fix sysdep parts */ krt_sys_copy_config(d, s); } struct ea_class ea_krt_source = { .name = "krt_source", .type = T_INT, }; struct ea_class ea_krt_metric = { .name = "krt_metric", .type = T_INT, }; #ifdef CONFIG_IP6_SADR_KERNEL #define MAYBE_IP6_SADR NB_IP6_SADR #else #define MAYBE_IP6_SADR 0 #endif #ifdef HAVE_MPLS_KERNEL #define MAYBE_MPLS NB_MPLS #else #define MAYBE_MPLS 0 #endif struct protocol proto_unix_kernel = { .name = "Kernel", .template = "kernel%d", .preference = DEF_PREF_INHERITED, .channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS, .proto_size = sizeof(struct krt_proto), .config_size = sizeof(struct krt_config), .startup = PROTOCOL_STARTUP_CONNECTOR, .preconfig = krt_preconfig, .postconfig = krt_postconfig, .init = krt_init, .start = krt_start, .shutdown = krt_shutdown, .reconfigure = krt_reconfigure, .copy_config = krt_copy_config, }; void krt_build(void) { proto_build(&proto_unix_kernel); EA_REGISTER_ALL( &ea_krt_source, &ea_krt_metric, ); }