/* * BIRD -- Protocols * * (c) 1998--2000 Martin Mares * * Can be freely distributed and used under the terms of the GNU GPL. */ #undef LOCAL_DEBUG #include "nest/bird.h" #include "nest/protocol.h" #include "lib/resource.h" #include "lib/lists.h" #include "lib/event.h" #include "lib/timer.h" #include "lib/string.h" #include "conf/conf.h" #include "nest/route.h" #include "nest/iface.h" #include "nest/mpls.h" #include "nest/cli.h" #include "filter/filter.h" #include "filter/f-inst.h" pool *proto_pool; static TLIST_LIST(proto) global_proto_list; static list STATIC_LIST_INIT(protocol_list); #define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); }) #define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); }) static struct graceful_recovery_context _graceful_recovery_context; OBSREF(struct graceful_recovery_context) graceful_recovery_context; static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" }; proto_state_table proto_state_table_pub; extern struct protocol proto_unix_iface; static void proto_rethink_goal(struct proto *p); static char *proto_state_name(struct proto *p); void proto_journal_item_cleanup(struct lfjour * journal UNUSED, struct lfjour_item *i); static void channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); static void channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); static void channel_reset_limit(struct channel *c, struct limit *l, int dir); static void channel_stop_export(struct channel *c); static void channel_check_stopped(struct channel *c); static inline void channel_reimport(struct channel *c, struct rt_feeding_request *rfr) { rt_export_refeed(&c->reimporter, rfr); ev_send(proto_event_list(c->proto), &c->reimport_event); } static inline void channel_refeed(struct channel *c, struct 
rt_feeding_request *rfr) { rt_export_refeed(&c->out_req, rfr); } static inline int proto_is_done(struct proto *p) { return (p->proto_state == PS_FLUSH) && proto_is_inactive(p); } static inline int channel_is_active(struct channel *c) { return (c->channel_state != CS_DOWN); } static inline int channel_reloadable(struct channel *c) { return c->reloadable && c->proto->reload_routes || ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER); } static inline void channel_log_state_change(struct channel *c) { CD(c, "State changed to %s", c_states[c->channel_state]); } static void channel_import_log_state_change(struct rt_import_request *req, u8 state) { SKIP_BACK_DECLARE(struct channel, c, in_req, req); CD(c, "Channel import state changed to %s", rt_import_state_name(state)); } static void channel_export_fed(struct rt_export_request *req) { SKIP_BACK_DECLARE(struct channel, c, out_req, req); struct limit *l = &c->out_limit; if ((c->limit_active & (1 << PLD_OUT)) && (l->count <= l->max)) { c->limit_active &= ~(1 << PLD_OUT); channel_request_full_refeed(c); } else CALL(c->proto->export_fed, c); } void channel_request_full_refeed(struct channel *c) { rt_export_refeed(&c->out_req, NULL); } static void channel_dump_import_req(struct rt_import_request *req) { SKIP_BACK_DECLARE(struct channel, c, in_req, req); debug(" Channel %s.%s import request %p\n", c->proto->name, c->name, req); } static void channel_dump_export_req(struct rt_export_request *req) { SKIP_BACK_DECLARE(struct channel, c, out_req, req); debug(" Channel %s.%s export request %p\n", c->proto->name, c->name, req); } static void proto_log_state_change(struct proto *p) { if (p->debug & D_STATES) { char *name = proto_state_name(p); if (name != p->last_state_name_announced) { p->last_state_name_announced = name; PD(p, "State changed to %s", proto_state_name(p)); } } else p->last_state_name_announced = NULL; } struct channel_config * proto_cf_find_channel(struct proto_config *pc, uint net_type) { struct channel_config *cc; 
WALK_LIST(cc, pc->channels) if (cc->net_type == net_type) return cc; return NULL; } /** * proto_find_channel_by_table - find channel connected to a routing table * @p: protocol instance * @t: routing table * * Returns pointer to channel or NULL */ struct channel * proto_find_channel_by_table(struct proto *p, rtable *t) { struct channel *c; WALK_LIST(c, p->channels) if (c->table == t) return c; return NULL; } /** * proto_find_channel_by_name - find channel by its name * @p: protocol instance * @n: channel name * * Returns pointer to channel or NULL */ struct channel * proto_find_channel_by_name(struct proto *p, const char *n) { struct channel *c; WALK_LIST(c, p->channels) if (!strcmp(c->name, n)) return c; return NULL; } /** * proto_add_channel - connect protocol to a routing table * @p: protocol instance * @cf: channel configuration * * This function creates a channel between the protocol instance @p and the * routing table specified in the configuration @cf, making the protocol hear * all changes in the table and allowing the protocol to update routes in the * table. * * The channel is linked in the protocol channel list and when active also in * the table channel list. Channels are allocated from the global resource pool * (@proto_pool) and they are automatically freed when the protocol is removed. 
*/ struct channel * proto_add_channel(struct proto *p, struct channel_config *cf) { struct channel *c = mb_allocz(proto_pool, cf->class->channel_size); c->name = cf->name; c->class = cf->class; c->proto = p; c->table = cf->table->table; rt_lock_table(c->table); c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; c->out_subprefix = cf->out_subprefix; channel_init_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); channel_init_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); channel_init_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); c->net_type = cf->net_type; c->ra_mode = cf->ra_mode; c->preference = cf->preference; c->debug = cf->debug; c->merge_limit = cf->merge_limit; c->in_keep = cf->in_keep; c->rpki_reload = cf->rpki_reload; c->channel_state = CS_DOWN; c->last_state_change = current_time(); c->reloadable = 1; init_list(&c->roa_subscriptions); /* Create the channel information ea_list */ struct ea_list *ca = NULL; ea_set_attr(&ca, EA_LITERAL_STORE_STRING(&ea_name, 0, c->name)); ea_set_attr(&ca, EA_LITERAL_EMBEDDED(&ea_proto_id, 0, c->proto->id)); ea_set_attr(&ca, EA_LITERAL_EMBEDDED(&ea_channel_id, 0, c->id)); ea_set_attr(&ca, EA_LITERAL_EMBEDDED(&ea_in_keep, 0, c->in_keep)); ea_set_attr(&ca, EA_LITERAL_STORE_PTR(&ea_rtable, 0, c->table)); /* Announcing existence of the channel * TODO: make this an actual notification */ PST_LOCKED(ts) { /* Allocating channel ID */ c->id = hmap_first_zero(&ts->channel_id_map); hmap_set(&ts->channel_id_map, c->id); /* The current channel state table may be too small */ if (c->id >= ts->channels_len) { ea_list **l = mb_allocz(ts->pool, sizeof(ea_list*) * ts->channels_len * 2); memcpy(l, ts->channel_states, sizeof(ea_list*) * ts->channels_len); mb_free(ts->channel_states); ts->channel_states = l; ts->channels_len = ts->channels_len * 2; } ASSERT_DIE(c->id < ts->channels_len); ASSERT_DIE(ts->channel_states[c->id] == NULL); /* Set the channel info */ ts->channel_states[c->id] = ea_lookup_slow(ca, 0, EALS_IN_TABLE); } /* 
Update channel list in protocol state */ struct ea_list *pes = p->ea_state; ea_set_attr(&pes, EA_LITERAL_DIRECT_ADATA(&ea_proto_channel_list, 0, int_set_add( tmp_linpool, ea_get_adata(pes, &ea_proto_channel_list), c->id))); proto_announce_state_later(c->proto, pes); CALL(c->class->init, c, cf); add_tail(&p->channels, &c->n); CD(c, "Connected to table %s", c->table->name); return c; } void proto_remove_channel(struct proto *p UNUSED, struct channel *c) { ASSERT(c->channel_state == CS_DOWN); CD(c, "Removed", c->name); struct ea_list *pes = p->ea_state; ea_set_attr(&pes, EA_LITERAL_DIRECT_ADATA(&ea_proto_channel_list, 0, int_set_del( tmp_linpool, ea_get_adata(p->ea_state, &ea_proto_channel_list), c->id))); proto_announce_state_later(c->proto, pes); /* * This may cause trouble if some protocol flaps really really fast, * like trying to read channel info of a nonexistent channel, * but until something like this actually happens in production, * we are going to fix this anyway. * * (Famous last words by Maria.) 
*/ PST_LOCKED(ts) { ASSERT_DIE(c->id < ts->channels_len); ea_free_later(ts->channel_states[c->id]); ts->channel_states[c->id] = NULL; hmap_clear(&ts->channel_id_map, c->id); } rt_unlock_table(c->table); rem_node(&c->n); mb_free(c); } static void proto_start_channels(struct proto *p) { struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled) channel_set_state(c, CS_UP); } static void proto_pause_channels(struct proto *p) { struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) channel_set_state(c, CS_PAUSE); } static void proto_stop_channels(struct proto *p) { struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) channel_set_state(c, CS_STOP); } static void proto_remove_channels(struct proto *p) { struct channel *c; WALK_LIST_FIRST(c, p->channels) proto_remove_channel(p, c); } /** * # Automatic ROA reloads * * Route origin authorizations may (and do) change over time by updates via * our RPKI protocols. This then manifests in ROA tables. As the roa_check() * is always executed on a specific contents of ROA table in a specific moment * of time, its value may switch after updates in the ROA table and therefore * must be re-evaluated any time the result may have changed. * * To enable this mechanism, there are auxiliary tools integrated in BIRD * to automatically re-evaluate all filters that may get a different outcome * after ROA change. * * ROA Subscription Data Structure (struct roa_subscription) is the connector * between the channel and the ROA table, keeping track about unprocessed * changes and initiating the reloads. The modus operandi is as follows: * * Init 1. Check whether the filter uses ROA at all. * Init 2. Request exports from the ROA table * Init 3. Allocate a trie * * Export from ROA: This may affect all routes for prefixes matching the ROA * prefix, disregarding its maxlen. Thus we mark these routes in the request's * auxiliary trie. 
Then we ping the settle timer to wait a reasonable amount of
 * time before actually requesting a channel reload.
 *
 * The settle timer fires when nothing has pinged it for the 'min' time, or
 * when the 'max' time has elapsed since the first ping. It then:
 *
 * - requests partial channel import / export reload based on the trie
 * - allocates a new trie
 *
 * As the import/export reload uses the auxiliary trie to prefilter prefixes,
 * the trie must be freed after the reload is done, which is ensured in the
 * .done() hook of the reimport/reexport request.
 *
 * # Channel export refeed
 *
 * The request, either by ROA or from CLI, is enqueued to the channel and an
 * auxiliary export hook is requested from the table. This way, the ordinary
 * updates can flow uninterrupted while the refeed gets prefiltered by the
 * given trie (if given). When the auxiliary export hook finishes, the .done()
 * hook is then called for the requestor to do their cleanup.
 *
 * While refeeding, special care must be taken about route changes inside the
 * table. For this, an auxiliary trie is allocated to keep track of already
 * refed nets, to avoid unnecessary multiple re-evaluation of filters.
 *
 * # Channel import reload from import table
 *
 * When the import table is on, the channel keeps the original version of the route
 * in the table together with the actual version after filters, in the form of
 * an additional layer of route attributes underneath the actual version. This makes
 * it exceptionally simple to get the original version of the route directly
 * from the table by an ordinary export which strips all the newer layers.
 *
 * Then, by processing all these auxiliary exports, the channel basically re-imports
 * all the routes back into the table, re-evaluating the filters and ROA checks.
 *
 * # Channel import reload from protocols
 *
 * When the import table is off, the protocol gets the reimport request directly
 * via the .reload_routes() hook and must do its internal route reload instead.
* The protocol may not support it and in such a case, this function returns 0,
 * indicating that no partial reload is going to happen. It is then at the
 * developer's or user's discretion to run a full reload instead.
 *
 * # Caveats, FIXME's, TODO's and other kinds of hell
 *
 * The partial reexport uses a trie to track state for single prefixes. This
 * may do crazy things if a partial reload was to be performed on any other
 * table than plain IPv6 or IPv4. Network types like VPNv6 or Flowspec may
 * cause some crashes. This is currently not checked anywhere.
 *
 * Anyway, we decided to split the table FIB structure to carry only a mapping
 * between a prefix and a locally-unique ID, and after this update is done
 * (probably also in v2), the tracking tries may be easily replaced by
 * bitfields, therefore fixing this bug.
 *
 * We also probably didn't do a proper analysis of the implemented algorithm
 * for reexports, so if there is somebody willing to formally prove that we
 * both won't miss any update and won't reexport more than needed, you're welcome
 * to submit such a proof.
 *
 * We wish you a pleasant reading, analyzing and bugfixing experience.
* * Kata, Maria and the BIRD Team */ struct roa_subscription { node roa_node; struct channel *c; rtable *tab; void (*refeed_hook)(struct channel *, struct rt_feeding_request *); struct lfjour_recipient digest_recipient; event update_event; }; struct roa_reload_request { struct rt_feeding_request req; struct roa_subscription *s; struct lfjour_item *item; }; static void channel_roa_reload_done(struct rt_feeding_request *req) { SKIP_BACK_DECLARE(struct roa_reload_request, rrr, req, req); ASSERT_DIE(rrr->s->c->channel_state == CS_UP); lfjour_release(&rrr->s->digest_recipient, rrr->item); ev_send(proto_work_list(rrr->s->c->proto), &rrr->s->update_event); mb_free(rrr); /* FIXME: this should reset import/export filters if ACTION BLOCK */ } static void channel_roa_changed(void *_s) { struct roa_subscription *s = _s; u64 first_seq = 0, last_seq = 0; uint count = 0; for (struct lfjour_item *it; it = lfjour_get(&s->digest_recipient); ) { SKIP_BACK_DECLARE(struct rt_digest, rd, li, s->digest_recipient.cur); struct roa_reload_request *rrr = mb_alloc(s->c->proto->pool, sizeof *rrr); *rrr = (struct roa_reload_request) { .req = { .prefilter = { .mode = TE_ADDR_TRIE, .trie = rd->trie, }, .done = channel_roa_reload_done, }, .s = s, .item = it, }; if (!first_seq) first_seq = it->seq; last_seq = it->seq; count++; s->refeed_hook(s->c, &rrr->req); } if (s->c->debug & D_EVENTS) if (count) log(L_INFO "%s.%s: Requested %u automatic roa reloads, seq %lu to %lu", s->c->proto->name, s->c->name, count, first_seq, last_seq); else log(L_INFO "%s.%s: No roa reload requested", s->c->proto->name, s->c->name); } static inline void (*channel_roa_reload_hook(int dir))(struct channel *, struct rt_feeding_request *) { return dir ? 
channel_reimport : channel_refeed; } static int channel_roa_is_subscribed(struct channel *c, rtable *tab, int dir) { struct roa_subscription *s; node *n; WALK_LIST2(s, n, c->roa_subscriptions, roa_node) if ((tab == s->tab) && (s->refeed_hook == channel_roa_reload_hook(dir))) return 1; return 0; } static void channel_roa_subscribe(struct channel *c, rtable *tab, int dir) { if (channel_roa_is_subscribed(c, tab, dir)) return; rtable *aux = tab->config->roa_aux_table->table; struct roa_subscription *s = mb_allocz(c->proto->pool, sizeof(struct roa_subscription)); *s = (struct roa_subscription) { .c = c, .tab = aux, .refeed_hook = channel_roa_reload_hook(dir), .digest_recipient = { .target = proto_work_list(c->proto), .event = &s->update_event, }, .update_event = { .hook = channel_roa_changed, .data = s, }, }; add_tail(&c->roa_subscriptions, &s->roa_node); RT_LOCK(aux, t); rt_lock_table(t); rt_setup_digestor(t); lfjour_register(&t->export_digest->digest, &s->digest_recipient); } static void channel_roa_unsubscribe(struct roa_subscription *s) { struct channel *c = s->c; RT_LOCKED(s->tab, t) { lfjour_unregister(&s->digest_recipient); rt_unlock_table(t); } ev_postpone(&s->update_event); rem_node(&s->roa_node); mb_free(s); channel_check_stopped(c); } static void channel_roa_subscribe_filter(struct channel *c, int dir) { const struct filter *f = dir ? 
c->in_filter : c->out_filter; rtable *tab; int valid = 1, found = 0; if ((f == FILTER_ACCEPT) || (f == FILTER_REJECT)) return; /* No automatic reload for non-reloadable channels */ if (dir && !channel_reloadable(c)) valid = 0; struct filter_iterator fit; FILTER_ITERATE_INIT(&fit, f->root, c->proto->pool); FILTER_ITERATE(&fit, fi) { switch (fi->fi_code) { case FI_ROA_CHECK: tab = fi->i_FI_ROA_CHECK.rtc->table; if (valid) channel_roa_subscribe(c, tab, dir); found = 1; break; case FI_ASPA_CHECK_EXPLICIT: tab = fi->i_FI_ASPA_CHECK_EXPLICIT.rtc->table; if (valid) channel_roa_subscribe(c, tab, dir); found = 1; break; default: break; } } FILTER_ITERATE_END; FILTER_ITERATE_CLEANUP(&fit); if (!valid && found) log(L_WARN "%s.%s: Automatic RPKI reload not active for %s", c->proto->name, c->name ?: "?", dir ? "import" : "export"); } static void channel_roa_unsubscribe_all(struct channel *c) { struct roa_subscription *s; node *n, *x; WALK_LIST2_DELSAFE(s, n, x, c->roa_subscriptions, roa_node) channel_roa_unsubscribe(s); } static void channel_start_import(struct channel *c) { if (c->in_req.hook) { log(L_WARN "%s.%s: Attempted to start channel's already started import", c->proto->name, c->name); return; } c->in_req = (struct rt_import_request) { .name = mb_sprintf(c->proto->pool, "%s.%s", c->proto->name, c->name), .trace_routes = c->debug | c->proto->debug, .loop = c->proto->loop, .dump_req = channel_dump_import_req, .log_state_change = channel_import_log_state_change, .preimport = channel_preimport, }; ASSERT(c->channel_state == CS_UP); channel_reset_limit(c, &c->rx_limit, PLD_RX); channel_reset_limit(c, &c->in_limit, PLD_IN); bmap_init(&c->imported_map, c->proto->pool, 16); memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); DBG("%s.%s: Channel start import req=%p\n", c->proto->name, c->name, &c->in_req); rt_request_import(c->table, &c->in_req); } void channel_notify_basic(void *); void channel_notify_accepted(void *); void channel_notify_merged(void *); void 
channel_start_export(struct channel *c)
{
  /* Must run inside the protocol's loop, on a channel in CS_UP whose export is down */
  ASSERT_DIE(birdloop_inside(c->proto->loop));

  if (rt_export_get_state(&c->out_req) != TES_DOWN)
    bug("%s.%s: Attempted to start channel's already started export", c->proto->name, c->name);

  ASSERT(c->channel_state == CS_UP);

  /* Export-related allocations live in their own pool, freed in channel_stop_export() */
  pool *p = rp_newf(c->proto->pool, c->proto->pool->domain, "Channel %s.%s export", c->proto->name, c->name);

  c->out_req = (struct rt_export_request) {
    .name = mb_sprintf(p, "%s.%s", c->proto->name, c->name),
    .r = {
      .target = proto_work_list(c->proto),
      .event = &c->out_event,
    },
    .pool = p,
    .feeder.prefilter = {
      .mode = c->out_subprefix ? TE_ADDR_IN : TE_ADDR_NONE,
      .addr = c->out_subprefix,
    },
    .trace_routes = c->debug | c->proto->debug,
    .dump = channel_dump_export_req,
    .fed = channel_export_fed,
  };

  c->out_event = (event) {
    .data = c,
  };

  bmap_init(&c->export_accepted_map, p, 16);
  bmap_init(&c->export_rejected_map, p, 16);

  channel_reset_limit(c, &c->out_limit, PLD_OUT);

  memset(&c->export_stats, 0, sizeof(struct channel_export_stats));

  DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req);

  /* Pick the notification hook and the table exporter by announcement mode */
  switch (c->ra_mode)
  {
    case RA_OPTIMAL:
      c->out_event.hook = channel_notify_basic;
      rt_export_subscribe(c->table, best, &c->out_req);
      break;
    case RA_ANY:
      c->out_event.hook = channel_notify_basic;
      rt_export_subscribe(c->table, all, &c->out_req);
      break;
    case RA_ACCEPTED:
      c->out_event.hook = channel_notify_accepted;
      rt_export_subscribe(c->table, all, &c->out_req);
      break;
    case RA_MERGED:
      c->out_event.hook = channel_notify_merged;
      rt_export_subscribe(c->table, all, &c->out_req);
      break;
    default:
      bug("Unknown route announcement mode");
  }
}

/* Finish a pending CS_STOP or CS_PAUSE transition once nothing blocks it:
 * no obstacles, no ROA subscriptions and (for CS_STOP only) no import hook.
 * CS_STOP then goes to CS_DOWN and the protocol event is sent; CS_PAUSE
 * goes to CS_START. In other states nothing but the debug message happens. */
static void
channel_check_stopped(struct channel *c)
{
  switch (c->channel_state)
  {
    case CS_STOP:
      if (c->obstacles || !EMPTY_LIST(c->roa_subscriptions) || c->in_req.hook)
        return;

      ASSERT_DIE(rt_export_get_state(&c->out_req) == TES_DOWN);
      ASSERT_DIE(!rt_export_feed_active(&c->reimporter));

      channel_set_state(c, CS_DOWN);
      proto_send_event(c->proto, c->proto->event);

      break;
    case CS_PAUSE:
      if (c->obstacles || !EMPTY_LIST(c->roa_subscriptions))
        return;

      ASSERT_DIE(rt_export_get_state(&c->out_req) == TES_DOWN);
      ASSERT_DIE(!rt_export_feed_active(&c->reimporter));

      channel_set_state(c, CS_START);
      break;
  }

  DBG("%s.%s: Channel requests/hooks stopped (in state %s)\n", c->proto->name, c->name, c_states[c->channel_state]);
}

/* Register one more obstacle preventing the channel from finishing its stop. */
void
channel_add_obstacle(struct channel *c)
{
  c->obstacles++;
}

/* Remove one obstacle; when the counter drops to zero, re-check the stop. */
void
channel_del_obstacle(struct channel *c)
{
  if (!--c->obstacles)
    channel_check_stopped(c);
}

/* Callback passed to rt_stop_import(): free the import request name and
 * the imported-routes bitmap, then re-check whether the channel may stop. */
void
channel_import_stopped(struct rt_import_request *req)
{
  SKIP_BACK_DECLARE(struct channel, c, in_req, req);

  mb_free(c->in_req.name);
  c->in_req.name = NULL;

  bmap_free(&c->imported_map);

  channel_check_stopped(c);
}

/* next_feed_index hook of the reimporter: starting at @try_this, return the
 * first index set in imported_map. Unset indices are skipped, except that
 * every value with at most one bit set is returned regardless. */
static u32
channel_reimport_next_feed_index(struct rt_export_feeder *f, u32 try_this)
{
  SKIP_BACK_DECLARE(struct channel, c, reimporter, f);
  while (!bmap_test(&c->imported_map, try_this))
    if (!(try_this & (try_this - 1))) /* return every power of two to check for maximum */
      return try_this;
    else
      try_this++;

  return try_this;
}

/* Reimport event hook: walk the reimporter feed and push every route sent
 * by this channel through rte_update() again, stripped to its pre-import
 * attribute layer. */
static void
channel_do_reload(void *_c)
{
  struct channel *c = _c;

  RT_FEED_WALK(&c->reimporter, f)
  {
    /* Did this net contain any route imported by this channel? */
    bool seen = 0;
    for (uint i = 0; i < f->count_routes; i++)
    {
      rte *r = &f->block[i];

      /* The first obsolete route ends the scan of this feed block */
      if (r->flags & REF_OBSOLETE)
        break;

      if (r->sender == c->in_req.hook)
      {
        /* Strip the table-specific information */
        rte new = rte_init_from(r);

        /* Strip the later attribute layers */
        new.attrs = ea_strip_to(new.attrs, BIT32_ALL(EALS_PREIMPORT));

        /* And reload the route */
        rte_update(c, r->net, &new, new.src);

        seen = 1;
      }
    }

    /* Nothing of ours in this net, so forget it in the imported map */
    if (!seen)
      bmap_clear(&c->imported_map, f->ni->index);

    /* Local data needed no more */
    tmp_flush();

    MAYBE_DEFER_TASK(proto_work_list(c->proto), &c->reimport_event,
        "%s.%s reimport", c->proto->name, c->name);
  }
}

/* Called by protocol to activate in_table */
static void
channel_setup_in_table(struct channel *c)
{
  c->reimporter = (struct rt_export_feeder) {
    .name = mb_sprintf(c->proto->pool, "%s.%s.reimport", c->proto->name, c->name),
    .trace_routes = c->debug,
    .next_feed_index = channel_reimport_next_feed_index,
  };
  c->reimport_event = (event) {
    .hook = channel_do_reload,
    .data = c,
  };
  rt_feeder_subscribe(&c->table->export_all, &c->reimporter);
}

/* Leaving CS_DOWN: count the channel as active, set up the reimporter when
 * pre-filter routes are kept, run the class start hook and start import. */
static void
channel_do_start(struct channel *c)
{
  c->proto->active_channels++;

  if ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER)
    channel_setup_in_table(c);

  CALL(c->class->start, c);

  channel_start_import(c);
}

/* Entering CS_UP */
static void
channel_do_up(struct channel *c)
{
  /* Register RPKI/ROA subscriptions */
  if (c->rpki_reload)
  {
    channel_roa_subscribe_filter(c, 1);
    channel_roa_subscribe_filter(c, 0);
  }
}

/* Leaving CS_UP */
static void
channel_do_pause(struct channel *c)
{
  /* Drop ROA subscriptions */
  channel_roa_unsubscribe_all(c);

  /* Stop export */
  channel_stop_export(c);
}

/* Entering CS_STOP */
static void
channel_do_stop(struct channel *c)
{
  /* Stop import */
  if (c->in_req.hook)
    rt_stop_import(&c->in_req, channel_import_stopped);

  /* Need to abort reimports as well */
  rt_feeder_unsubscribe(&c->reimporter);
  ev_postpone(&c->reimport_event);

  /* Give up any graceful restart wait and its lock */
  c->gr_wait = 0;
  if (OBSREF_GET(c->gr_lock))
    channel_graceful_restart_unlock(c);

  CALL(c->class->shutdown, c);
}

/* Entering CS_DOWN */
static void
channel_do_down(struct channel *c)
{
  ASSERT_DIE(!rt_export_feed_active(&c->reimporter));

  c->proto->active_channels--;

  memset(&c->import_stats, 0, sizeof(struct channel_import_stats));
  memset(&c->export_stats, 0, sizeof(struct channel_export_stats));

  c->out_table = NULL;

  /* The in_table and out_table are going to be freed by freeing their resource pools. */

  CALL(c->class->cleanup, c);

  /* Schedule protocol shutdown */
  if (proto_is_done(c->proto))
    proto_send_event(c->proto, c->proto->event);
}

/* Move the channel to @state and run the actions of that transition.
 * Requesting the current state is a no-op. The new state and its timestamp
 * are stored before the transition actions run; the asserted predecessor
 * states are listed in each case. */
void
channel_set_state(struct channel *c, uint state)
{
  uint cs = c->channel_state;

  DBG("%s reporting channel %s state transition %s -> %s\n", c->proto->name, c->name, c_states[cs], c_states[state]);
  if (state == cs)
    return;

  c->channel_state = state;
  c->last_state_change = current_time();

  switch (state)
  {
  case CS_START:
    ASSERT(cs == CS_DOWN || cs == CS_PAUSE);

    if (cs == CS_DOWN)
      channel_do_start(c);

    break;

  case CS_UP:
    ASSERT(cs == CS_DOWN || cs == CS_START);

    if (cs == CS_DOWN)
      channel_do_start(c);

    /* Export starts only without a graceful restart wait and with an rt_notify hook */
    if (!c->gr_wait && c->proto->rt_notify)
      channel_start_export(c);

    channel_do_up(c);
    break;

  case CS_PAUSE:
    ASSERT(cs == CS_UP);

    if (cs == CS_UP)
      channel_do_pause(c);
    break;

  case CS_STOP:
    ASSERT(cs == CS_UP || cs == CS_START || cs == CS_PAUSE);

    if (cs == CS_UP)
      channel_do_pause(c);

    channel_do_stop(c);
    break;

  case CS_DOWN:
    ASSERT(cs == CS_STOP);

    channel_do_down(c);
    break;

  default:
    ASSERT(0);
  }

  channel_log_state_change(c);
}

/* Stop a running export: unsubscribe from the exporter chosen in
 * channel_start_export(), postpone the export event, free the export
 * bitmaps and the export pool, then re-check whether the channel may stop.
 * An export which is already down is left alone. */
static void
channel_stop_export(struct channel *c)
{
  switch (rt_export_get_state(&c->out_req))
  {
    case TES_FEEDING:
    case TES_PARTIAL:
    case TES_READY:
      if (c->ra_mode == RA_OPTIMAL)
        rt_export_unsubscribe(best, &c->out_req);
      else
        rt_export_unsubscribe(all, &c->out_req);

      ev_postpone(&c->out_event);

      bmap_free(&c->export_accepted_map);
      bmap_free(&c->export_rejected_map);

      /* The name was allocated from the export pool freed right below */
      c->out_req.name = NULL;
      rfree(c->out_req.pool);

      channel_check_stopped(c);
      break;

    case TES_DOWN:
      break;

    case TES_STOP:
    case TES_MAX:
      bug("Impossible export state");
  }
}

/* Request an import reload of a reloadable channel with a running import.
 * @cir: feeding request of a partial reload, or NULL for a full reload.
 * With pre-filter routes kept, the reload is served by the reimporter;
 * otherwise the protocol's reload_routes hook is called and a CLI message
 * is emitted when it returns zero. */
void
channel_request_reload(struct channel *c, struct rt_feeding_request *cir)
{
  ASSERT(c->in_req.hook);
  ASSERT(channel_reloadable(c));

  if (cir)
    CD(c, "Partial import reload requested");
  else
    CD(c, "Full import reload requested");

  if ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER)
    channel_reimport(c, cir);
  else if (! c->proto->reload_routes(c, cir))
    cli_msg(-15, "%s.%s: partial reload refused, please run full reload instead", c->proto->name, c->name);
}

/* Channel class used by channel_config_new() when the caller passes none */
const struct channel_class channel_basic = {
  .channel_size = sizeof(struct channel),
  .config_size = sizeof(struct channel_config)
};

/* Allocate a new channel configuration, fill in defaults and append it to
 * the channel list of @proto.
 * @cc: channel class, or NULL for channel_basic
 * @name: channel name
 * @net_type: network type; when nonzero it is validated against the protocol
 *   and the default table of that type is used
 * @proto: protocol configuration owning the channel
 * Returns the new channel configuration. */
void *
channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto)
{
  struct channel_config *cf = NULL;
  struct rtable_config *tab = NULL;

  if (net_type)
  {
    if (!net_val_match(net_type, proto->protocol->channel_mask))
      cf_error("Unsupported channel type");

    if (proto->net_type && (net_type != proto->net_type) && (net_type != NET_MPLS))
      cf_error("Different channel type");

    tab = rt_get_default_table(new_config, net_type);
  }

  if (!cc)
    cc = &channel_basic;

  cf = cfg_allocz(cc->config_size);
  cf->name = name;
  cf->class = cc;
  cf->parent = proto;
  cf->table = tab;
  cf->out_filter = FILTER_REJECT;

  cf->net_type = net_type;
  cf->ra_mode = RA_OPTIMAL;
  cf->preference = proto->protocol->preference;
  cf->debug = new_config->channel_default_debug;
  cf->rpki_reload = 1;

  add_tail(&proto->channels, &cf->n);

  return cf;
}

/* Return the channel configuration called @name in @proto, creating it via
 * channel_config_new() if there is none. An existing one may be taken over
 * only once; a second definition by the same protocol is a config error. */
void *
channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto)
{
  struct channel_config *cf;

  /* We are using name as token, so no strcmp() */
  WALK_LIST(cf, proto->channels)
    if (cf->name == name)
    {
      /* Allow to redefine channel only if inherited from template */
      if (cf->parent == proto)
        cf_error("Multiple %s channels", name);

      cf->parent = proto;
      cf->copy = 1;
      return cf;
    }

  return channel_config_new(cc, name, net_type, proto);
}

/* Duplicate the channel configuration @src into the channel list of @proto
 * and let the channel class copy its own data. Returns the copy. */
struct channel_config *
channel_copy_config(struct channel_config *src, struct proto_config *proto)
{
  struct channel_config *dst = cfg_alloc(src->class->config_size);

  memcpy(dst, src, src->class->config_size);
  /* The copied list node still points into the source list; reset it before linking */
  memset(&dst->n, 0, sizeof(node));
  add_tail(&proto->channels, &dst->n);
  CALL(src->class->copy_config, dst, src);

  return dst;
}

static int reconfigure_type; /* Hack to propagate type
info to channel_reconfigure() */ int channel_reconfigure(struct channel *c, struct channel_config *cf) { /* Touched by reconfiguration */ c->stale = 0; /* FIXME: better handle these changes, also handle in_keep_filtered */ if ((c->table != cf->table->table) || (cf->ra_mode && (c->ra_mode != cf->ra_mode)) || (cf->in_keep != c->in_keep) || cf->out_subprefix && c->out_subprefix && !net_equal(cf->out_subprefix, c->out_subprefix) || (!cf->out_subprefix != !c->out_subprefix)) return 0; /* Note that filter_same() requires arguments in (new, old) order */ int import_changed = !filter_same(cf->in_filter, c->in_filter); int export_changed = !filter_same(cf->out_filter, c->out_filter); int rpki_reload_changed = (cf->rpki_reload != c->rpki_reload); if (c->preference != cf->preference) import_changed = 1; if (c->merge_limit != cf->merge_limit) export_changed = 1; /* Reconfigure channel fields */ c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; channel_update_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); channel_update_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); channel_update_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); // c->ra_mode = cf->ra_mode; c->merge_limit = cf->merge_limit; c->preference = cf->preference; c->out_req.feeder.prefilter.addr = c->out_subprefix = cf->out_subprefix; c->debug = cf->debug; c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; c->rpki_reload = cf->rpki_reload; /* Execute channel-specific reconfigure hook */ if (c->class->reconfigure && !c->class->reconfigure(c, cf, &import_changed, &export_changed)) return 0; /* If the channel is not open, it has no routes and we cannot reload it anyways */ if (c->channel_state != CS_UP) goto done; /* Update RPKI/ROA subscriptions */ if (import_changed || export_changed || rpki_reload_changed) { channel_roa_unsubscribe_all(c); if (c->rpki_reload) { channel_roa_subscribe_filter(c, 1); channel_roa_subscribe_filter(c, 0); } } if (reconfigure_type == RECONFIG_SOFT) { if 
(import_changed)
      log(L_INFO "Channel %s.%s changed import", c->proto->name, c->name);
    if (export_changed)
      log(L_INFO "Channel %s.%s changed export", c->proto->name, c->name);

    goto done;
  }

  /* Route reload may not be supported by the channel */
  if (import_changed && !channel_reloadable(c))
    return 0;

  if (import_changed || export_changed)
    log(L_INFO "Reloading channel %s.%s", c->proto->name, c->name);

  if (import_changed)
    channel_request_reload(c, NULL);

  if (export_changed)
    channel_request_full_refeed(c);

done:
  CD(c, "Reconfigured");
  return 1;
}

/*
 * Bring the channel slot *@pc of protocol @p in line with configuration @cf.
 *
 * - no channel yet, @cf given: the channel is added, but only if the protocol
 *   is in PS_DOWN_XX;
 * - channel exists, @cf is NULL: the channel is removed, but only if it is
 *   in CS_DOWN;
 * - both exist: channel_reconfigure() is attempted.
 *
 * Returns 1 on success (also when both @c and @cf are NULL), 0 when the
 * requested change could not be done; the reason is logged in that case.
 */
int
proto_configure_channel(struct proto *p, struct channel **pc, struct channel_config *cf)
{
  struct channel *c = *pc;

  if (!c && cf)
  {
    /* We could add the channel, but currently it would just stay in down state
       until protocol is restarted, so it is better to force restart anyways. */
    if (p->proto_state != PS_DOWN_XX)
    {
      log(L_INFO "Cannot add channel %s.%s", p->name, cf->name);
      return 0;
    }

    *pc = proto_add_channel(p, cf);
  }
  else if (c && !cf)
  {
    if (c->channel_state != CS_DOWN)
    {
      log(L_INFO "Cannot remove channel %s.%s", c->proto->name, c->name);
      return 0;
    }

    proto_remove_channel(p, c);
    *pc = NULL;
  }
  else if (c && cf)
  {
    if (!channel_reconfigure(c, cf))
    {
      log(L_INFO "Cannot reconfigure channel %s.%s", c->proto->name, c->name);
      return 0;
    }
  }

  return 1;
}

/*
 * Callback passed to birdloop_stop_self() by proto_event(). It is expected to
 * run in main_birdloop (asserted) for a protocol which had its own loop.
 * Frees that loop, moves the protocol back to main_birdloop and finishes
 * the transition to PS_DOWN_XX.
 */
static void
proto_loop_stopped(void *ptr)
{
  struct proto *p = ptr;

  ASSERT_DIE(birdloop_inside(&main_birdloop));
  ASSERT_DIE(p->loop != &main_birdloop);

  p->pool = NULL; /* is freed by birdloop_free() */
  birdloop_free(p->loop);
  p->loop = &main_birdloop;

  proto_notify_state(p, PS_DOWN_XX);
  proto_rethink_goal(p);
}

/*
 * Protocol event hook (installed as p->event in proto_init()).
 * Clears the do_stop flag and, once proto_is_done() reports that the protocol
 * is finished, releases the in-loop resources and moves the protocol to
 * PS_DOWN_XX -- directly when it lives in main_birdloop, or via
 * proto_loop_stopped() after its own loop has been stopped.
 */
static void
proto_event(void *ptr)
{
  struct proto *p = ptr;

  if (p->do_stop)
  {
    p->do_stop = 0;
  }

  if (proto_is_done(p) && p->pool_inloop) /* perusing pool_inloop to do this once only */
  {
    /* Interface notification unsubscribe can't be done
     * before the protocol is really done, as it also destroys
     * the neighbors which may be needed (e.g. by BGP->MRT)
     * during the STOP phase as well. */
    iface_unsubscribe(&p->iface_sub);

    rp_free(p->pool_inloop);
    p->pool_inloop = NULL;
    if (p->loop != &main_birdloop)
      birdloop_stop_self(p->loop, proto_loop_stopped, p);
    else
    {
      proto_notify_state(p, PS_DOWN_XX);
      proto_rethink_goal(p);
    }
  }
}


/**
 * proto_new - create a new protocol instance
 * @cf: protocol configuration
 *
 * When a new configuration has been read in, the core code starts
 * initializing all the protocol instances configured by calling their
 * init() hooks with the corresponding instance configuration. The initialization
 * code of the protocol is expected to create a new instance according to the
 * configuration by calling this function and then modifying the default settings
 * to values wanted by the protocol.
 *
 * The instance is allocated zeroed from proto_pool with the size given by
 * the protocol's proto_size, gets a free protocol ID assigned and its
 * initial state attributes queued by proto_announce_state_later().
 */
void *
proto_new(struct proto_config *cf)
{
  struct proto *p = mb_allocz(proto_pool, cf->protocol->proto_size);

  OBSREF_SET(p->global_config, cf->global);
  p->cf = cf;
  p->debug = cf->debug;
  p->mrtdump = cf->mrtdump;
  p->name = cf->name;
  p->proto = cf->protocol;
  p->proto_state = PS_DOWN_XX;
  p->last_state_change = current_time();
  p->last_reconfiguration = current_time();
  p->net_type = cf->net_type;
  p->disabled = cf->disabled;
  p->hash_key = random_u32();
  cf->proto = p;
  p->last_restart = current_time();
  p->restart_limit = cf->restart_limit;

  PST_LOCKED(tp)
  {
    /* Allocate the lowest unused protocol ID */
    p->id = hmap_first_zero(&tp->proto_id_map);
    hmap_set(&tp->proto_id_map, p->id);

    if (p->id >= tp->proto_len)
    {
      /* Grow the states array */
      ea_list **new_states = mb_allocz(tp->pool, sizeof *new_states * tp->proto_len * 2);
      memcpy(new_states, tp->proto_states, tp->proto_len * sizeof *new_states);
      mb_free(tp->proto_states);
      tp->proto_states = new_states;
      tp->proto_len *= 2;
    }
  }

  init_list(&p->channels);

  /* Initial version of protocol state. */
  struct ea_list *state = NULL;

  ea_set_attr(&state, EA_LITERAL_STORE_STRING(&ea_name, 0, p->name));
  ea_set_attr(&state, EA_LITERAL_STORE_PTR(&ea_protocol_type, 0, &p->proto));
  ea_set_attr(&state, EA_LITERAL_EMBEDDED(&ea_state, 0, p->proto_state));
  ea_set_attr(&state, EA_LITERAL_STORE_ADATA(&ea_last_modified, 0, &p->last_state_change, sizeof(btime)));
  ea_set_attr(&state, EA_LITERAL_EMBEDDED(&ea_proto_id, 0, p->id));
  ea_set_attr(&state, EA_LITERAL_STORE_ADATA(&ea_proto_channel_list, 0, NULL, 0));

  proto_announce_state_later(p, state);

  return p;
}

/*
 * Create a protocol instance from configuration @c by calling the init()
 * hook of its protocol, insert it into global_proto_list right after @after,
 * announce its state and allocate its event. The instance starts attached
 * to main_birdloop. Returns the new instance.
 */
static struct proto *
proto_init(struct proto_config *c, struct proto *after)
{
  struct protocol *pr = c->protocol;
  struct proto *p = pr->init(c);

  p->loop = &main_birdloop;
  p->vrf = c->vrf;
  proto_add_after(&global_proto_list, p, after);

  proto_announce_state(p, p->ea_state);

  p->event = ev_new_init(proto_pool, proto_event, p);

  PD(p, "Initializing%s", p->disabled ? " [disabled]" : "");

  return p;
}

/*
 * Start protocol @p: mark it for graceful recovery if a recovery context is
 * set, create its loop (when a non-default loop order is configured) and
 * pools, prepare the interface subscription and finally call the start()
 * hook, reporting the returned state (PS_UP when there is no hook).
 */
static void
proto_start(struct proto *p)
{
  DBG("Kicking %s up\n", p->name);
  PD(p, "Starting");

  if (OBSREF_GET(graceful_recovery_context))
    p->gr_recovery = 1;

  /* A dedicated birdloop is created only when the configured loop order
   * differs from the_bird; otherwise only a resource pool is created. */
  if (p->cf->loop_order != DOMAIN_ORDER(the_bird))
  {
    p->loop = birdloop_new(proto_pool, p->cf->loop_order, p->cf->loop_max_latency, "Protocol %s", p->cf->name);
    p->pool = birdloop_pool(p->loop);
  }
  else
    p->pool = rp_newf(proto_pool, the_bird_domain.the_bird, "Protocol %s", p->cf->name);

  p->iface_sub.target = proto_event_list(p);
  p->iface_sub.name = p->name;
  p->iface_sub.debug = !!(p->debug & D_IFACES);

  PROTO_LOCKED_FROM_MAIN(p)
  {
    p->pool_inloop = rp_newf(p->pool, birdloop_domain(p->loop), "Protocol %s early cleanup objects", p->cf->name);
    p->pool_up = rp_newf(p->pool, birdloop_domain(p->loop), "Protocol %s stop-free objects", p->cf->name);
    proto_notify_state(p, (p->proto->start ? p->proto->start(p) : PS_UP));
  }
}


/**
 * proto_config_new - create a new protocol configuration
 * @pr: protocol the configuration will belong to
 * @class: SYM_PROTO or SYM_TEMPLATE
 *
 * Whenever the configuration file says that a new instance
 * of a routing protocol should be created, the parser calls
 * proto_config_new() to create a configuration entry for this
 * instance (a structure starting with the &proto_config header
 * containing all the generic items followed by protocol-specific
 * ones). Also, the configuration entry gets added to the list
 * of protocol instances kept in the configuration.
 *
 * The function is also used to create protocol templates (when class
 * SYM_TEMPLATE is specified), the only difference is that templates
 * are not added to the list of protocol instances and therefore not
 * initialized during protos_commit()).
 */
void *
proto_config_new(struct protocol *pr, int class)
{
  struct proto_config *cf = cfg_allocz(pr->config_size);

  /* Only real protocols (not templates) are linked to the config */
  if (class == SYM_PROTO)
    add_tail(&new_config->protos, &cf->n);

  cf->global = new_config;
  cf->protocol = pr;
  cf->name = pr->name;
  cf->class = class;
  cf->debug = new_config->proto_default_debug;
  cf->mrtdump = new_config->proto_default_mrtdump;
  cf->loop_order = DOMAIN_ORDER(the_bird);
  cf->restart_limit = 5 S;

  init_list(&cf->channels);

  return cf;
}


/**
 * proto_copy_config - copy a protocol configuration
 * @dest: destination protocol configuration
 * @src: source protocol configuration
 *
 * Whenever a new instance of a routing protocol is created from the
 * template, proto_copy_config() is called to copy a content of
 * the source protocol configuration to the new protocol configuration.
 * Name, class and a node in protos list of @dest are kept intact.
 * copy_config() protocol hook is used to copy protocol-specific data.
*/ void proto_copy_config(struct proto_config *dest, struct proto_config *src) { struct channel_config *cc; node old_node; int old_class; const char *old_name; if (dest->protocol != src->protocol) cf_error("Can't copy configuration from a different protocol type"); if (dest->protocol->copy_config == NULL) cf_error("Inheriting configuration for %s is not supported", src->protocol->name); DBG("Copying configuration from %s to %s\n", src->name, dest->name); /* * Copy struct proto_config here. Keep original node, class and name. * protocol-specific config copy is handled by protocol copy_config() hook */ old_node = dest->n; old_class = dest->class; old_name = dest->name; memcpy(dest, src, src->protocol->config_size); dest->n = old_node; dest->class = old_class; dest->name = old_name; init_list(&dest->channels); WALK_LIST(cc, src->channels) channel_copy_config(cc, dest); /* FIXME: allow for undefined copy_config */ dest->protocol->copy_config(dest, src); } void proto_clone_config(struct symbol *sym, struct proto_config *parent) { struct proto_config *cf = proto_config_new(parent->protocol, SYM_PROTO); proto_copy_config(cf, parent); cf->name = sym->name; cf->proto = NULL; cf->parent = parent; sym->class = cf->class; sym->proto = cf; } static void proto_undef_clone(struct symbol *sym, struct proto_config *cf) { rem_node(&cf->n); sym->class = SYM_VOID; sym->proto = NULL; } /** * protos_preconfig - pre-configuration processing * @c: new configuration * * This function calls the preconfig() hooks of all routing * protocols available to prepare them for reading of the new * configuration. 
*/ void protos_preconfig(struct config *c) { struct protocol *p; init_list(&c->protos); DBG("Protocol preconfig:"); WALK_LIST(p, protocol_list) { DBG(" %s", p->name); p->name_counter = 0; if (p->preconfig) p->preconfig(p, c); } DBG("\n"); } static int proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config *nc, int type) { /* If the protocol is DOWN, we just restart it */ if ((p->proto_state == PS_DOWN_XX) || (p->proto_state == PS_FLUSH)) return 0; /* If there is a too big change in core attributes, ... */ if ((nc->protocol != oc->protocol) || (nc->net_type != oc->net_type) || (nc->disabled != p->disabled) || (nc->vrf != oc->vrf)) return 0; p->sources.name = p->name = nc->name; p->sources.debug = p->debug = nc->debug; p->mrtdump = nc->mrtdump; p->restart_limit = nc->restart_limit; reconfigure_type = type; /* Execute protocol specific reconfigure hook */ if (!p->proto->reconfigure || !p->proto->reconfigure(p, nc)) return 0; DBG("\t%s: same\n", oc->name); PD(p, "Reconfigured"); p->cf = nc; return 1; } static struct protos_commit_request { struct config *new; struct config *old; enum protocol_startup phase; int type; } protos_commit_request; static int proto_rethink_goal_pending = 0; static void protos_do_commit(struct config *new, struct config *old, int type); /** * protos_commit - commit new protocol configuration * @new: new configuration * @old: old configuration or %NULL if it's boot time config * @type: type of reconfiguration (RECONFIG_SOFT or RECONFIG_HARD) * * Scan differences between @old and @new configuration and adjust all * protocol instances to conform to the new configuration. * * When a protocol exists in the new configuration, but it doesn't in the * original one, it's immediately started. When a collision with the other * running protocol would arise, the new protocol will be temporarily stopped * by the locking mechanism. 
*
 * When a protocol exists in the old configuration, but it doesn't in the
 * new one, it's shut down and deleted after the shutdown completes.
 *
 * When a protocol exists in both configurations, the core decides
 * whether it's possible to reconfigure it dynamically - it checks all
 * the core properties of the protocol (changes in filters are ignored
 * if type is RECONFIG_SOFT) and if they match, it asks the
 * reconfigure() hook of the protocol to see if the protocol is able
 * to switch to the new configuration. If it isn't possible, the
 * protocol is shut down and a new instance is started with the new
 * configuration after the shutdown is completed.
 */
void
protos_commit(struct config *new, struct config *old, int type)
{
  /* The commit runs in rounds, one per protocol startup class. A regular
   * shutdown begins with PROTOCOL_STARTUP_REGULAR, everything else with
   * PROTOCOL_STARTUP_NECESSARY; protos_do_commit() then steps the phase. */
  protos_commit_request = (struct protos_commit_request) {
    .new = new,
    .old = old,
    .phase = (new->shutdown && !new->gr_down) ? PROTOCOL_STARTUP_REGULAR : PROTOCOL_STARTUP_NECESSARY,
    .type = type,
  };

  protos_do_commit(new, old, type);
}

/*
 * Process one round of the commit for the phase stored in
 * protos_commit_request: handle every old protocol whose startup class
 * equals the phase (reconfigure, restart, remove or shut down), then create
 * instances for new configurations of that class which have none yet.
 * Afterwards the phase is advanced and the next round is run immediately,
 * unless some protocol shutdown is still pending -- in that case
 * proto_rethink_goal() calls this function again later.
 */
static void
protos_do_commit(struct config *new, struct config *old, int type)
{
  enum protocol_startup phase = protos_commit_request.phase;
  struct proto_config *oc, *nc;
  struct symbol *sym;
  struct proto *p;

  /* All phases done: forget the request */
  if ((phase < PROTOCOL_STARTUP_REGULAR) || (phase > PROTOCOL_STARTUP_NECESSARY))
  {
    protos_commit_request = (struct protos_commit_request) {};
    return;
  }

  DBG("protos_commit:\n");
  if (old)
  {
    WALK_LIST(oc, old->protos)
    {
      if (oc->protocol->startup != phase)
        continue;

      p = oc->proto;
      sym = cf_find_symbol(new, oc->name);

      struct birdloop *proto_loop = PROTO_ENTER_FROM_MAIN(p);

      /* Handle dynamic protocols */
      if (!sym && oc->parent && !new->shutdown)
      {
        struct symbol *parsym = cf_find_symbol(new, oc->parent->name);
        if (parsym && parsym->class == SYM_PROTO)
        {
          /* This is hack, we would like to share config, but we need to copy it now */
          new_config = new;
          cfg_mem = new->mem;
          new->current_scope = new->root_scope;
          sym = cf_get_symbol(new, oc->name);
          proto_clone_config(sym, parsym->proto);
          new_config = NULL;
          cfg_mem = NULL;
        }
      }

      if (sym && sym->class == SYM_PROTO && !new->shutdown)
      {
        /* Found match, let's check if we can smoothly switch to new configuration */
        /* No need to check description */
        nc = sym->proto;
        nc->proto = p;

        /* We will try to reconfigure protocol p */
        if (proto_reconfigure(p, oc, nc, type))
        {
          OBSREF_CLEAR(p->global_config);
          OBSREF_SET(p->global_config, new);
          proto_announce_state(p, p->ea_state);
          PROTO_LEAVE_FROM_MAIN(proto_loop);
          continue;
        }

        /* A cloned (dynamic) config which can't be applied is dropped
         * and the protocol is removed instead of restarted */
        if (nc->parent)
        {
          proto_undef_clone(sym, nc);
          goto remove;
        }

        /* Unsuccessful, we will restart it */
        if (!p->disabled && !nc->disabled)
          log(L_INFO "Restarting protocol %s", p->name);
        else if (p->disabled && !nc->disabled)
          log(L_INFO "Enabling protocol %s", p->name);
        else if (!p->disabled && nc->disabled)
          log(L_INFO "Disabling protocol %s", p->name);

        p->down_code = nc->disabled ? PDC_CF_DISABLE : PDC_CF_RESTART;
        p->cf_new = nc;
      }
      else if (!new->shutdown)
      {
      remove:
        log(L_INFO "Removing protocol %s", p->name);
        p->down_code = PDC_CF_REMOVE;
        p->cf_new = NULL;
      }
      else if (new->gr_down)
      {
        p->down_code = PDC_CMD_GR_DOWN;
        p->cf_new = NULL;
      }
      else /* global shutdown */
      {
        p->down_code = PDC_CMD_SHUTDOWN;
        p->cf_new = NULL;
      }

      p->reconfiguring = 1;
      proto_announce_state(p, p->ea_state);
      PROTO_LEAVE_FROM_MAIN(proto_loop);
      proto_rethink_goal(p);
    }
  }

  /* New instances are inserted after the instance of the preceding config */
  struct proto *after = NULL;

  WALK_LIST(nc, new->protos)
    if ((nc->protocol->startup == phase) && !nc->proto)
    {
      /* Not a first-time configuration */
      if (old)
        log(L_INFO "Adding protocol %s", nc->name);

      p = proto_init(nc, after);
      after = p;

      proto_rethink_goal(p);
    }
    else
      after = nc->proto;

  DBG("Protocol start\n");

  /* Determine router ID for the first time - it has to be here and not in
     global_commit() because it is postponed after start of device protocol */
  if ((phase == PROTOCOL_STARTUP_NECESSARY) && !old)
  {
    struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
    if (!gr->router_id)
    {
      gr->router_id = if_choose_router_id(new->router_id_from, 0);
      if (!gr->router_id)
        die("Cannot determine router ID, please configure it manually");
    }
  }

  /* Commit next round of protocols */
  if (new->shutdown && !new->gr_down)
    protos_commit_request.phase++;
  else
    protos_commit_request.phase--;

  /* If something is pending, the next round will be called asynchronously from proto_rethink_goal(). */
  if (!proto_rethink_goal_pending)
    protos_do_commit(new, old, type);
}

/*
 * Ask protocol @p to shut down if it is in PS_START or PS_UP, by calling its
 * shutdown() hook and reporting the returned state (PS_FLUSH when there is
 * no hook). When this happens as a part of reconfiguration, the protocol is
 * counted in proto_rethink_goal_pending and marked by reconfiguring == 2.
 */
static void
proto_shutdown(struct proto *p)
{
  if (p->proto_state == PS_START || p->proto_state == PS_UP)
  {
    /* Going down */
    DBG("Kicking %s down\n", p->name);
    PD(p, "Shutting down");
    proto_notify_state(p, (p->proto->shutdown ? p->proto->shutdown(p) : PS_FLUSH));
    if (p->reconfiguring)
    {
      proto_rethink_goal_pending++;
      p->reconfiguring = 2;
    }
  }
}

/*
 * Move protocol @p towards the state it should be in.
 *
 * A protocol which is being reconfigured and has reached PS_DOWN_XX is
 * destroyed here and, if a new configuration is waiting in cf_new, replaced
 * by a fresh instance at the same list position. Then the protocol is shut
 * down when disabled or reconfiguring, or started when it is down.
 *
 * If the protocol was counted as pending by proto_shutdown(), the counter is
 * decremented and the next commit round is run when it drops to zero.
 */
static void
proto_rethink_goal(struct proto *p)
{
  /* Sampled first, as @p may be freed below */
  int goal_pending = (p->reconfiguring == 2);

  if (p->reconfiguring && (p->proto_state == PS_DOWN_XX))
  {
    struct proto_config *nc = p->cf_new;
    struct proto *after = p->n.prev;

    DBG("%s has shut down for reconfiguration\n", p->name);
    p->cf->proto = NULL;
    OBSREF_CLEAR(p->global_config);
    proto_remove_channels(p);
    proto_announce_state(p, NULL);
    proto_rem_node(&global_proto_list, p);
    rfree(p->event);
    mb_free(p->message);
    mb_free(p);
    if (!nc)
      goto done;
    p = proto_init(nc, after);
  }

  /* Determine what state we want to reach */
  if (p->disabled || p->reconfiguring)
  {
    PROTO_LOCKED_FROM_MAIN(p)
      proto_shutdown(p);
  }
  else if (p->proto_state == PS_DOWN_XX)
    proto_start(p);

done:
  if (goal_pending && !--proto_rethink_goal_pending)
    protos_do_commit(
        protos_commit_request.new,
        protos_commit_request.old,
        protos_commit_request.type
        );
}

/*
 * Create a protocol instance from @cf, placed after the last item of
 * global_proto_list, set its disabled flag to @disabled and let
 * proto_rethink_goal() start it if appropriate. Returns the instance.
 */
struct proto *
proto_spawn(struct proto_config *cf, uint disabled)
{
  struct proto *p = proto_init(cf, global_proto_list.last);
  p->disabled = disabled;
  proto_rethink_goal(p);
  return p;
}

/*
 * Disable protocol @p. Has to be called from main_birdloop (asserted).
 * Returns true if the protocol was not disabled before.
 */
bool
proto_disable(struct proto *p)
{
  ASSERT_DIE(birdloop_inside(&main_birdloop));
  bool changed = !p->disabled;
  p->disabled = 1;
  proto_rethink_goal(p);
  return changed;
}

/*
 * Enable protocol @p. Has to be called from main_birdloop (asserted).
 * Returns true if the protocol was disabled before.
 */
bool
proto_enable(struct proto *p)
{
  ASSERT_DIE(birdloop_inside(&main_birdloop));
  bool changed = p->disabled;
  p->disabled = 0;
  proto_rethink_goal(p);
  return changed;
}


/**
 * DOC: Graceful restart recovery
 *
 * Graceful restart of a router is a process when the routing plane (e.g. BIRD)
 * restarts but both the forwarding plane (e.g kernel routing table) and routing
 * neighbors keep proper routes, and therefore uninterrupted packet forwarding
 * is maintained.
 *
 * BIRD implements graceful restart recovery by deferring export of routes to
 * protocols until routing tables are refilled with the expected content. After
 * start, protocols generate routes as usual, but routes are not propagated to
 * them, until protocols report that they generated all routes. After that,
 * graceful restart recovery is finished and the export (and the initial feed)
 * to protocols is enabled.
 *
 * When graceful restart recovery need is detected during initialization, then
 * enabled protocols are marked with @gr_recovery flag before start. Such
 * protocols then decide how to proceed with graceful restart, participation is
 * voluntary. Protocols could lock the recovery for each channel by function
 * channel_graceful_restart_lock() (state stored in @gr_lock flag), which means
 * that they want to postpone the end of the recovery until they converge and
 * then unlock it. They also could set @gr_wait before advancing to %PS_UP,
 * which means that the core should defer route export to that channel until
 * the end of the recovery. This should be done by protocols that expect their
 * neighbors to keep the proper routes (kernel table, BGP sessions with BGP
 * graceful restart capability).
 *
 * The graceful restart recovery is finished when either all graceful restart
 * locks are unlocked or when graceful restart wait timer fires.
 *
 */

/**
 * graceful_recovery_done - finalize graceful restart
 * @_: unused
 *
 * When there are no locks on graceful restart, the function finalizes the
 * graceful restart recovery.
Protocols postponing route export until the end of * the recovery are awakened and the export to them is enabled. */ static void graceful_recovery_done(struct callback *_ UNUSED) { ASSERT_DIE(birdloop_inside(&main_birdloop)); ASSERT_DIE(_graceful_recovery_context.grc_state == GRS_ACTIVE); tm_stop(&_graceful_recovery_context.wait_timer); log(L_INFO "Graceful recovery done"); WALK_TLIST(proto, p, &global_proto_list) PROTO_LOCKED_FROM_MAIN(p) { p->gr_recovery = 0; struct channel *c; WALK_LIST(c, p->channels) { ASSERT_DIE(!OBSREF_GET(c->gr_lock)); /* Resume postponed export of routes */ if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify) channel_start_export(c); /* Cleanup */ c->gr_wait = 0; } } _graceful_recovery_context.grc_state = GRS_DONE; } /** * graceful_restart_recovery - request initial graceful restart recovery * * Called by the platform initialization code if the need for recovery * after graceful restart is detected during boot. Have to be called * before protos_commit(). */ void graceful_restart_recovery(void) { obstacle_target_init( &_graceful_recovery_context.obstacles, &_graceful_recovery_context.obstacles_cleared, &root_pool, "Graceful recovery"); OBSREF_SET(graceful_recovery_context, &_graceful_recovery_context); _graceful_recovery_context.grc_state = GRS_INIT; } static void graceful_recovery_timeout(timer *t UNUSED) { log(L_INFO "Graceful recovery timeout"); WALK_TLIST(proto, p, &global_proto_list) PROTO_LOCKED_FROM_MAIN(p) { struct channel *c; WALK_LIST(c, p->channels) if (OBSREF_GET(c->gr_lock)) { log(L_INFO "Graceful recovery: Not waiting for %s.%s", p->name, c->name); OBSREF_CLEAR(c->gr_lock); } } } /** * graceful_restart_init - initialize graceful restart * * When graceful restart recovery was requested, the function starts an active * phase of the recovery and initializes graceful restart wait timer. The * function have to be called after protos_commit(). 
*/ void graceful_restart_init(void) { if (!OBSREF_GET(graceful_recovery_context)) return; log(L_INFO "Graceful recovery started"); _graceful_recovery_context.grc_state = GRS_ACTIVE; _graceful_recovery_context.wait_timer = (timer) { .hook = graceful_recovery_timeout }; u32 gr_wait = atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait; tm_start(&_graceful_recovery_context.wait_timer, gr_wait S); callback_init(&_graceful_recovery_context.obstacles_cleared, graceful_recovery_done, &main_birdloop); /* The last clearing of obstacle reference will cause * the graceful recovery finish immediately. */ OBSREF_CLEAR(graceful_recovery_context); } void graceful_restart_show_status(void) { if (_graceful_recovery_context.grc_state != GRS_ACTIVE) return; cli_msg(-24, "Graceful restart recovery in progress"); cli_msg(-24, " Waiting for %u channels to recover", obstacle_target_count(&_graceful_recovery_context.obstacles)); cli_msg(-24, " Wait timer is %t/%u", tm_remains(&_graceful_recovery_context.wait_timer), atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait); } /** * channel_graceful_restart_lock - lock graceful restart by channel * @p: channel instance * * This function allows a protocol to postpone the end of graceful restart * recovery until it converges. The lock is removed when the protocol calls * channel_graceful_restart_unlock() or when the channel is closed. * * The function have to be called during the initial phase of graceful restart * recovery and only for protocols that are part of graceful restart (i.e. their * @gr_recovery is set), which means it should be called from protocol start * hooks. 
*/ void channel_graceful_restart_lock(struct channel *c) { ASSERT_DIE(birdloop_inside(&main_birdloop)); if (OBSREF_GET(c->gr_lock)) return; switch (_graceful_recovery_context.grc_state) { case GRS_INIT: case GRS_ACTIVE: OBSREF_SET(c->gr_lock, &_graceful_recovery_context); break; case GRS_NONE: case GRS_DONE: break; } } /** * channel_graceful_restart_unlock - unlock graceful restart by channel * @p: channel instance * * This function unlocks a lock from channel_graceful_restart_lock(). It is also * automatically called when the lock holding protocol went down. */ void channel_graceful_restart_unlock(struct channel *c) { OBSREF_CLEAR(c->gr_lock); } /** * protos_dump_all - dump status of all protocols * * This function dumps status of all existing protocol instances to the * debug output. It involves printing of general status information * such as protocol states, its position on the protocol lists * and also calling of a dump() hook of the protocol to print * the internals. */ void protos_dump_all(struct dump_request *dreq) { RDUMP("Protocols:\n"); WALK_TLIST(proto, p, &global_proto_list) PROTO_LOCKED_FROM_MAIN(p) { #define DPF(x) (p->x ? " " #x : "") RDUMP(" protocol %s (%p) state %s with %d active channels flags: %s%s%s\n", p->name, p, p_states[p->proto_state], p->active_channels, DPF(disabled), DPF(do_stop), DPF(reconfiguring)); #undef DPF struct channel *c; WALK_LIST(c, p->channels) { RDUMP("\tTABLE %s\n", c->table->name); if (c->in_filter) RDUMP("\tInput filter: %s\n", filter_name(c->in_filter)); if (c->out_filter) RDUMP("\tOutput filter: %s\n", filter_name(c->out_filter)); RDUMP("\tChannel state: %s/%s/%s\n", c_states[c->channel_state], c->in_req.hook ? 
rt_import_state_name(rt_import_get_state(c->in_req.hook)) : "-", rt_export_state_name(rt_export_get_state(&c->out_req))); } RDUMP("\tSOURCES\n"); if (p->proto_state != PS_DOWN_XX) rt_dump_sources(dreq, &p->sources); if (p->proto->dump && (p->proto_state != PS_DOWN_XX) && (p->proto_state != PS_FLUSH)) p->proto->dump(p, dreq); } } /** * proto_build - make a single protocol available * @p: the protocol * * After the platform specific initialization code uses protos_build() * to add all the standard protocols, it should call proto_build() for * all platform specific protocols to inform the core that they exist. */ void proto_build(struct protocol *p) { add_tail(&protocol_list, &p->n); } /* FIXME: convert this call to some protocol hook */ extern void bfd_init_all(void); void protos_build_gen(void); /** * protos_build - build a protocol list * * This function is called during BIRD startup to insert * all standard protocols to the global protocol list. Insertion * of platform specific protocols (such as the kernel syncer) * is in the domain of competence of the platform dependent * startup code. */ void protos_build(void) { proto_pool = rp_new(&root_pool, the_bird_domain.the_bird, "Protocols"); /* Protocol attributes */ ea_register_init(&ea_name); ea_register_init(&ea_protocol_name); ea_register_init(&ea_protocol_type); ea_register_init(&ea_state); ea_register_init(&ea_last_modified); ea_register_init(&ea_info); ea_register_init(&ea_proto_id); ea_register_init(&ea_channel_id); ea_register_init(&ea_in_keep); ea_register_init(&ea_proto_channel_list); ea_register_init(&ea_rtable); proto_state_table_pub.lock = DOMAIN_NEW(rtable); /* Init proto_state_table */ pool *p = rp_new(&root_pool, the_bird_domain.the_bird, "Proto state table"); PST_LOCKED(ts) { ts->channels_len = 64; ts->proto_len = 32; hmap_init(&ts->proto_id_map, p, ts->proto_len); /* for proto ids. 
Value of proto id is the same as index of that proto in ptoto_state_table->attrs */ hmap_init(&ts->channel_id_map, p, ts->channels_len); /* Zeros should be reserved for easier undef manipulation */ hmap_set(&ts->proto_id_map, 0); hmap_set(&ts->channel_id_map, 0); ts->pool = p; ts->proto_states = mb_allocz(p, sizeof(ea_list *) * ts->proto_len); ts->channel_states = mb_allocz(p, sizeof(ea_list *) * ts->channels_len * 2); } /* Init proto state journal */ struct settle_config cf = {.min = 0, .max = 0}; proto_state_table_pub.journal.item_done = proto_journal_item_cleanup; proto_state_table_pub.journal.item_size = sizeof(struct proto_pending_update); proto_state_table_pub.journal.loop = birdloop_new(&root_pool, DOMAIN_ORDER(service), 1, "proto journal loop"); proto_state_table_pub.journal.domain = proto_state_table_pub.lock.rtable; lfjour_init(&proto_state_table_pub.journal, &cf); protos_build_gen(); } /* Temporary hack to propagate restart to BGP */ int proto_restart; static void proto_restart_event_hook(void *_p) { struct proto *p = _p; if (!p->down_sched) return; proto_restart = (p->down_sched == PDS_RESTART); p->disabled = 1; proto_rethink_goal(p); if (proto_restart) { /* Store the last restart time */ p->last_restart = current_time(); /* Allow the protocol back again */ p->disabled = 0; /* No need to call proto_rethink_goal() here again as the proto_cleanup() routine will * call it after the protocol stops ... and both these routines are fixed to main_birdloop. */ } } static inline void proto_schedule_down(struct proto *p, byte restart, byte code) { /* Does not work for other states (even PS_START) */ ASSERT(p->proto_state == PS_UP); /* Scheduled restart may change to shutdown, but not otherwise */ if (p->down_sched == PDS_DISABLE) return; p->down_sched = restart ? 
PDS_RESTART : PDS_DISABLE; p->down_code = code; /* Request protocol restart to be initiated from the mainloop */ ev_send(&global_event_list, ev_new_init(p->pool, proto_restart_event_hook, p)); } /** * proto_set_message - set administrative message to protocol * @p: protocol * @msg: message * @len: message length (-1 for NULL-terminated string) * * The function sets administrative message (string) related to protocol state * change. It is called by the nest code for manual enable/disable/restart * commands all routes to the protocol, and by protocol-specific code when the * protocol state change is initiated by the protocol. Using NULL message clears * the last message. The message string may be either NULL-terminated or with an * explicit length. */ void proto_set_message(struct proto *p, char *msg, int len) { mb_free(p->message); p->message = NULL; if (!msg || !len) return; if (len < 0) len = strlen(msg); if (!len) return; p->message = mb_alloc(proto_pool, len + 1); memcpy(p->message, msg, len); p->message[len] = 0; } static const char * channel_limit_name[] = { [PLA_WARN] = "warn", [PLA_BLOCK] = "block", [PLA_RESTART] = "restart", [PLA_DISABLE] = "disable", }; static void channel_log_limit(struct channel *c, struct limit *l, int dir) { const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; log(L_WARN "Channel %s.%s hits route %s limit (%d), action: %s", c->proto->name, c->name, dir_name[dir], l->max, channel_limit_name[c->limit_actions[dir]]); } static void channel_activate_limit(struct channel *c, struct limit *l, int dir) { if (c->limit_active & (1 << dir)) return; c->limit_active |= (1 << dir); channel_log_limit(c, l, dir); } static int channel_limit_warn(struct limit *l, void *data) { struct channel_limit_data *cld = data; struct channel *c = cld->c; int dir = cld->dir; channel_log_limit(c, l, dir); return 0; } static int channel_limit_block(struct limit *l, void *data) { struct channel_limit_data *cld = data; struct channel *c = cld->c; int dir 
= cld->dir; channel_activate_limit(c, l, dir); return 1; } static const byte chl_dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; static int channel_limit_down(struct limit *l, void *data) { struct channel_limit_data *cld = data; struct channel *c = cld->c; struct proto *p = c->proto; int dir = cld->dir; channel_activate_limit(c, l, dir); bool restart = (c->limit_actions[dir] == PLA_RESTART); if (restart && (current_time_now() - p->last_restart < p->restart_limit)) { log(L_ERR "%s: too frequent restarts, disabling", p->name); restart = false; } if (p->proto_state == PS_UP) proto_schedule_down(p, restart, chl_dir_down[dir]); return 1; } static int (*channel_limit_action[])(struct limit *, void *) = { [PLA_NONE] = NULL, [PLA_WARN] = channel_limit_warn, [PLA_BLOCK] = channel_limit_block, [PLA_RESTART] = channel_limit_down, [PLA_DISABLE] = channel_limit_down, }; static void channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) { l->action = channel_limit_action[cf->action]; c->limit_actions[dir] = cf->action; struct channel_limit_data cld = { .c = c, .dir = dir }; limit_update(l, &cld, cf->action ? 
cf->limit : ~((u32) 0)); } static void channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) { channel_reset_limit(c, l, dir); channel_update_limit(c, l, dir, cf); } static void channel_reset_limit(struct channel *c, struct limit *l, int dir) { limit_reset(l); c->limit_active &= ~(1 << dir); } static inline void proto_do_start(struct proto *p) { p->sources.debug = p->debug; rt_init_sources(&p->sources, p->name, proto_event_list(p)); if (!p->cf->late_if_feed) iface_subscribe(&p->iface_sub); } static void proto_do_up(struct proto *p) { if (!p->main_source) p->main_source = rt_get_source(p, 0); // Locked automaticaly proto_start_channels(p); if (p->cf->late_if_feed) iface_subscribe(&p->iface_sub); } static inline void proto_do_pause(struct proto *p) { proto_pause_channels(p); } static void proto_do_stop(struct proto *p) { p->down_sched = 0; p->gr_recovery = 0; if (p->main_source) { rt_unlock_source(p->main_source); p->main_source = NULL; } rp_free(p->pool_up); p->pool_up = NULL; proto_stop_channels(p); rt_destroy_sources(&p->sources, p->event); p->do_stop = 1; proto_send_event(p, p->event); } static void proto_do_down(struct proto *p) { p->down_code = 0; /* Shutdown is finished in the protocol event */ if (proto_is_done(p)) proto_send_event(p, p->event); } /** * proto_notify_state - notify core about protocol state change * @p: protocol the state of which has changed * @ps: the new status * * Whenever a state of a protocol changes due to some event internal * to the protocol (i.e., not inside a start() or shutdown() hook), * it should immediately notify the core about the change by calling * proto_notify_state() which will write the new state to the &proto * structure and take all the actions necessary to adapt to the new * state. State change to PS_DOWN immediately frees resources of protocol * and might execute start callback of protocol; therefore, * it should be used at tail positions of protocol callbacks. 
*/ void proto_notify_state(struct proto *p, uint state) { ASSERT_DIE(birdloop_inside(p->loop)); uint ps = p->proto_state; DBG("%s reporting state transition %s -> %s\n", p->name, p_states[ps], p_states[state]); /* If nothing happened, announce possible pending extended states immediately */ if (state == ps) return proto_announce_state(p, p->ea_state); p->proto_state = state; p->last_state_change = current_time(); switch (state) { case PS_START: ASSERT(ps == PS_DOWN_XX || ps == PS_UP); if (ps == PS_DOWN_XX) proto_do_start(p); else proto_do_pause(p); break; case PS_UP: ASSERT(ps == PS_DOWN_XX || ps == PS_START); if (ps == PS_DOWN_XX) proto_do_start(p); proto_do_up(p); break; case PS_STOP: ASSERT(ps == PS_START || ps == PS_UP); proto_do_stop(p); break; case PS_FLUSH: if (ps != PS_STOP) proto_do_stop(p); proto_do_down(p); break; case PS_DOWN_XX: ASSERT(ps == PS_FLUSH); CALL(p->proto->cleanup, p); if (p->pool) { rp_free(p->pool); p->pool = NULL; } break; default: bug("%s: Invalid state %d", p->name, ps); } ea_list *pes = p->ea_state; ea_set_attr(&pes, EA_LITERAL_EMBEDDED(&ea_state, 0, p->proto_state)); ea_set_attr(&pes, EA_LITERAL_STORE_ADATA(&ea_last_modified, 0, &p->last_state_change, sizeof(btime))); proto_announce_state(p, pes); proto_log_state_change(p); } /* * CLI Commands */ static char * proto_state_name(struct proto *p) { switch (p->proto_state) { case PS_DOWN_XX: return "down"; case PS_START: return "start"; case PS_UP: return "up"; case PS_STOP: return "stop"; case PS_FLUSH: return "flush"; default: return "???"; } } static void channel_show_stats(struct channel *c) { struct channel_import_stats *ch_is = &c->import_stats; struct channel_export_stats *ch_es = &c->export_stats; struct rt_import_stats *rt_is = c->in_req.hook ? &c->in_req.hook->stats : NULL; struct rt_export_stats *rt_es = &c->out_req.stats; #define SON(ie, item) ((ie) ? 
(ie)->item : 0) #define SCI(item) SON(ch_is, item) #define SCE(item) SON(ch_es, item) #define SRI(item) SON(rt_is, item) #define SRE(item) SON(rt_es, item) u32 rx_routes = c->rx_limit.count; u32 in_routes = c->in_limit.count; u32 out_routes = c->out_limit.count; if (c->in_keep) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", in_routes, (rx_routes - in_routes), out_routes, SRI(pref)); else cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", in_routes, out_routes, SRI(pref)); cli_msg(-1006, " Route change stats: received rejected filtered ignored RX limit IN limit accepted"); cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u %10u %10u", SCI(updates_received), SCI(updates_invalid), SCI(updates_filtered), SRI(updates_ignored), SCI(updates_limited_rx), SCI(updates_limited_in), SRI(updates_accepted)); cli_msg(-1006, " Import withdraws: %10u %10u --- %10u --- %10u", SCI(withdraws_received), SCI(withdraws_invalid), SRI(withdraws_ignored), SRI(withdraws_accepted)); cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u %10u", SRE(updates_received), SCE(updates_rejected), SCE(updates_filtered), SCE(updates_limited), SCE(updates_accepted)); cli_msg(-1006, " Export withdraws: %10u --- --- --- ---%10u", SRE(withdraws_received), SCE(withdraws_accepted)); #undef SRI #undef SRE #undef SCI #undef SCE #undef SON } void channel_show_limit(struct limit *l, const char *dsc, int active, int action) { if (!l->action) return; cli_msg(-1006, " %-16s%d%s", dsc, l->max, active ? 
" [HIT]" : ""); cli_msg(-1006, " Action: %s", channel_limit_name[action]); } void channel_show_info(struct channel *c) { cli_msg(-1006, " Channel %s", c->name); cli_msg(-1006, " State: %s", c_states[c->channel_state]); cli_msg(-1006, " Import state: %s", rt_import_state_name(rt_import_get_state(c->in_req.hook))); cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(&c->out_req))); cli_msg(-1006, " Table: %s", c->table->name); cli_msg(-1006, " Preference: %d", c->preference); cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter)); cli_msg(-1006, " Output filter: %s", filter_name(c->out_filter)); if (_graceful_recovery_context.grc_state == GRS_ACTIVE) cli_msg(-1006, " GR recovery: %s%s", OBSREF_GET(c->gr_lock) ? " pending" : "", c->gr_wait ? " waiting" : ""); channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]); channel_show_limit(&c->in_limit, "Import limit:", c->limit_active & (1 << PLD_IN), c->limit_actions[PLD_IN]); channel_show_limit(&c->out_limit, "Export limit:", c->limit_active & (1 << PLD_OUT), c->limit_actions[PLD_OUT]); if (c->channel_state != CS_DOWN) channel_show_stats(c); } void channel_cmd_debug(struct channel *c, uint mask) { if (cli_access_restricted()) return; c->debug = mask; cli_msg(0, ""); } void proto_cmd_show(struct proto *p, uintptr_t verbose, int cnt) { byte buf[256], tbuf[TM_DATETIME_BUFFER_SIZE]; /* First protocol - show header */ if (!cnt) cli_msg(-2002, "%-10s %-10s %-10s %-6s %-12s %s", "Name", "Proto", "Table", "State", "Since", "Info"); buf[0] = 0; if (p->proto->get_status) p->proto->get_status(p, buf); rcu_read_lock(); struct timeformat *tf = this_cli->tf ?: &atomic_load_explicit(&global_runtime, memory_order_acquire)->tf_proto; rcu_read_unlock(); tm_format_time(tbuf, tf, p->last_state_change); cli_msg(-1002, "%-10s %-10s %-10s %-6s %-12s %s", p->name, p->proto->name, p->main_channel ? 
p->main_channel->table->name : "---", proto_state_name(p), tbuf, buf); if (verbose) { if (p->cf->dsc) cli_msg(-1006, " Description: %s", p->cf->dsc); if (p->message) cli_msg(-1006, " Message: %s", p->message); tm_format_time(tbuf, tf, p->last_reconfiguration); cli_msg(-1006, " Created: %s", tbuf); if (p->last_restart > p->last_reconfiguration) { tm_format_time(tbuf, tf, p->last_restart); cli_msg(-1006, " Last autorestart: %s", tbuf); } if (p->cf->router_id) cli_msg(-1006, " Router ID: %R", p->cf->router_id); if (p->vrf) cli_msg(-1006, " VRF: %s", p->vrf->name); if (p->proto->show_proto_info) p->proto->show_proto_info(p); else { struct channel *c; WALK_LIST(c, p->channels) channel_show_info(c); } cli_msg(-1006, ""); } } void proto_cmd_disable(struct proto *p, uintptr_t arg, int cnt UNUSED) { if (p->disabled) { cli_msg(-8, "%s: already disabled", p->name); return; } log(L_INFO "Disabling protocol %s", p->name); p->disabled = 1; p->down_code = PDC_CMD_DISABLE; proto_set_message(p, (char *) arg, -1); proto_shutdown(p); cli_msg(-9, "%s: disabled", p->name); } void proto_cmd_enable(struct proto *p, uintptr_t arg, int cnt UNUSED) { if (!p->disabled) { cli_msg(-10, "%s: already enabled", p->name); return; } log(L_INFO "Enabling protocol %s", p->name); p->disabled = 0; proto_set_message(p, (char *) arg, -1); proto_rethink_goal(p); cli_msg(-11, "%s: enabled", p->name); } void proto_cmd_restart(struct proto *p, uintptr_t arg, int cnt UNUSED) { if (p->disabled) { cli_msg(-8, "%s: already disabled", p->name); return; } log(L_INFO "Restarting protocol %s", p->name); p->disabled = 1; p->down_code = PDC_CMD_RESTART; proto_set_message(p, (char *) arg, -1); proto_shutdown(p); p->disabled = 0; /* After the protocol shuts down, proto_rethink_goal() is run from proto_event. 
*/ cli_msg(-12, "%s: restarted", p->name); } struct channel_cmd_reload_request { struct rt_feeding_request cfr; struct proto_reload_request *prr; }; static void channel_reload_done(struct rt_feeding_request *cfr) { SKIP_BACK_DECLARE(struct channel_cmd_reload_request, ccrfr, cfr, cfr); if (atomic_fetch_sub_explicit(&ccrfr->prr->counter, 1, memory_order_acq_rel) == 1) ev_send_loop(&main_birdloop, &ccrfr->prr->ev); } static struct rt_feeding_request * channel_create_reload_request(struct proto_reload_request *prr) { if (!prr->trie) return NULL; /* Increase the refeed counter */ atomic_fetch_add_explicit(&prr->counter, 1, memory_order_relaxed); ASSERT_DIE(this_cli->parser_pool != prr->trie->lp); struct channel_cmd_reload_request *req = lp_alloc(prr->trie->lp, sizeof *req); *req = (struct channel_cmd_reload_request) { .cfr = { .done = channel_reload_done, .prefilter = { .mode = TE_ADDR_TRIE, .trie = prr->trie, }, }, .prr = prr, }; return &req->cfr; } void proto_cmd_reload(struct proto *p, uintptr_t _prr, int cnt UNUSED) { struct proto_reload_request *prr = (void *) _prr; struct channel *c; if (p->disabled) { cli_msg(-8, "%s: already disabled", p->name); return; } /* If the protocol in not UP, it has no routes */ if (p->proto_state != PS_UP) return; /* All channels must support reload */ if (prr->dir & CMD_RELOAD_IN) WALK_LIST(c, p->channels) if ((c->channel_state == CS_UP) && !channel_reloadable(c)) { cli_msg(-8006, "%s: reload failed", p->name); return; } log(L_INFO "Reloading protocol %s", p->name); /* re-importing routes */ WALK_LIST(c, p->channels) if (c->channel_state == CS_UP) { if (prr->dir & CMD_RELOAD_IN) channel_request_reload(c, channel_create_reload_request(prr)); if (prr->dir & CMD_RELOAD_OUT) if (c->out_req.name) rt_export_refeed(&c->out_req, channel_create_reload_request(prr)); } cli_msg(-15, "%s: reloading", p->name); } extern void pipe_update_debug(struct proto *P); void proto_cmd_debug(struct proto *p, uintptr_t mask, int cnt UNUSED) { p->debug = mask; 
#ifdef CONFIG_PIPE if (p->proto == &proto_pipe) pipe_update_debug(p); #endif } void proto_cmd_mrtdump(struct proto *p, uintptr_t mask, int cnt UNUSED) { p->mrtdump = mask; } static void proto_apply_cmd_symbol(const struct symbol *s, void (* cmd)(struct proto *, uintptr_t, int), uintptr_t arg) { if (s->class != SYM_PROTO) { cli_msg(9002, "%s is not a protocol", s->name); return; } if (s->proto->proto) { struct proto *p = s->proto->proto; PROTO_LOCKED_FROM_MAIN(p) cmd(p, arg, 0); cli_msg(0, ""); } else cli_msg(9002, "%s does not exist", s->name); } static void proto_apply_cmd_patt(const char *patt, void (* cmd)(struct proto *, uintptr_t, int), uintptr_t arg) { int cnt = 0; WALK_TLIST(proto, p, &global_proto_list) if (!patt || patmatch(patt, p->name)) PROTO_LOCKED_FROM_MAIN(p) cmd(p, arg, cnt++); if (!cnt) cli_msg(8003, "No protocols match"); else cli_msg(0, ""); } void proto_apply_cmd(struct proto_spec ps, void (* cmd)(struct proto *, uintptr_t, int), int restricted, uintptr_t arg) { if (restricted && cli_access_restricted()) return; if (ps.patt) proto_apply_cmd_patt(ps.ptr, cmd, arg); else proto_apply_cmd_symbol(ps.ptr, cmd, arg); } struct proto * proto_get_named(struct symbol *sym, struct protocol *pr) { struct proto *p; if (sym) { if (sym->class != SYM_PROTO) cf_error("%s: Not a protocol", sym->name); p = sym->proto->proto; if (!p || p->proto != pr) cf_error("%s: Not a %s protocol", sym->name, pr->name); } else { p = NULL; WALK_TLIST(proto, q, &global_proto_list) if ((q->proto == pr) && (q->proto_state != PS_DOWN_XX)) { if (p) cf_error("There are multiple %s protocols running", pr->name); p = q; } if (!p) cf_error("There is no %s protocol running", pr->name); } return p; } struct proto * proto_iterate_named(struct symbol *sym, struct protocol *proto, struct proto *old) { if (sym) { /* Just the first pass */ if (old) { cli_msg(0, ""); return NULL; } if (sym->class != SYM_PROTO) cf_error("%s: Not a protocol", sym->name); struct proto *p = sym->proto->proto; if (!p 
|| (p->proto != proto)) cf_error("%s: Not a %s protocol", sym->name, proto->name); return p; } else { for (struct proto *p = old ? old->n.next : global_proto_list.first; p; p = p->n.next) { if ((p->proto == proto) && (p->proto_state != PS_DOWN_XX)) { cli_separator(this_cli); return p; } } /* Not found anything during first pass */ if (!old) cf_error("There is no %s protocol running", proto->name); /* No more items */ cli_msg(0, ""); return NULL; } } static void proto_journal_item_cleanup_(bool withdrawal, ea_list *old_attr) { /* The old attrs can be finally unref'd */ ea_free_later(old_attr); if (!withdrawal) return; /* The protocol itself is finished, cleanup its ID * The protocol's structure may be already gone, * the only thing left is the ID and the old attributes. */ PST_LOCKED(tp) { u32 id = ea_get_int(old_attr, &ea_proto_id, 0); ASSERT_DIE(id); ASSERT_DIE(tp->proto_states[id] == NULL); hmap_clear(&tp->proto_id_map, id); } } void proto_journal_item_cleanup(struct lfjour * journal UNUSED, struct lfjour_item *i) { /* Called after a journal update was has been read. */ struct proto_pending_update *pupdate = SKIP_BACK(struct proto_pending_update, li, i); proto_journal_item_cleanup_(!pupdate->new, pupdate->old); } /* * Protocol state announcement. * * The authoritative protocol state is always stored in ts->proto_states[p->id] * and it holds a reference. But sometimes it's too clumsy to announce all * protocol changes happening in a fast succession, so there is a * state-to-be-announced stored in the protocol itself, in p->ea_state. * That one is also a reference. * * The usage pattern is simple. First, you need to have a local ea_list copy. * ea_list *pes = p->ea_state; * * Then you update the state. * ea_set_attr(&pes, EA_LITERAL_...); * (possibly more of these) * * And finally, you queue the change for announcement. * proto_announce_state_later(p, pes); * * Alternatively, if you need immediate announcement and no table is locked. 
* proto_announce_state(p, pes); * * In all cases, the p->ea_state is updated immediately, and the public state * is either updated directly (involving a table lock!) or the update is * deferred after the current task ends. * * Never update p->ea_state from outside these functions. * Also never access p->ea_state from outside the protocol context. * It's a local copy for the protocol itself. From outside, always read the * public state table. * * Also no explicit referencing or EAs is needed outside these functions, * the reference lifecycle is complete here. */ struct proto_announce_state_deferred { struct deferred_call dc; struct proto *p; }; static void proto_announce_state_locked(struct proto_state_table_private* ts, struct proto *p, ea_list *new_state) { ASSERT_DIE(birdloop_inside(p->loop)); /* Cancel the previous deferred announcement */ if (p->deferred_state_announcement) { p->deferred_state_announcement->p = NULL; p->deferred_state_announcement = NULL; } /* First we update the state-to-be-stored */ if (new_state != p->ea_state) { if (p->ea_state) ea_free_later(p->ea_state); p->ea_state = new_state ? ea_lookup(new_state, 0, EALS_IN_TABLE) : NULL; } /* Then we check the public state */ ASSERT_DIE(p->id < ts->proto_len); ea_list *old_state = ts->proto_states[p->id]; /* Nothing has changed? */ if (p->ea_state == old_state) return; /* Set the new state */ ts->proto_states[p->id] = p->ea_state ? ea_ref(p->ea_state) : NULL; /* Announce the new state */ struct lfjour_item *li = lfjour_push_prepare(&proto_state_table_pub.journal); if (!li) { /* No reader, cleanup immediately * There is another version of this code in * proto_journal_item_cleanup_() but it must * be kept separated because of locking. 
*/ ea_free_later(old_state); /* Protocol ID cleanup */ if (!p->ea_state) hmap_clear(&ts->proto_id_map, p->id); return; } SKIP_BACK_DECLARE(struct proto_pending_update, ppu, li, li); *ppu = (struct proto_pending_update) { .li = ppu->li, /* Keep the item's internal state */ .new = p->ea_state, .old = old_state, }; lfjour_push_commit(&proto_state_table_pub.journal); } void proto_announce_state(struct proto *p, ea_list *attr) { PST_LOCKED(ts) proto_announce_state_locked(ts, p, attr); } static void proto_announce_state_deferred(struct deferred_call *dc) { SKIP_BACK_DECLARE(struct proto_announce_state_deferred, pasd, dc, dc); if (pasd->p) proto_announce_state(pasd->p, pasd->p->ea_state); } void proto_announce_state_later_internal(struct proto *p, ea_list *new_state) { /* Cancel the previous deferred announcement */ if (p->deferred_state_announcement) p->deferred_state_announcement->p = NULL; /* The old stored state isn't needed any more */ ea_free_later(p->ea_state); /* Store the new one */ p->ea_state = ea_lookup(new_state, 0, EALS_IN_TABLE); /* Defer the rest */ struct proto_announce_state_deferred pasd = { .dc.hook = proto_announce_state_deferred, .p = p, }; p->deferred_state_announcement = SKIP_BACK(struct proto_announce_state_deferred, dc, defer_call(&pasd.dc, sizeof pasd)); } ea_list * channel_get_state(int id) { PST_LOCKED(ts) { ASSERT_DIE((u32) id < ts->channels_len); if (ts->channel_states[id]) return ea_ref_tmp(ts->channel_states[id]); } return NULL; } ea_list * proto_get_state(int id) { ea_list *eal; PST_LOCKED(ts) { ASSERT_DIE((u32)id < ts->proto_len); eal = ts->proto_states[id]; } if (eal) return ea_ref_tmp(eal); return NULL; } void proto_states_subscribe(struct lfjour_recipient *r) { PST_LOCKED(ts) lfjour_register(&proto_state_table_pub.journal, r); } void proto_states_unsubscribe(struct lfjour_recipient *r) { PST_LOCKED(ts) lfjour_unregister(r); } /* State attribute declarations */ struct ea_class ea_name = { .name = "proto_name", .type = T_STRING, }; 
struct ea_class ea_protocol_name = { .name = "proto_protocol_name", .type = T_STRING, }; struct ea_class ea_protocol_type = { .name = "proto_protocol_type", .type = T_PTR, }; struct ea_class ea_main_table_id = { .name = "proto_main_table_id", .type = T_INT, }; struct ea_class ea_state = { .name = "proto_state", .type = T_ENUM_STATE, }; struct ea_class ea_last_modified = { .name = "proto_last_modified", .type = T_BTIME, }; struct ea_class ea_info = { .name = "proto_info", .type = T_STRING, }; struct ea_class ea_proto_id = { .name = "proto_proto_id", .type = T_INT, }; struct ea_class ea_proto_channel_list = { .name = "ea_proto_channel_list", .type = T_CLIST, }; struct ea_class ea_channel_id = { .name = "proto_channel_id", .type = T_INT, }; struct ea_class ea_in_keep = { .name = "channel_in_keep", .type = T_INT, }; struct ea_class ea_rtable = { .name = "rtable", .type = T_PTR, };