diff --git a/doc/bird.sgml b/doc/bird.sgml index 5e48e342..3e7ae9dd 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -839,6 +839,13 @@ agreement"). command line interface without needing to touch the configuration. Disabled protocols are not activated. Default: protocol is enabled. + <tag><label id="proto-restart-limit">restart time <m/time/</tag> + Set time limit for subsequent automatic restarts of the protocol. + If the protocol hits the limit (with a restart action) before this time + elapses from starting the protocol, the protocol is disabled with + an error message in the log. This doesn't apply to manual + restarts or reconfiguration. Default: 5 s. + <tag><label id="proto-debug">debug all|off|{ states|routes|filters|interfaces|events|packets [, <m/.../] }</tag> Set protocol debugging options. If asked, each protocol is capable of writing trace messages about its work to the log (with category diff --git a/nest/config.Y b/nest/config.Y index a0a10daf..37a7ad97 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -340,6 +340,7 @@ proto_name: proto_item: /* EMPTY */ | DISABLED bool { this_proto->disabled = $2; } + | RESTART TIME expr_us { this_proto->restart_limit = $3; } | DEBUG debug_mask { this_proto->debug = $2; } | MRTDUMP mrtdump_mask { this_proto->mrtdump = $2; } | ROUTER ID idval { this_proto->router_id = $3; } diff --git a/nest/proto.c b/nest/proto.c index c45289ae..42d479d0 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1335,6 +1335,9 @@ proto_new(struct proto_config *cf) p->hash_key = random_u32(); cf->proto = p; + p->last_restart = current_time(); + p->restart_limit = cf->restart_limit; + PST_LOCKED(tp) { p->id = hmap_first_zero(&tp->proto_id_map); @@ -1454,6 +1457,8 @@ proto_config_new(struct protocol *pr, int class) cf->mrtdump = new_config->proto_default_mrtdump; cf->loop_order = DOMAIN_ORDER(the_bird); + cf->restart_limit = 5 S; + init_list(&cf->channels); return cf; @@ -1561,7 +1566,7 @@ static int proto_reconfigure(struct proto 
*p, struct proto_config *oc, struct proto_config *nc, int type) { /* If the protocol is DOWN, we just restart it */ - if (p->proto_state == PS_DOWN_XX) + if ((p->proto_state == PS_DOWN_XX) || (p->proto_state == PS_FLUSH)) return 0; /* If there is a too big change in core attributes, ... */ @@ -1574,6 +1579,7 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config p->sources.name = p->name = nc->name; p->sources.debug = p->debug = nc->debug; p->mrtdump = nc->mrtdump; + p->restart_limit = nc->restart_limit; reconfigure_type = type; /* Execute protocol specific reconfigure hook */ @@ -2167,29 +2173,15 @@ proto_restart_event_hook(void *_p) p->disabled = 1; proto_rethink_goal(p); - p->restart_event = NULL; - p->restart_timer = NULL; - if (proto_restart) + if (current_time_now() - p->last_restart < p->restart_limit) + log(L_ERR "%s: too frequent restarts, disabling", p->name); + else + p->disabled = 0; + /* No need to call proto_rethink_goal() here again as the proto_cleanup() routine will * call it after the protocol stops ... and both these routines are fixed to main_birdloop. */ - p->disabled = 0; -} - -static void -proto_send_restart_event(struct proto *p) -{ - if (!p->restart_event) - p->restart_event = ev_new_init(p->pool, proto_restart_event_hook, p); - - ev_send(&global_event_list, p->restart_event); -} - -static void -proto_send_restart_event_from_timer(struct timer *t) -{ - proto_send_restart_event((struct proto *) t->data); } static inline void @@ -2205,20 +2197,8 @@ proto_schedule_down(struct proto *p, byte restart, byte code) p->down_sched = restart ? 
PDS_RESTART : PDS_DISABLE; p->down_code = code; - if (!restart) - { - if (p->restart_timer && tm_active(p->restart_timer)) - tm_stop(p->restart_timer); - - proto_send_restart_event(p); - } - else - { - if (!p->restart_timer) - p->restart_timer = tm_new_init(p->pool, proto_send_restart_event_from_timer, p, 0, 0); - - tm_start_max_in(p->restart_timer, 250 MS, p->loop); - } + /* Request protocol restart to be initiated from the mainloop */ + ev_send(&global_event_list, ev_new_init(p->pool, proto_restart_event_hook, p)); } /** diff --git a/nest/protocol.h b/nest/protocol.h index cc8b098b..43fabdea 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -113,6 +113,7 @@ struct proto_config { u32 router_id; /* Protocol specific router ID */ uint loop_order; /* Launch a birdloop on this locking level; use DOMAIN_ORDER(the_bird) for mainloop */ btime loop_max_latency; /* Request this specific maximum latency of loop; zero to default */ + btime restart_limit; /* Minimum allowed time between limit restarts */ list channels; /* List of channel configs (struct channel_config) */ struct iface *vrf; /* Related VRF instance, NULL if global */ @@ -142,8 +143,6 @@ struct proto { pool *pool_inloop; /* Pool containing local objects which need to be freed before the protocol's birdloop actually stops, like olocks */ event *event; /* Protocol event */ - timer *restart_timer; /* Timer to restart the protocol from limits */ - event *restart_event; /* Event to restart/shutdown the protocol from limits */ struct birdloop *loop; /* BIRDloop running this protocol */ list channels; /* List of channels to rtables (struct channel) */ @@ -170,6 +169,8 @@ struct proto { byte down_code; /* Reason for shutdown (PDC_* codes) */ u32 hash_key; /* Random key used for hashing of neighbors */ btime last_state_change; /* Time of last state transition */ + btime last_restart; /* Time of last restart */ + btime restart_limit; /* Minimum allowed time between limit restarts */ char *last_state_name_announced; 
/* Last state name we've announced to the user */ char *message; /* State-change message, allocated from proto_pool */ u32 id; /* Sequential ID used as index in proto_state_table */