mirror of
https://gitlab.nic.cz/labs/bird.git
synced 2024-12-31 22:21:54 +00:00
Protocol restart timer reworked.
The restart timer was racy and didn't allow for immediate restarts from limits. Now the protocol stores the last restart time, and in case of too frequent automatic restarts caused by exceeded limits, the protocol gets disabled with an error message. There is now also a configuration knob for this.
This commit is contained in:
parent
53431ff679
commit
3f4332f0bd
@ -839,6 +839,13 @@ agreement").
|
|||||||
command line interface without needing to touch the configuration.
|
command line interface without needing to touch the configuration.
|
||||||
Disabled protocols are not activated. Default: protocol is enabled.
|
Disabled protocols are not activated. Default: protocol is enabled.
|
||||||
|
|
||||||
|
<tag><label id="proto-restart-limit">restart time <m/time/</tag>
|
||||||
|
Set time limit for subsequent automatic restarts of the protocol.
|
||||||
|
If the protocol hits the limit (with a restart action) before this time
|
||||||
|
elapses from starting the protocol, the protocol is disabled with
|
||||||
|
an error message in the log. This doesn't apply to manual
|
||||||
|
restarts or reconfiguration. Default: 5 s.
|
||||||
|
|
||||||
<tag><label id="proto-debug">debug all|off|{ states|routes|filters|interfaces|events|packets [, <m/.../] }</tag>
|
<tag><label id="proto-debug">debug all|off|{ states|routes|filters|interfaces|events|packets [, <m/.../] }</tag>
|
||||||
Set protocol debugging options. If asked, each protocol is capable of
|
Set protocol debugging options. If asked, each protocol is capable of
|
||||||
writing trace messages about its work to the log (with category
|
writing trace messages about its work to the log (with category
|
||||||
|
@ -72,6 +72,11 @@ how to implement it properly.
|
|||||||
|
|
||||||
The `scope` route attribute has been removed. Use custom route attributes instead.
|
The `scope` route attribute has been removed. Use custom route attributes instead.
|
||||||
|
|
||||||
|
## Protocols common
|
||||||
|
|
||||||
|
There is now a guard against too frequent restarts due to limits, called
|
||||||
|
`restart time`, set by default to 5 seconds. To disable, set this to 1 us.
|
||||||
|
|
||||||
## Pipe
|
## Pipe
|
||||||
|
|
||||||
It's now impossible to check immediately whether the route has entered a pipe
|
It's now impossible to check immediately whether the route has entered a pipe
|
||||||
|
@ -340,6 +340,7 @@ proto_name:
|
|||||||
proto_item:
|
proto_item:
|
||||||
/* EMPTY */
|
/* EMPTY */
|
||||||
| DISABLED bool { this_proto->disabled = $2; }
|
| DISABLED bool { this_proto->disabled = $2; }
|
||||||
|
| RESTART TIME expr_us { this_proto->restart_limit = $3; }
|
||||||
| DEBUG debug_mask { this_proto->debug = $2; }
|
| DEBUG debug_mask { this_proto->debug = $2; }
|
||||||
| MRTDUMP mrtdump_mask { this_proto->mrtdump = $2; }
|
| MRTDUMP mrtdump_mask { this_proto->mrtdump = $2; }
|
||||||
| ROUTER ID idval { this_proto->router_id = $3; }
|
| ROUTER ID idval { this_proto->router_id = $3; }
|
||||||
|
48
nest/proto.c
48
nest/proto.c
@ -1335,6 +1335,9 @@ proto_new(struct proto_config *cf)
|
|||||||
p->hash_key = random_u32();
|
p->hash_key = random_u32();
|
||||||
cf->proto = p;
|
cf->proto = p;
|
||||||
|
|
||||||
|
p->last_restart = current_time();
|
||||||
|
p->restart_limit = cf->restart_limit;
|
||||||
|
|
||||||
PST_LOCKED(tp)
|
PST_LOCKED(tp)
|
||||||
{
|
{
|
||||||
p->id = hmap_first_zero(&tp->proto_id_map);
|
p->id = hmap_first_zero(&tp->proto_id_map);
|
||||||
@ -1454,6 +1457,8 @@ proto_config_new(struct protocol *pr, int class)
|
|||||||
cf->mrtdump = new_config->proto_default_mrtdump;
|
cf->mrtdump = new_config->proto_default_mrtdump;
|
||||||
cf->loop_order = DOMAIN_ORDER(the_bird);
|
cf->loop_order = DOMAIN_ORDER(the_bird);
|
||||||
|
|
||||||
|
cf->restart_limit = 5 S;
|
||||||
|
|
||||||
init_list(&cf->channels);
|
init_list(&cf->channels);
|
||||||
|
|
||||||
return cf;
|
return cf;
|
||||||
@ -1561,7 +1566,7 @@ static int
|
|||||||
proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config *nc, int type)
|
proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config *nc, int type)
|
||||||
{
|
{
|
||||||
/* If the protocol is DOWN, we just restart it */
|
/* If the protocol is DOWN, we just restart it */
|
||||||
if (p->proto_state == PS_DOWN_XX)
|
if ((p->proto_state == PS_DOWN_XX) || (p->proto_state == PS_FLUSH))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* If there is a too big change in core attributes, ... */
|
/* If there is a too big change in core attributes, ... */
|
||||||
@ -1574,6 +1579,7 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config
|
|||||||
p->sources.name = p->name = nc->name;
|
p->sources.name = p->name = nc->name;
|
||||||
p->sources.debug = p->debug = nc->debug;
|
p->sources.debug = p->debug = nc->debug;
|
||||||
p->mrtdump = nc->mrtdump;
|
p->mrtdump = nc->mrtdump;
|
||||||
|
p->restart_limit = nc->restart_limit;
|
||||||
reconfigure_type = type;
|
reconfigure_type = type;
|
||||||
|
|
||||||
/* Execute protocol specific reconfigure hook */
|
/* Execute protocol specific reconfigure hook */
|
||||||
@ -2167,29 +2173,15 @@ proto_restart_event_hook(void *_p)
|
|||||||
p->disabled = 1;
|
p->disabled = 1;
|
||||||
proto_rethink_goal(p);
|
proto_rethink_goal(p);
|
||||||
|
|
||||||
p->restart_event = NULL;
|
|
||||||
p->restart_timer = NULL;
|
|
||||||
|
|
||||||
if (proto_restart)
|
if (proto_restart)
|
||||||
|
if (current_time_now() - p->last_restart < p->restart_limit)
|
||||||
|
log(L_ERR "%s: too frequent restarts, disabling", p->name);
|
||||||
|
else
|
||||||
|
p->disabled = 0;
|
||||||
|
|
||||||
/* No need to call proto_rethink_goal() here again as the proto_cleanup() routine will
|
/* No need to call proto_rethink_goal() here again as the proto_cleanup() routine will
|
||||||
* call it after the protocol stops ... and both these routines are fixed to main_birdloop.
|
* call it after the protocol stops ... and both these routines are fixed to main_birdloop.
|
||||||
*/
|
*/
|
||||||
p->disabled = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
proto_send_restart_event(struct proto *p)
|
|
||||||
{
|
|
||||||
if (!p->restart_event)
|
|
||||||
p->restart_event = ev_new_init(p->pool, proto_restart_event_hook, p);
|
|
||||||
|
|
||||||
ev_send(&global_event_list, p->restart_event);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
proto_send_restart_event_from_timer(struct timer *t)
|
|
||||||
{
|
|
||||||
proto_send_restart_event((struct proto *) t->data);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
@ -2205,20 +2197,8 @@ proto_schedule_down(struct proto *p, byte restart, byte code)
|
|||||||
p->down_sched = restart ? PDS_RESTART : PDS_DISABLE;
|
p->down_sched = restart ? PDS_RESTART : PDS_DISABLE;
|
||||||
p->down_code = code;
|
p->down_code = code;
|
||||||
|
|
||||||
if (!restart)
|
/* Request protocol restart to be initiated from the mainloop */
|
||||||
{
|
ev_send(&global_event_list, ev_new_init(p->pool, proto_restart_event_hook, p));
|
||||||
if (p->restart_timer && tm_active(p->restart_timer))
|
|
||||||
tm_stop(p->restart_timer);
|
|
||||||
|
|
||||||
proto_send_restart_event(p);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (!p->restart_timer)
|
|
||||||
p->restart_timer = tm_new_init(p->pool, proto_send_restart_event_from_timer, p, 0, 0);
|
|
||||||
|
|
||||||
tm_start_max_in(p->restart_timer, 250 MS, p->loop);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -113,6 +113,7 @@ struct proto_config {
|
|||||||
u32 router_id; /* Protocol specific router ID */
|
u32 router_id; /* Protocol specific router ID */
|
||||||
uint loop_order; /* Launch a birdloop on this locking level; use DOMAIN_ORDER(the_bird) for mainloop */
|
uint loop_order; /* Launch a birdloop on this locking level; use DOMAIN_ORDER(the_bird) for mainloop */
|
||||||
btime loop_max_latency; /* Request this specific maximum latency of loop; zero to default */
|
btime loop_max_latency; /* Request this specific maximum latency of loop; zero to default */
|
||||||
|
btime restart_limit; /* Minimum allowed time between limit restarts */
|
||||||
|
|
||||||
list channels; /* List of channel configs (struct channel_config) */
|
list channels; /* List of channel configs (struct channel_config) */
|
||||||
struct iface *vrf; /* Related VRF instance, NULL if global */
|
struct iface *vrf; /* Related VRF instance, NULL if global */
|
||||||
@ -142,8 +143,6 @@ struct proto {
|
|||||||
pool *pool_inloop; /* Pool containing local objects which need to be freed
|
pool *pool_inloop; /* Pool containing local objects which need to be freed
|
||||||
before the protocol's birdloop actually stops, like olocks */
|
before the protocol's birdloop actually stops, like olocks */
|
||||||
event *event; /* Protocol event */
|
event *event; /* Protocol event */
|
||||||
timer *restart_timer; /* Timer to restart the protocol from limits */
|
|
||||||
event *restart_event; /* Event to restart/shutdown the protocol from limits */
|
|
||||||
struct birdloop *loop; /* BIRDloop running this protocol */
|
struct birdloop *loop; /* BIRDloop running this protocol */
|
||||||
|
|
||||||
list channels; /* List of channels to rtables (struct channel) */
|
list channels; /* List of channels to rtables (struct channel) */
|
||||||
@ -170,6 +169,8 @@ struct proto {
|
|||||||
byte down_code; /* Reason for shutdown (PDC_* codes) */
|
byte down_code; /* Reason for shutdown (PDC_* codes) */
|
||||||
u32 hash_key; /* Random key used for hashing of neighbors */
|
u32 hash_key; /* Random key used for hashing of neighbors */
|
||||||
btime last_state_change; /* Time of last state transition */
|
btime last_state_change; /* Time of last state transition */
|
||||||
|
btime last_restart; /* Time of last restart */
|
||||||
|
btime restart_limit; /* Minimum allowed time between limit restarts */
|
||||||
char *last_state_name_announced; /* Last state name we've announced to the user */
|
char *last_state_name_announced; /* Last state name we've announced to the user */
|
||||||
char *message; /* State-change message, allocated from proto_pool */
|
char *message; /* State-change message, allocated from proto_pool */
|
||||||
u32 id; /* Sequential ID used as index in proto_state_table */
|
u32 id; /* Sequential ID used as index in proto_state_table */
|
||||||
|
Loading…
Reference in New Issue
Block a user