mirror of
https://gitlab.nic.cz/labs/bird.git
synced 2025-01-07 01:21:54 +00:00
Graceful recovery: converted to obstacles
Yet another refcounting mechanism had a locking collision.
This commit is contained in:
parent
0fa80d7c79
commit
f7639a9faf
178
nest/proto.c
178
nest/proto.c
@ -31,15 +31,8 @@ static list STATIC_LIST_INIT(protocol_list);
|
|||||||
#define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); })
|
#define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); })
|
||||||
#define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); })
|
#define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); })
|
||||||
|
|
||||||
static timer *gr_wait_timer;
|
static struct graceful_recovery_context _graceful_recovery_context;
|
||||||
|
OBSREF(struct graceful_recovery_context) graceful_recovery_context;
|
||||||
#define GRS_NONE 0
|
|
||||||
#define GRS_INIT 1
|
|
||||||
#define GRS_ACTIVE 2
|
|
||||||
#define GRS_DONE 3
|
|
||||||
|
|
||||||
static int graceful_restart_state;
|
|
||||||
static u32 graceful_restart_locks;
|
|
||||||
|
|
||||||
static char *p_states[] = { "DOWN", "START", "UP", "STOP" };
|
static char *p_states[] = { "DOWN", "START", "UP", "STOP" };
|
||||||
static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" };
|
static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" };
|
||||||
@ -912,7 +905,7 @@ channel_do_stop(struct channel *c)
|
|||||||
ev_postpone(&c->reimport_event);
|
ev_postpone(&c->reimport_event);
|
||||||
|
|
||||||
c->gr_wait = 0;
|
c->gr_wait = 0;
|
||||||
if (c->gr_lock)
|
if (OBSREF_GET(c->gr_lock))
|
||||||
channel_graceful_restart_unlock(c);
|
channel_graceful_restart_unlock(c);
|
||||||
|
|
||||||
CALL(c->class->shutdown, c);
|
CALL(c->class->shutdown, c);
|
||||||
@ -1407,7 +1400,7 @@ proto_start(struct proto *p)
|
|||||||
DBG("Kicking %s up\n", p->name);
|
DBG("Kicking %s up\n", p->name);
|
||||||
PD(p, "Starting");
|
PD(p, "Starting");
|
||||||
|
|
||||||
if (graceful_restart_state == GRS_INIT)
|
if (OBSREF_GET(graceful_recovery_context))
|
||||||
p->gr_recovery = 1;
|
p->gr_recovery = 1;
|
||||||
|
|
||||||
if (p->cf->loop_order != DOMAIN_ORDER(the_bird))
|
if (p->cf->loop_order != DOMAIN_ORDER(the_bird))
|
||||||
@ -1921,7 +1914,45 @@ proto_enable(struct proto *p)
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void graceful_restart_done(timer *t);
|
/**
|
||||||
|
* graceful_restart_done - finalize graceful restart
|
||||||
|
* @t: unused
|
||||||
|
*
|
||||||
|
* When there are no locks on graceful restart, the functions finalizes the
|
||||||
|
* graceful restart recovery. Protocols postponing route export until the end of
|
||||||
|
* the recovery are awakened and the export to them is enabled.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
graceful_recovery_done(struct callback *_ UNUSED)
|
||||||
|
{
|
||||||
|
ASSERT_DIE(birdloop_inside(&main_birdloop));
|
||||||
|
ASSERT_DIE(_graceful_recovery_context.grc_state == GRS_ACTIVE);
|
||||||
|
|
||||||
|
tm_stop(&_graceful_recovery_context.wait_timer);
|
||||||
|
log(L_INFO "Graceful recovery done");
|
||||||
|
|
||||||
|
WALK_TLIST(proto, p, &global_proto_list)
|
||||||
|
PROTO_LOCKED_FROM_MAIN(p)
|
||||||
|
{
|
||||||
|
p->gr_recovery = 0;
|
||||||
|
|
||||||
|
struct channel *c;
|
||||||
|
WALK_LIST(c, p->channels)
|
||||||
|
{
|
||||||
|
ASSERT_DIE(!OBSREF_GET(c->gr_lock));
|
||||||
|
|
||||||
|
/* Resume postponed export of routes */
|
||||||
|
if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify)
|
||||||
|
channel_start_export(c);
|
||||||
|
|
||||||
|
/* Cleanup */
|
||||||
|
c->gr_wait = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_graceful_recovery_context.grc_state = GRS_DONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* graceful_restart_recovery - request initial graceful restart recovery
|
* graceful_restart_recovery - request initial graceful restart recovery
|
||||||
@ -1933,7 +1964,30 @@ static void graceful_restart_done(timer *t);
|
|||||||
void
|
void
|
||||||
graceful_restart_recovery(void)
|
graceful_restart_recovery(void)
|
||||||
{
|
{
|
||||||
graceful_restart_state = GRS_INIT;
|
obstacle_target_init(
|
||||||
|
&_graceful_recovery_context.obstacles,
|
||||||
|
&_graceful_recovery_context.obstacles_cleared,
|
||||||
|
&root_pool, "Graceful recovery");
|
||||||
|
|
||||||
|
OBSREF_SET(graceful_recovery_context, &_graceful_recovery_context);
|
||||||
|
_graceful_recovery_context.grc_state = GRS_INIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
graceful_recovery_timeout(timer *t UNUSED)
|
||||||
|
{
|
||||||
|
log(L_INFO "Graceful recovery timeout");
|
||||||
|
WALK_TLIST(proto, p, &global_proto_list)
|
||||||
|
PROTO_LOCKED_FROM_MAIN(p)
|
||||||
|
{
|
||||||
|
struct channel *c;
|
||||||
|
WALK_LIST(c, p->channels)
|
||||||
|
if (OBSREF_GET(c->gr_lock))
|
||||||
|
{
|
||||||
|
log(L_INFO "Graceful recovery: Not waiting for %s.%s", p->name, c->name);
|
||||||
|
OBSREF_CLEAR(c->gr_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1946,73 +2000,35 @@ graceful_restart_recovery(void)
|
|||||||
void
|
void
|
||||||
graceful_restart_init(void)
|
graceful_restart_init(void)
|
||||||
{
|
{
|
||||||
if (!graceful_restart_state)
|
if (!OBSREF_GET(graceful_recovery_context))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
log(L_INFO "Graceful restart started");
|
log(L_INFO "Graceful recovery started");
|
||||||
|
|
||||||
if (!graceful_restart_locks)
|
_graceful_recovery_context.grc_state = GRS_ACTIVE;
|
||||||
{
|
|
||||||
graceful_restart_done(NULL);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
graceful_restart_state = GRS_ACTIVE;
|
_graceful_recovery_context.wait_timer = (timer) { .hook = graceful_recovery_timeout };
|
||||||
gr_wait_timer = tm_new_init(proto_pool, graceful_restart_done, NULL, 0, 0);
|
|
||||||
u32 gr_wait = atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait;
|
u32 gr_wait = atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait;
|
||||||
tm_start(gr_wait_timer, gr_wait S);
|
tm_start(&_graceful_recovery_context.wait_timer, gr_wait S);
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
callback_init(&_graceful_recovery_context.obstacles_cleared, graceful_recovery_done, &main_birdloop);
|
||||||
* graceful_restart_done - finalize graceful restart
|
|
||||||
* @t: unused
|
|
||||||
*
|
|
||||||
* When there are no locks on graceful restart, the functions finalizes the
|
|
||||||
* graceful restart recovery. Protocols postponing route export until the end of
|
|
||||||
* the recovery are awakened and the export to them is enabled. All other
|
|
||||||
* related state is cleared. The function is also called when the graceful
|
|
||||||
* restart wait timer fires (but there are still some locks).
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
graceful_restart_done(timer *t)
|
|
||||||
{
|
|
||||||
log(L_INFO "Graceful restart done");
|
|
||||||
graceful_restart_state = GRS_DONE;
|
|
||||||
|
|
||||||
WALK_TLIST(proto, p, &global_proto_list)
|
/* The last clearing of obstacle reference will cause
|
||||||
{
|
* the graceful recovery finish immediately. */
|
||||||
if (!p->gr_recovery)
|
OBSREF_CLEAR(graceful_recovery_context);
|
||||||
continue;
|
|
||||||
|
|
||||||
struct channel *c;
|
|
||||||
WALK_LIST(c, p->channels)
|
|
||||||
{
|
|
||||||
/* Resume postponed export of routes */
|
|
||||||
if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify)
|
|
||||||
channel_start_export(c);
|
|
||||||
|
|
||||||
/* Cleanup */
|
|
||||||
c->gr_wait = 0;
|
|
||||||
c->gr_lock = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
p->gr_recovery = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
graceful_restart_locks = 0;
|
|
||||||
|
|
||||||
rfree(t);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
graceful_restart_show_status(void)
|
graceful_restart_show_status(void)
|
||||||
{
|
{
|
||||||
if (graceful_restart_state != GRS_ACTIVE)
|
if (_graceful_recovery_context.grc_state != GRS_ACTIVE)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
cli_msg(-24, "Graceful restart recovery in progress");
|
cli_msg(-24, "Graceful restart recovery in progress");
|
||||||
cli_msg(-24, " Waiting for %d channels to recover", graceful_restart_locks);
|
cli_msg(-24, " Waiting for %u channels to recover",
|
||||||
cli_msg(-24, " Wait timer is %t/%u", tm_remains(gr_wait_timer),
|
obstacle_target_count(&_graceful_recovery_context.obstacles));
|
||||||
|
cli_msg(-24, " Wait timer is %t/%u",
|
||||||
|
tm_remains(&_graceful_recovery_context.wait_timer),
|
||||||
atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait);
|
atomic_load_explicit(&global_runtime, memory_order_relaxed)->gr_wait);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2032,14 +2048,22 @@ graceful_restart_show_status(void)
|
|||||||
void
|
void
|
||||||
channel_graceful_restart_lock(struct channel *c)
|
channel_graceful_restart_lock(struct channel *c)
|
||||||
{
|
{
|
||||||
ASSERT(graceful_restart_state == GRS_INIT);
|
ASSERT_DIE(birdloop_inside(&main_birdloop));
|
||||||
ASSERT(c->proto->gr_recovery);
|
|
||||||
|
|
||||||
if (c->gr_lock)
|
if (OBSREF_GET(c->gr_lock))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
c->gr_lock = 1;
|
switch (_graceful_recovery_context.grc_state)
|
||||||
graceful_restart_locks++;
|
{
|
||||||
|
case GRS_INIT:
|
||||||
|
case GRS_ACTIVE:
|
||||||
|
OBSREF_SET(c->gr_lock, &_graceful_recovery_context);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GRS_NONE:
|
||||||
|
case GRS_DONE:
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -2052,18 +2076,10 @@ channel_graceful_restart_lock(struct channel *c)
|
|||||||
void
|
void
|
||||||
channel_graceful_restart_unlock(struct channel *c)
|
channel_graceful_restart_unlock(struct channel *c)
|
||||||
{
|
{
|
||||||
if (!c->gr_lock)
|
OBSREF_CLEAR(c->gr_lock);
|
||||||
return;
|
|
||||||
|
|
||||||
c->gr_lock = 0;
|
|
||||||
graceful_restart_locks--;
|
|
||||||
|
|
||||||
if ((graceful_restart_state == GRS_ACTIVE) && !graceful_restart_locks)
|
|
||||||
tm_start(gr_wait_timer, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* protos_dump_all - dump status of all protocols
|
* protos_dump_all - dump status of all protocols
|
||||||
*
|
*
|
||||||
@ -2615,9 +2631,9 @@ channel_show_info(struct channel *c)
|
|||||||
cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter));
|
cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter));
|
||||||
cli_msg(-1006, " Output filter: %s", filter_name(c->out_filter));
|
cli_msg(-1006, " Output filter: %s", filter_name(c->out_filter));
|
||||||
|
|
||||||
if (graceful_restart_state == GRS_ACTIVE)
|
if (_graceful_recovery_context.grc_state == GRS_ACTIVE)
|
||||||
cli_msg(-1006, " GR recovery: %s%s",
|
cli_msg(-1006, " GR recovery: %s%s",
|
||||||
c->gr_lock ? " pending" : "",
|
OBSREF_GET(c->gr_lock) ? " pending" : "",
|
||||||
c->gr_wait ? " waiting" : "");
|
c->gr_wait ? " waiting" : "");
|
||||||
|
|
||||||
channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]);
|
channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]);
|
||||||
|
@ -659,7 +659,7 @@ struct channel {
|
|||||||
|
|
||||||
u8 channel_state;
|
u8 channel_state;
|
||||||
u8 reloadable; /* Hook reload_routes() is allowed on the channel */
|
u8 reloadable; /* Hook reload_routes() is allowed on the channel */
|
||||||
u8 gr_lock; /* Graceful restart mechanism should wait for this channel */
|
OBSREF(struct graceful_recovery_context) gr_lock; /* Graceful restart mechanism should wait for this channel */
|
||||||
u8 gr_wait; /* Route export to channel is postponed until graceful restart */
|
u8 gr_wait; /* Route export to channel is postponed until graceful restart */
|
||||||
|
|
||||||
u32 obstacles; /* External obstacles remaining before cleanup */
|
u32 obstacles; /* External obstacles remaining before cleanup */
|
||||||
@ -763,4 +763,16 @@ void *channel_config_new(const struct channel_class *cc, const char *name, uint
|
|||||||
void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto);
|
void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto);
|
||||||
int channel_reconfigure(struct channel *c, struct channel_config *cf);
|
int channel_reconfigure(struct channel *c, struct channel_config *cf);
|
||||||
|
|
||||||
|
struct graceful_recovery_context {
|
||||||
|
struct obstacle_target obstacles;
|
||||||
|
struct callback obstacles_cleared;
|
||||||
|
enum {
|
||||||
|
GRS_NONE,
|
||||||
|
GRS_INIT,
|
||||||
|
GRS_ACTIVE,
|
||||||
|
GRS_DONE,
|
||||||
|
} grc_state;
|
||||||
|
timer wait_timer;
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user