0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2024-12-22 09:41:54 +00:00

BGP: fix locking order error on dynamic protocol spawn

We missed that the protocol spawner violates the prescribed
locking order. When the rtable level is locked, no new protocol can be
started, thus we need to:

* create the protocol from a clean mainloop context
* in protocol start hook, take the socket

Testsuite: cf-bgp-autopeer
Fixes: #136

Thanks to Job Snijders <job@fastly.com> for reporting:
https://trubka.network.cz/pipermail/bird-users/2024-December/017980.html
This commit is contained in:
Maria Matejka 2024-12-20 11:28:00 +01:00
parent ab74652f96
commit 6779e5da69
3 changed files with 56 additions and 11 deletions

View File

@ -1867,6 +1867,25 @@ proto_spawn(struct proto_config *cf, uint disabled)
return p; return p;
} }
bool
proto_disable(struct proto *p)
{
ASSERT_DIE(birdloop_inside(&main_birdloop));
bool changed = !p->disabled;
p->disabled = 1;
proto_rethink_goal(p);
return changed;
}
bool
proto_enable(struct proto *p)
{
ASSERT_DIE(birdloop_inside(&main_birdloop));
bool changed = p->disabled;
p->disabled = 0;
proto_rethink_goal(p);
return changed;
}
/** /**
* DOC: Graceful restart recovery * DOC: Graceful restart recovery

View File

@ -78,6 +78,8 @@ void proto_build(struct protocol *); /* Called from protocol to register itself
void protos_preconfig(struct config *); void protos_preconfig(struct config *);
void protos_commit(struct config *new, struct config *old, int type); void protos_commit(struct config *new, struct config *old, int type);
struct proto * proto_spawn(struct proto_config *cf, uint disabled); struct proto * proto_spawn(struct proto_config *cf, uint disabled);
bool proto_disable(struct proto *p);
bool proto_enable(struct proto *p);
void protos_dump_all(struct dump_request *); void protos_dump_all(struct dump_request *);
#define GA_UNKNOWN 0 /* Attribute not recognized */ #define GA_UNKNOWN 0 /* Attribute not recognized */

View File

@ -378,8 +378,6 @@ bgp_startup(struct bgp_proto *p)
if (p->postponed_sk) if (p->postponed_sk)
{ {
/* Apply postponed incoming connection */ /* Apply postponed incoming connection */
sk_reloop(p->postponed_sk, p->p.loop);
bgp_setup_conn(p, &p->incoming_conn); bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, p->postponed_sk); bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
bgp_send_open(&p->incoming_conn); bgp_send_open(&p->incoming_conn);
@ -583,6 +581,9 @@ bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len
static void static void
bgp_down(struct bgp_proto *p) bgp_down(struct bgp_proto *p)
{ {
/* Check that the dynamic BGP socket has been picked up */
ASSERT_DIE(p->postponed_sk == NULL);
if (bgp_start_state(p) > BSS_PREPARE) if (bgp_start_state(p) > BSS_PREPARE)
{ {
bgp_setup_auth(p, 0); bgp_setup_auth(p, 0);
@ -617,8 +618,8 @@ bgp_decision(void *vp)
bgp_down(p); bgp_down(p);
} }
static struct bgp_proto * static void
bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip) bgp_spawn(struct bgp_proto *pp, struct birdsock *sk)
{ {
struct symbol *sym; struct symbol *sym;
char fmt[SYM_MAX_LEN]; char fmt[SYM_MAX_LEN];
@ -635,9 +636,16 @@ bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
cfg_mem = NULL; cfg_mem = NULL;
/* Just pass remote_ip to bgp_init() */ /* Just pass remote_ip to bgp_init() */
((struct bgp_config *) sym->proto)->remote_ip = remote_ip; ((struct bgp_config *) sym->proto)->remote_ip = sk->daddr;
return (void *) proto_spawn(sym->proto, 0); /* Create the protocol disabled initially */
SKIP_BACK_DECLARE(struct bgp_proto, p, p, proto_spawn(sym->proto, 1));
/* Pass the socket */
p->postponed_sk = sk;
/* And enable the protocol */
proto_enable(&p->p);
} }
void void
@ -1489,10 +1497,15 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
/* For dynamic BGP, spawn new instance and postpone the socket */ /* For dynamic BGP, spawn new instance and postpone the socket */
if (bgp_is_dynamic(p)) if (bgp_is_dynamic(p))
{ {
p = bgp_spawn(p, sk->daddr); UNLOCK_DOMAIN(rtable, bgp_listen_domain);
p->postponed_sk = sk;
rmove(sk, p->p.pool); /* The dynamic protocol must be in the START state */
goto leave; ASSERT_DIE(p->p.proto_state == PS_START);
birdloop_leave(p->p.loop);
/* Now we have a clean mainloop */
bgp_spawn(p, sk);
return 0;
} }
rmove(sk, p->p.pool); rmove(sk, p->p.pool);
@ -1806,7 +1819,6 @@ bgp_start(struct proto *P)
p->incoming_conn.state = BS_IDLE; p->incoming_conn.state = BS_IDLE;
p->neigh = NULL; p->neigh = NULL;
p->bfd_req = NULL; p->bfd_req = NULL;
p->postponed_sk = NULL;
p->gr_ready = 0; p->gr_ready = 0;
p->gr_active_num = 0; p->gr_active_num = 0;
@ -1848,6 +1860,16 @@ bgp_start(struct proto *P)
channel_graceful_restart_lock(&c->c); channel_graceful_restart_lock(&c->c);
} }
/* Now it's the last chance to move the postponed socket to this BGP,
* as bgp_start is the only hook running from main loop. */
if (p->postponed_sk)
{
LOCK_DOMAIN(rtable, bgp_listen_domain);
rmove(p->postponed_sk, p->p.pool);
sk_reloop(p->postponed_sk, p->p.loop);
UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
/* /*
* Before attempting to create the connection, we need to lock the port, * Before attempting to create the connection, we need to lock the port,
* so that we are the only instance attempting to talk with that neighbor. * so that we are the only instance attempting to talk with that neighbor.
@ -1999,6 +2021,8 @@ bgp_init(struct proto_config *CF)
p->remote_ip = cf->remote_ip; p->remote_ip = cf->remote_ip;
p->remote_as = cf->remote_as; p->remote_as = cf->remote_as;
p->postponed_sk = NULL;
/* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */ /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
if (cf->c.parent) if (cf->c.parent)
cf->remote_ip = IPA_NONE; cf->remote_ip = IPA_NONE;