mirror of
https://gitlab.nic.cz/labs/bird.git
synced 2025-01-18 15:01:53 +00:00
BMP: Fix connection management
Replace broken TCP connection management with a simple state machine. Handle failed attempts properly with a timeout, detect and handle TCP connection close and try to reconnect after that. Remove useless 'station_connected' flag. Keep open messages saved even after the BMP session establishment, so they can be used after BMP session flaps. Use proper log messages for session events.
This commit is contained in:
parent
62aa714175
commit
8f78c232f6
291
proto/bmp/bmp.c
291
proto/bmp/bmp.c
@ -20,6 +20,12 @@
|
||||
* - Support DE_CONFIGURED PEER DOWN REASON code in PEER DOWN NOTIFICATION message
|
||||
* - If connection with BMP collector will lost then we don't establish connection again
|
||||
* - Set Peer Type by its a global and local-scope IP address
|
||||
*
|
||||
* The BMP session is managed by a simple state machine with three states: Idle
|
||||
* (!started, !sk), Connect (!started, sk active), and Established (started). It
|
||||
* has three events: connect successfull (Connect -> Established), socket error
|
||||
* (any -> Idle), and connect timeout (Idle/Connect -> Connect, resetting the
|
||||
* TCP socket).
|
||||
*/
|
||||
|
||||
#include "proto/bmp/bmp.h"
|
||||
@ -166,8 +172,11 @@ enum bmp_term_reason {
|
||||
// Default chunk size request when memory allocation
|
||||
#define DEFAULT_MEM_BLOCK_SIZE 4096
|
||||
|
||||
// Initial delay for connection to the BMP collector
|
||||
#define CONNECT_INIT_TIME (200 MS)
|
||||
|
||||
// Timeout for connection to the BMP collector retry
|
||||
#define CONNECT_RETRY_SEC (10 S)
|
||||
#define CONNECT_RETRY_TIME (10 S)
|
||||
|
||||
#define IP4_MAX_TTL 255
|
||||
|
||||
@ -188,20 +197,15 @@ enum bmp_term_reason {
|
||||
} while (0)
|
||||
|
||||
|
||||
// Handle BIRD socket error event
|
||||
static void
|
||||
bmp_sock_err(sock *sk, int err);
|
||||
static void bmp_connected(struct birdsock *sk);
|
||||
static void bmp_sock_err(sock *sk, int err);
|
||||
static void bmp_close_socket(struct bmp_proto *p);
|
||||
|
||||
static void
|
||||
bmp_send_peer_up_notif_msg(struct bmp_proto *p, const struct bgp_proto *bgp,
|
||||
const byte* tx_data, const size_t tx_data_size,
|
||||
const byte* rx_data, const size_t rx_data_size);
|
||||
|
||||
static void
|
||||
bmp_peer_map_walk_tx_open_msg_and_send_peer_up_notif(
|
||||
const struct bmp_peer_map_key key, const byte *tx_msg,
|
||||
const size_t tx_msg_size, void *bmp_);
|
||||
|
||||
// Stores necessary any data in list
|
||||
struct bmp_data_node {
|
||||
node n;
|
||||
@ -250,7 +254,7 @@ bmp_init_msg_serialize(buffer *stream, const char *sys_descr, const char *sys_na
|
||||
static void
|
||||
bmp_schedule_tx_packet(struct bmp_proto *p, const byte *payload, const size_t size)
|
||||
{
|
||||
ASSERT(p->station_connected);
|
||||
ASSERT(p->started);
|
||||
|
||||
struct bmp_data_node *tx_data = mb_alloc(p->tx_mem_pool, sizeof (struct bmp_data_node));
|
||||
tx_data->data = mb_alloc(p->tx_mem_pool, size);
|
||||
@ -265,23 +269,6 @@ bmp_schedule_tx_packet(struct bmp_proto *p, const byte *payload, const size_t si
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* bmp_startup - connect to the BMP collector.
|
||||
* NOTE: Send Initiation Message to the BMP collector.
|
||||
*/
|
||||
static void
|
||||
bmp_startup(struct bmp_proto *p)
|
||||
{
|
||||
ASSERT(p->station_connected && !p->started);
|
||||
|
||||
buffer payload = bmp_buffer_alloc(p->buffer_mpool, DEFAULT_MEM_BLOCK_SIZE);
|
||||
bmp_init_msg_serialize(&payload, p->sys_descr, p->sys_name);
|
||||
bmp_schedule_tx_packet(p, bmp_buffer_data(&payload), bmp_buffer_pos(&payload));
|
||||
bmp_buffer_free(&payload);
|
||||
|
||||
p->started = true;
|
||||
}
|
||||
|
||||
static void
|
||||
bmp_fire_tx(void *p_)
|
||||
{
|
||||
@ -333,43 +320,13 @@ bmp_tx(struct birdsock *sk)
|
||||
bmp_fire_tx(sk->data);
|
||||
}
|
||||
|
||||
static inline int
|
||||
bmp_open_socket(struct bmp_proto *p)
|
||||
/* We need RX hook just to accept socket close events */
|
||||
static int
|
||||
bmp_rx(struct birdsock *sk UNUSED, uint size UNUSED)
|
||||
{
|
||||
sock *s = p->sk;
|
||||
s->daddr = p->station_ip;
|
||||
s->dport = p->station_port;
|
||||
s->err_hook = bmp_sock_err;
|
||||
|
||||
int rc = sk_open(s);
|
||||
|
||||
if (rc < 0)
|
||||
sk_log_error(s, p->p.name);
|
||||
|
||||
return rc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
bmp_connection_retry(timer *t)
|
||||
{
|
||||
struct bmp_proto *p = t->data;
|
||||
|
||||
if (bmp_open_socket(p) < 0)
|
||||
{
|
||||
log(L_DEBUG "Failed to connect to BMP station");
|
||||
return;
|
||||
}
|
||||
|
||||
log(L_DEBUG "Connected to BMP station after connection retry");
|
||||
tm_stop(t);
|
||||
}
|
||||
|
||||
void
|
||||
bmp_sock_err(sock *sk, int err)
|
||||
{
|
||||
struct bmp_proto *p = sk->data;
|
||||
log(L_WARN "[BMP:%s] Socket error: %M", p->p.name, err);
|
||||
}
|
||||
|
||||
static inline void
|
||||
bmp_put_ipa(buffer *stream, const ip_addr addr)
|
||||
@ -489,36 +446,13 @@ bmp_peer_down_notif_msg_serialize(buffer *stream, const bool is_peer_global,
|
||||
bmp_put_data(stream, data, data_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* bmp_open - initialize internal resources of BMP implementation.
|
||||
* NOTE: It does not connect to BMP collector yet.
|
||||
*/
|
||||
void
|
||||
bmp_open(const struct proto *P)
|
||||
{
|
||||
struct bmp_proto *p = (void *) P;
|
||||
|
||||
if (bmp_open_socket(p) < 0)
|
||||
{
|
||||
log(L_DEBUG "Failed to connect to BMP station");
|
||||
p->connect_retry_timer = tm_new_init(P->pool, bmp_connection_retry, p,
|
||||
CONNECT_RETRY_SEC, 0 /* not randomized */);
|
||||
tm_start(p->connect_retry_timer, CONNECT_RETRY_SEC);
|
||||
p->station_connected = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
log(L_DEBUG "Connected to BMP station");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
bmp_peer_map_walk_tx_open_msg_and_send_peer_up_notif(
|
||||
const struct bmp_peer_map_key key, const byte *tx_msg,
|
||||
const size_t tx_msg_size, void *bmp_)
|
||||
{
|
||||
struct bmp_proto *p = bmp_;
|
||||
ASSERT(p->station_connected);
|
||||
ASSERT(p->started);
|
||||
|
||||
const struct bmp_peer_map_entry *map_rx_msg = bmp_peer_map_get(&p->peer_open_msg.rx_msg, key);
|
||||
IF_PTR_IS_NULL_PRINT_ERR_MSG_AND_RETURN_OPT_VAL(
|
||||
@ -630,7 +564,7 @@ bmp_send_peer_up_notif_msg(struct bmp_proto *p, const struct bgp_proto *bgp,
|
||||
const byte* tx_data, const size_t tx_data_size,
|
||||
const byte* rx_data, const size_t rx_data_size)
|
||||
{
|
||||
ASSERT(p->station_connected);
|
||||
ASSERT(p->started);
|
||||
|
||||
const struct birdsock *sk = bmp_get_birdsock_ext(bgp);
|
||||
IF_PTR_IS_NULL_PRINT_ERR_MSG_AND_RETURN_OPT_VAL(
|
||||
@ -661,24 +595,19 @@ bmp_put_sent_bgp_open_msg(const struct bgp_proto *bgp, const byte* pkt,
|
||||
return;
|
||||
}
|
||||
|
||||
struct bmp_peer_map_key key = bmp_peer_map_key_create(bgp->remote_ip,
|
||||
bgp->remote_as);
|
||||
const struct bmp_peer_map_entry *map_entry
|
||||
struct bmp_peer_map_key key
|
||||
= bmp_peer_map_key_create(bgp->remote_ip, bgp->remote_as);
|
||||
const struct bmp_peer_map_entry *rx_msg
|
||||
= bmp_peer_map_get(&p->peer_open_msg.rx_msg, key);
|
||||
if (!map_entry || !p->started)
|
||||
{
|
||||
bmp_peer_map_insert(&p->peer_open_msg.tx_msg, key, pkt, pkt_size);
|
||||
|
||||
if (!map_entry)
|
||||
{
|
||||
bmp_peer_map_insert(&p->bgp_peers, key, (const byte *) &bgp, sizeof (bgp));
|
||||
}
|
||||
bmp_peer_map_insert(&p->peer_open_msg.tx_msg, key, pkt, pkt_size);
|
||||
|
||||
return;
|
||||
}
|
||||
if (!rx_msg)
|
||||
bmp_peer_map_insert(&p->bgp_peers, key, (const byte *) &bgp, sizeof (bgp));
|
||||
|
||||
bmp_send_peer_up_notif_msg(p, bgp, pkt, pkt_size, map_entry->data.buf,
|
||||
map_entry->data.buf_size);
|
||||
if (rx_msg && p->started)
|
||||
bmp_send_peer_up_notif_msg(p, bgp, pkt, pkt_size, rx_msg->data.buf,
|
||||
rx_msg->data.buf_size);
|
||||
}
|
||||
|
||||
void
|
||||
@ -694,22 +623,17 @@ bmp_put_recv_bgp_open_msg(const struct bgp_proto *bgp, const byte* pkt,
|
||||
|
||||
struct bmp_peer_map_key key
|
||||
= bmp_peer_map_key_create(bgp->remote_ip, bgp->remote_as);
|
||||
const struct bmp_peer_map_entry *map_data
|
||||
const struct bmp_peer_map_entry *tx_msg
|
||||
= bmp_peer_map_get(&p->peer_open_msg.tx_msg, key);
|
||||
if (!map_data || !p->started)
|
||||
{
|
||||
bmp_peer_map_insert(&p->peer_open_msg.rx_msg, key, pkt, pkt_size);
|
||||
|
||||
if (!map_data)
|
||||
{
|
||||
bmp_peer_map_insert(&p->bgp_peers, key, (const byte *) &bgp, sizeof (bgp));
|
||||
}
|
||||
bmp_peer_map_insert(&p->peer_open_msg.rx_msg, key, pkt, pkt_size);
|
||||
|
||||
return;
|
||||
}
|
||||
if (!tx_msg)
|
||||
bmp_peer_map_insert(&p->bgp_peers, key, (const byte *) &bgp, sizeof (bgp));
|
||||
|
||||
bmp_send_peer_up_notif_msg(p, bgp, map_data->data.buf, map_data->data.buf_size,
|
||||
pkt, pkt_size);
|
||||
if (tx_msg && p->started)
|
||||
bmp_send_peer_up_notif_msg(p, bgp, tx_msg->data.buf, tx_msg->data.buf_size,
|
||||
pkt, pkt_size);
|
||||
}
|
||||
|
||||
void
|
||||
@ -933,7 +857,7 @@ static void
|
||||
bmp_send_peer_down_notif_msg(struct bmp_proto *p, const struct bgp_proto *bgp,
|
||||
const byte* data, const size_t data_size)
|
||||
{
|
||||
ASSERT(p->station_connected);
|
||||
ASSERT(p->started);
|
||||
|
||||
const struct bgp_caps *remote_caps = bmp_get_bgp_remote_caps_ext(bgp);
|
||||
bool is_global_instance_peer = bmp_is_peer_global_instance(bgp);
|
||||
@ -1035,36 +959,136 @@ bmp_send_termination_msg(struct bmp_proto *p,
|
||||
bmp_buffer_free(&stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* bmp_startup - enter established state
|
||||
* @p: BMP instance
|
||||
*
|
||||
* The bgp_startup() function is called when the BMP session is established.
|
||||
* It sends initiation and peer up messagages.
|
||||
*/
|
||||
static void
|
||||
bmp_station_connected(struct birdsock *sk)
|
||||
bmp_startup(struct bmp_proto *p)
|
||||
{
|
||||
struct bmp_proto *p = (void *) sk->data;
|
||||
ASSERT(!p->started);
|
||||
p->started = true;
|
||||
|
||||
sk->tx_hook = bmp_tx;
|
||||
p->station_connected = true;
|
||||
TRACE(D_EVENTS, "BMP session established");
|
||||
|
||||
bmp_startup(p);
|
||||
/* Send initiation message */
|
||||
buffer payload = bmp_buffer_alloc(p->buffer_mpool, DEFAULT_MEM_BLOCK_SIZE);
|
||||
bmp_init_msg_serialize(&payload, p->sys_descr, p->sys_name);
|
||||
bmp_schedule_tx_packet(p, bmp_buffer_data(&payload), bmp_buffer_pos(&payload));
|
||||
bmp_buffer_free(&payload);
|
||||
|
||||
/* Send Peer Up messages */
|
||||
bmp_peer_map_walk(&p->peer_open_msg.tx_msg,
|
||||
bmp_peer_map_walk_tx_open_msg_and_send_peer_up_notif, p);
|
||||
bmp_peer_map_flush(&p->peer_open_msg.tx_msg);
|
||||
bmp_peer_map_flush(&p->peer_open_msg.rx_msg);
|
||||
|
||||
proto_notify_state(&p->p, PS_UP);
|
||||
}
|
||||
|
||||
static inline void
|
||||
bmp_setup_socket(struct bmp_proto *p)
|
||||
/**
|
||||
* bmp_down - leave established state
|
||||
* @p: BMP instance
|
||||
*
|
||||
* The bgp_down() function is called when the BMP session fails.
|
||||
*/
|
||||
static void
|
||||
bmp_down(struct bmp_proto *p)
|
||||
{
|
||||
sock *sk = sk_new(p->tx_mem_pool);
|
||||
ASSERT(p->started);
|
||||
p->started = false;
|
||||
|
||||
TRACE(D_EVENTS, "BMP session closed");
|
||||
|
||||
proto_notify_state(&p->p, PS_START);
|
||||
}
|
||||
|
||||
/**
|
||||
* bmp_connect - initiate an outgoing connection
|
||||
* @p: BMP instance
|
||||
*
|
||||
* The bmp_connect() function creates the socket and initiates an outgoing TCP
|
||||
* connection to the monitoring station. It is called to enter Connect state.
|
||||
*/
|
||||
static void
|
||||
bmp_connect(struct bmp_proto *p)
|
||||
{
|
||||
ASSERT(!p->started);
|
||||
|
||||
sock *sk = sk_new(p->p.pool);
|
||||
sk->type = SK_TCP_ACTIVE;
|
||||
sk->daddr = p->station_ip;
|
||||
sk->dport = p->station_port;
|
||||
sk->ttl = IP4_MAX_TTL;
|
||||
sk->tos = IP_PREC_INTERNET_CONTROL;
|
||||
sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
|
||||
sk->tx_hook = bmp_station_connected;
|
||||
sk->tx_hook = bmp_connected;
|
||||
sk->err_hook = bmp_sock_err;
|
||||
|
||||
p->sk = sk;
|
||||
sk->data = p;
|
||||
|
||||
int rc = sk_open(sk);
|
||||
|
||||
if (rc < 0)
|
||||
sk_log_error(sk, p->p.name);
|
||||
|
||||
tm_start(p->connect_retry_timer, CONNECT_RETRY_TIME);
|
||||
}
|
||||
|
||||
/* BMP connect successfull event - switch from Connect to Established state */
|
||||
static void
|
||||
bmp_connected(struct birdsock *sk)
|
||||
{
|
||||
struct bmp_proto *p = (void *) sk->data;
|
||||
|
||||
sk->rx_hook = bmp_rx;
|
||||
sk->tx_hook = bmp_tx;
|
||||
tm_stop(p->connect_retry_timer);
|
||||
|
||||
bmp_startup(p);
|
||||
}
|
||||
|
||||
/* BMP socket error event - switch from any state to Idle state */
|
||||
static void
|
||||
bmp_sock_err(sock *sk, int err)
|
||||
{
|
||||
struct bmp_proto *p = sk->data;
|
||||
|
||||
if (err)
|
||||
TRACE(D_EVENTS, "Connection lost (%M)", err);
|
||||
else
|
||||
TRACE(D_EVENTS, "Connection closed");
|
||||
|
||||
if (p->started)
|
||||
bmp_down(p);
|
||||
|
||||
bmp_close_socket(p);
|
||||
tm_start(p->connect_retry_timer, CONNECT_RETRY_TIME);
|
||||
}
|
||||
|
||||
/* BMP connect timeout event - switch from Idle/Connect state to Connect state */
|
||||
static void
|
||||
bmp_connection_retry(timer *t)
|
||||
{
|
||||
struct bmp_proto *p = t->data;
|
||||
|
||||
if (p->started)
|
||||
return;
|
||||
|
||||
bmp_close_socket(p);
|
||||
bmp_connect(p);
|
||||
}
|
||||
|
||||
static void
|
||||
bmp_close_socket(struct bmp_proto *p)
|
||||
{
|
||||
rfree(p->sk);
|
||||
p->sk = NULL;
|
||||
}
|
||||
|
||||
|
||||
/** Configuration handle section **/
|
||||
static struct proto *
|
||||
bmp_init(struct proto_config *CF)
|
||||
@ -1097,6 +1121,8 @@ bmp_start(struct proto *P)
|
||||
p->tx_mem_pool = rp_new(P->pool, "BMP Tx");
|
||||
p->update_msg_mem_pool = rp_new(P->pool, "BMP Update");
|
||||
p->tx_ev = ev_new_init(p->tx_mem_pool, bmp_fire_tx, p);
|
||||
p->connect_retry_timer = tm_new_init(p->p.pool, bmp_connection_retry, p, 0, 0);
|
||||
p->sk = NULL;
|
||||
|
||||
bmp_peer_map_init(&p->peer_open_msg.tx_msg, p->map_mem_pool);
|
||||
bmp_peer_map_init(&p->peer_open_msg.rx_msg, p->map_mem_pool);
|
||||
@ -1104,26 +1130,25 @@ bmp_start(struct proto *P)
|
||||
|
||||
init_list(&p->tx_queue);
|
||||
init_list(&p->rt_table_in_pre_policy.update_msg_queue);
|
||||
p->station_connected = false;
|
||||
p->started = false;
|
||||
p->connect_retry_timer = NULL;
|
||||
|
||||
bmp_setup_socket(p);
|
||||
bmp_open(P);
|
||||
tm_start(p->connect_retry_timer, CONNECT_INIT_TIME);
|
||||
|
||||
g_bmp = p;
|
||||
|
||||
return PS_UP;
|
||||
return PS_START;
|
||||
}
|
||||
|
||||
static int
|
||||
bmp_shutdown(struct proto *P)
|
||||
{
|
||||
struct bmp_proto *p = (void *) P;
|
||||
bmp_send_termination_msg(p, BMP_TERM_REASON_ADM);
|
||||
|
||||
p->station_connected = false;
|
||||
p->started = false;
|
||||
if (p->started)
|
||||
{
|
||||
bmp_send_termination_msg(p, BMP_TERM_REASON_ADM);
|
||||
p->started = false;
|
||||
}
|
||||
|
||||
g_bmp = NULL;
|
||||
|
||||
|
@ -81,7 +81,6 @@ struct bmp_proto {
|
||||
list tx_queue; // Stores queued packets going to be sent
|
||||
timer *connect_retry_timer; // Timer for retrying connection to the BMP collector
|
||||
struct rt_table_info rt_table_in_pre_policy; // Pre-policy route import table
|
||||
bool station_connected; // Flag that stores connection status with BMP station
|
||||
bool started; // Flag that stores running status of BMP instance
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user