0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2025-01-15 21:41:56 +00:00
bird/sysdep/unix/io.c

479 lines
11 KiB
C

/*
* BIRD Internet Routing Daemon -- Unix I/O
*
* (c) 1998--2004 Martin Mares <mj@ucw.cz>
* (c) 2004 Ondrej Filip <feela@network.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
if _GNU_SOURCE is not defined. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <poll.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/icmp6.h>
#include <netdb.h>
#include "nest/bird.h"
#include "lib/lists.h"
#include "lib/resource.h"
#include "lib/socket.h"
#include "lib/event.h"
#include "lib/locking.h"
#include "lib/timer.h"
#include "lib/string.h"
#include "nest/iface.h"
#include "conf/conf.h"
#include "sysdep/unix/unix.h"
#include "sysdep/unix/io-loop.h"
/*
 * Maximum number of consecutive calls of the tx handler (and likewise of the
 * fast rx handler) for one socket in one poll iteration. Should be small
 * enough that one protocol instance cannot monopolize the CPU.
 */
#define MAX_STEPS 4
/*
 * Maximum number of calls of the (non-fast) rx handler for all sockets in one
 * poll iteration. RX callbacks are often much more costly, so we limit this
 * to get small latencies.
 */
#define MAX_RX_STEPS 4
#if 0
/**********
* An internal event log covering only the main loop makes no sense.
* To be replaced by a lockless event log keeping much more information
* about all the logs throughout all the threads.
*/
/*
* Internal event log and watchdog
*/
/* Number of slots in the circular event log. */
#define EVENT_LOG_LENGTH 32
/* One record of the event log; written by io_log_event(). */
struct event_log_entry
{
/* Address of the event hook; io_log_dump() skips entries where it is NULL */
void *hook;
/* Address of the event data */
void *data;
/* Value of last_io_time at the moment the entry was stored */
btime timestamp;
/* Stored as 0; io_update_time() fills it in while the entry is open */
btime duration;
};
/* Storage of the circular event log. */
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
/* Entry whose duration is still to be filled in, or NULL if there is none. */
static struct event_log_entry *event_open;
/*
 * event_log_pos: index of the slot the next entry is written to (wraps at
 * EVENT_LOG_LENGTH); event_log_num: incremented for every logged event;
 * watchdog_active: not referenced by the event-log functions of this
 * disabled section.
 */
static int event_log_pos, event_log_num, watchdog_active;
/* Time taken by the latest io_update_time() call; used as entry timestamp. */
static btime last_io_time;
/* Not referenced by the event-log functions of this disabled section. */
static btime loop_time;
static void
io_update_time(void)
{
last_io_time = current_time();
if (event_open)
{
event_open->duration = last_io_time - event_open->timestamp;
struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
if (event_open->duration > gr->latency_limit)
log(L_WARN "Event 0x%p 0x%p took %u.%03u ms",
event_open->hook, event_open->data, (uint) (event_open->duration TO_MS), (uint) (event_open->duration % 1000));
event_open = NULL;
}
}
/**
 * io_log_event - record an upcoming event in the event log
 * @hook: event hook address
 * @data: event data address
 * @flag: bit(s) tested against the runtime's latency_debug mask
 *
 * Write (hook, data, timestamp) describing the internal event that is about
 * to run into the next slot of the circular event log (@event_log). When
 * @flag is enabled in latency_debug, the time is refreshed first and the new
 * entry is left open (in @event_open) so its duration can be filled in later;
 * otherwise no entry is left open.
 */
void
io_log_event(void *hook, void *data, uint flag)
{
  struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);

  /* Closes the previously open entry (if any) and refreshes last_io_time */
  if (gr->latency_debug & flag)
    io_update_time();

  struct event_log_entry *slot = &event_log[event_log_pos];
  slot->hook = hook;
  slot->data = data;
  slot->timestamp = last_io_time;
  slot->duration = 0;

  event_log_num++;
  event_log_pos = (event_log_pos + 1) % EVENT_LOG_LENGTH;

  if (gr->latency_debug & flag)
    event_open = slot;
  else
    event_open = NULL;
}
/* Close the currently open event log entry, if there is one. */
static inline void
io_close_event(void)
{
  if (!event_open)
    return;

  io_update_time();
}
/*
 * Dump the event log at debug level. Slots are visited starting at the
 * current write position, and slots whose hook is NULL are skipped. For each
 * entry the time since it was stored (relative to last_io_time) and its
 * duration are printed.
 */
void
io_log_dump(void)
{
  log(L_DEBUG "Event log:");

  for (int off = 0; off < EVENT_LOG_LENGTH; off++)
  {
    struct event_log_entry *en = &event_log[(event_log_pos + off) % EVENT_LOG_LENGTH];

    if (en->hook)
      log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
	  (int) ((last_io_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
  }
}
#endif
/*
 * last_io_time: time taken at the latest watchdog start, stop or expiry.
 * loop_time: time taken when the watchdog was last started; watchdog_stop()
 * subtracts it from last_io_time to get the length of the loop cycle.
 */
static btime last_io_time, loop_time;
/* Nonzero while an alarm() armed by watchdog_start() has not been cancelled. */
static int watchdog_active;
/*
 * Watchdog expiry handler (the alarm itself is armed in watchdog_start()).
 * Sets the runtime's latency limit to 0xffffffff, records the current time,
 * prints a message and aborts the process.
 */
void
watchdog_sigalrm(int sig UNUSED)
{
  struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);

  gr->latency_limit = 0xffffffff;
  last_io_time = current_time_now();

  debug_safe("Watchdog timer timed out\n");

  /* abort() rather than exit: we want a core dump */
  abort();
}
static inline void
watchdog_start1(void)
{
loop_time = last_io_time = current_time_now();
}
static inline void
watchdog_start(void)
{
loop_time = last_io_time = current_time_now();
// event_log_num = 0;
union bird_global_runtime *gr = BIRD_GLOBAL_RUNTIME;
if (gr->watchdog_timeout)
{
alarm(gr->watchdog_timeout);
watchdog_active = 1;
}
}
/*
 * Mark the end of a loop cycle: take the current time, cancel a pending
 * watchdog alarm, and log a warning if the time elapsed since the cycle
 * started exceeds the configured watchdog_warning threshold.
 */
static inline void
watchdog_stop(void)
{
  last_io_time = current_time_now();

  if (watchdog_active)
  {
    alarm(0);
    watchdog_active = 0;
  }

  btime cycle = last_io_time - loop_time;
  union bird_global_runtime *gr = BIRD_GLOBAL_RUNTIME;

  if (cycle <= gr->watchdog_warning)
    return;

  /* Printed as whole milliseconds plus the three-digit remainder */
  log(L_WARN "I/O loop cycle took %u.%03u ms",
      (uint) (cycle TO_MS), (uint) (cycle % 1000));
}
/*
* Main I/O Loop
*/
void
io_init(void)
{
init_list(&main_birdloop.sock_list);
ev_init_list(&global_event_list, &main_birdloop, "Global event list");
ev_init_list(&global_work_list, &main_birdloop, "Global work list");
ev_init_list(&main_birdloop.event_list, &main_birdloop, "Global fast event list");
krt_io_init();
// XXX init_times();
// XXX update_times();
boot_time = current_time();
u64 now = (u64) current_real_time();
srandom((uint) (now ^ (now >> 32)));
}
/*
 * Number of consecutive io_loop() passes with socket activity in which the
 * rate-limited rx scan was skipped; reset to 0 whenever that scan runs.
 */
static int short_loops = 0;
/* Once short_loops reaches this value, the rx scan runs even with events pending. */
#define SHORT_LOOP_MAX 10
/* Limit passed to ev_run_list_limited() for the global work list in each pass. */
#define WORK_EVENTS_MAX 10
/* Socket at which io_loop() resumes its rate-limited rx scan on a later pass. */
extern sock *stored_sock; /* mainloop hack */
/* Socket helpers called from io_loop(); not defined in the visible part of this file. */
int sk_read(sock *s, int revents);
int sk_write(sock *s);
void sk_err(sock *s, int revents);
/*
 * io_loop - the main I/O loop of the main birdloop.
 *
 * The for(;;) cycle below contains no break or return, so this function is
 * only left if something it calls terminates the process. Each pass:
 *
 *  1. updates the time, runs the global event list, a limited part of the
 *     global work list, the main loop's own event list and expired timers;
 *  2. computes the poll() timeout from pending events and the first timer;
 *  3. rebuilds the poll set (wakeup pipe first, then the sockets);
 *  4. handles at most one pending asynchronous request (config, dump,
 *     shutdown) and restarts the pass if one was handled;
 *  5. calls poll() with the watchdog stopped;
 *  6. processes active sockets in two scans: fast rx + tx first, then the
 *     rate-limited rx scan together with error handling.
 */
void
io_loop(void)
{
int poll_tout, timeout;
int events, pout;
timer *t;
struct pfd pfd;
/* Two buffers allocated from root_pool with initial size 16 */
BUFFER_INIT(pfd.pfd, &root_pool, 16);
BUFFER_INIT(pfd.loop, &root_pool, 16);
watchdog_start1();
for(;;)
{
times_update();
ev_run_list(&global_event_list);
/* The work list is run only up to WORK_EVENTS_MAX per pass */
ev_run_list_limited(&global_work_list, WORK_EVENTS_MAX);
ev_run_list(&main_birdloop.event_list);
timers_fire(&main_birdloop.time);
// io_close_event();
/* Nonzero if any of the three event lists still has something queued */
events =
!ev_list_empty(&global_event_list) ||
!ev_list_empty(&global_work_list) ||
!ev_list_empty(&main_birdloop.event_list);
/* Do not block in poll() when events are waiting; otherwise wait up to 3 s */
poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
/* Assignment intended: t is the first timer of the main loop, if any */
if (t = timers_first(&main_birdloop.time))
{
times_update();
/*
 * NOTE(review): timeout is an int; if tm_remains() yields a wider type,
 * a timer far in the future could be truncated here before MIN() is
 * applied -- confirm against the definition of tm_remains().
 */
timeout = (tm_remains(t) TO_MS) + 1;
poll_tout = MIN(poll_tout, timeout);
}
/* Rebuild the poll set from scratch */
BUFFER_FLUSH(pfd.pfd);
BUFFER_FLUSH(pfd.loop);
/*
 * The wakeup pipe is added first after the flush; presumably it therefore
 * occupies slot 0, which is what the revents check after poll() relies on.
 */
pipe_pollin(&main_birdloop.thread->wakeup, &pfd);
sockets_prepare(&main_birdloop, &pfd);
/*
 * Yes, this is racy. But even if the signal comes before this test
 * and entering poll(), it gets caught on the next timer tick.
 */
/* Each handled request clears its flag afterwards and restarts the pass */
if (async_config_flag)
{
// io_log_event(async_config, NULL, DL_EVENTS);
async_config();
async_config_flag = 0;
continue;
}
if (async_dump_flag)
{
// io_log_event(async_dump, NULL, DL_EVENTS);
async_dump();
async_dump_flag = 0;
continue;
}
if (async_shutdown_flag)
{
// io_log_event(async_shutdown, NULL, DL_EVENTS);
async_shutdown();
async_shutdown_flag = 0;
continue;
}
/* And finally enter poll() to find active sockets */
/* The watchdog is stopped before poll() and restarted right after it */
watchdog_stop();
/* The main birdloop is left for the duration of poll() and re-entered after */
birdloop_leave(&main_birdloop);
pout = poll(pfd.pfd.data, pfd.pfd.used, poll_tout);
birdloop_enter(&main_birdloop);
watchdog_start();
if (pout < 0)
{
/* Interrupted or temporarily failed poll() just restarts the pass */
if (errno == EINTR || errno == EAGAIN)
continue;
bug("poll: %m");
}
if (pout)
{
if (pfd.pfd.data[0].revents & POLLIN)
{
/* IO loop reload requested */
/* Drain the wakeup pipe, clear the LTT_PING bit and restart the pass */
pipe_drain(&main_birdloop.thread->wakeup);
atomic_fetch_and_explicit(&main_birdloop.thread_transition, ~LTT_PING, memory_order_acq_rel);
continue;
}
times_update();
/*
 * First scan: fast rx and tx for every socket, each limited to MAX_STEPS
 * calls per socket.
 */
/* guaranteed to be non-empty */
main_birdloop.sock_active = SKIP_BACK(sock, n, HEAD(main_birdloop.sock_list));
while (main_birdloop.sock_active)
{
sock *s = main_birdloop.sock_active;
/* Sockets with index -1 have no entry in pfd.pfd and are skipped */
if (s->index != -1)
{
int e;
int steps;
steps = MAX_STEPS;
if (s->fast_rx && (pfd.pfd.data[s->index].revents & POLLIN) && s->rx_hook)
do
{
steps--;
// io_log_event(s->rx_hook, s->data, DL_SOCKETS);
e = sk_read(s, pfd.pfd.data[s->index].revents);
}
while (e && (main_birdloop.sock_active == s) && s->rx_hook && steps);
/*
 * sock_active no longer pointing at s means it was changed during the
 * call above; s is not touched again and the scan goes on from the new
 * sock_active.
 */
if (s != main_birdloop.sock_active)
continue;
steps = MAX_STEPS;
if (pfd.pfd.data[s->index].revents & POLLOUT)
do
{
steps--;
// io_log_event(s->tx_hook, s->data, DL_SOCKETS);
e = sk_write(s);
}
while (e && (main_birdloop.sock_active == s) && steps);
if (s != main_birdloop.sock_active)
continue;
}
main_birdloop.sock_active = sk_next(s);
}
/*
 * While events are pending, skip the second scan -- but for fewer than
 * SHORT_LOOP_MAX passes in a row.
 */
short_loops++;
if (events && (short_loops < SHORT_LOOP_MAX))
continue;
short_loops = 0;
/*
 * Second scan: non-fast rx, limited to MAX_RX_STEPS reads in total, plus
 * error handling. It resumes at stored_sock, or at the head of the socket
 * list when stored_sock is NULL.
 */
int count = 0;
main_birdloop.sock_active = stored_sock;
if (main_birdloop.sock_active == NULL)
main_birdloop.sock_active = SKIP_BACK(sock, n, HEAD(main_birdloop.sock_list));
while (main_birdloop.sock_active && count < MAX_RX_STEPS)
{
sock *s = main_birdloop.sock_active;
if (s->index == -1)
goto next2;
if (!s->fast_rx && (pfd.pfd.data[s->index].revents & POLLIN) && s->rx_hook)
{
count++;
// io_log_event(s->rx_hook, s->data, DL_SOCKETS);
sk_read(s, pfd.pfd.data[s->index].revents);
/* Same check as in the first scan: do not touch s if sock_active moved */
if (s != main_birdloop.sock_active)
continue;
}
if (pfd.pfd.data[s->index].revents & (POLLHUP | POLLERR))
{
sk_err(s, pfd.pfd.data[s->index].revents);
if (s != main_birdloop.sock_active)
continue;
}
next2: ;
main_birdloop.sock_active = sk_next(s);
}
/* Remember where the scan stopped so that a later pass continues from there */
stored_sock = main_birdloop.sock_active;
}
}
}
/*
 * test_old_bird - check whether something already listens on a UNIX socket
 * @path: filesystem path of the UNIX-domain stream socket to probe
 *
 * Tries to connect to @path. A successful connect is reported through die()
 * as another running BIRD; a failed connect is the expected case and the
 * probe socket is simply closed. Socket creation failure and a @path that
 * does not fit into sun_path are reported through die() as well.
 */
void
test_old_bird(char *path)
{
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);
  if (fd < 0)
    die("Cannot create socket: %m");

  struct sockaddr_un sa;
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  /* The length check above guarantees the path and its terminator fit */
  memset(&sa, 0, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  int rv = connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa));
  if (rv == 0)
    die("I found another BIRD running.");

  close(fd);
}
/*
* DNS resolver
*/
/*
 * resolve_hostname - resolve a host name to a single IP address
 * @host: name passed to getaddrinfo()
 * @type: socket type; SK_UDP selects SOCK_DGRAM, anything else SOCK_STREAM
 * @err_msg: set to NULL first, and to the gai_strerror() text when the
 *   lookup fails
 *
 * Returns the address read from the first getaddrinfo() result, or IPA_NONE
 * when the lookup fails. The return value of sockaddr_read() is not checked,
 * so IPA_NONE is also what comes back if it leaves the address untouched.
 */
ip_addr
resolve_hostname(const char *host, int type, const char **err_msg)
{
  struct addrinfo hints = {
    .ai_family = AF_UNSPEC,
    .ai_flags = AI_ADDRCONFIG,
  };

  if (type == SK_UDP)
    hints.ai_socktype = SOCK_DGRAM;
  else
    hints.ai_socktype = SOCK_STREAM;

  *err_msg = NULL;

  struct addrinfo *found;
  int gai_rv = getaddrinfo(host, NULL, &hints, &found);
  if (gai_rv != 0)
  {
    *err_msg = gai_strerror(gai_rv);
    return IPA_NONE;
  }

  /* Only the first entry of the result list is used; the port is discarded */
  ip_addr result = IPA_NONE;
  uint port_unused;
  sockaddr_read((sockaddr *) found->ai_addr, found->ai_family, &result, NULL, &port_unused);

  freeaddrinfo(found);
  return result;
}