mirror of
https://gitlab.nic.cz/labs/bird.git
synced 2025-01-24 18:01:54 +00:00
1735 lines
35 KiB
C
1735 lines
35 KiB
C
/*
|
|
* BIRD Internet Routing Daemon -- Unix I/O
|
|
*
|
|
* (c) 1998--2004 Martin Mares <mj@ucw.cz>
|
|
* (c) 2004 Ondrej Filip <feela@network.cz>
|
|
*
|
|
* Can be freely distributed and used under the terms of the GNU GPL.
|
|
*/
|
|
|
|
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
|
|
if _GNU_SOURCE is not defined. */
|
|
#define _GNU_SOURCE 1
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
#include <sys/time.h>
|
|
#include <sys/types.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/fcntl.h>
|
|
#include <sys/uio.h>
|
|
#include <sys/un.h>
|
|
#include <unistd.h>
|
|
#include <errno.h>
|
|
#include <netinet/in.h>
|
|
#include <netinet/icmp6.h>
|
|
|
|
#include "nest/bird.h"
|
|
#include "lib/lists.h"
|
|
#include "lib/resource.h"
|
|
#include "lib/timer.h"
|
|
#include "lib/socket.h"
|
|
#include "lib/event.h"
|
|
#include "lib/string.h"
|
|
#include "nest/iface.h"
|
|
|
|
#include "lib/unix.h"
|
|
#include "lib/sysio.h"
|
|
|
|
/* Maximum number of calls of tx handler for one socket in one
|
|
* select iteration. Should be small enough to not monopolize CPU by
|
|
* one protocol instance.
|
|
*/
|
|
#define MAX_STEPS 4
|
|
|
|
/* Maximum number of calls of rx handler for all sockets in one select
|
|
iteration. RX callbacks are often much more costly so we limit
|
|
this to get small latencies */
|
|
#define MAX_RX_STEPS 4
|
|
|
|
/*
|
|
* Tracked Files
|
|
*/
|
|
|
|
struct rfile {
|
|
resource r;
|
|
FILE *f;
|
|
};
|
|
|
|
static void
|
|
rf_free(resource *r)
|
|
{
|
|
struct rfile *a = (struct rfile *) r;
|
|
|
|
fclose(a->f);
|
|
}
|
|
|
|
static void
|
|
rf_dump(resource *r)
|
|
{
|
|
struct rfile *a = (struct rfile *) r;
|
|
|
|
debug("(FILE *%p)\n", a->f);
|
|
}
|
|
|
|
static struct resclass rf_class = {
|
|
"FILE",
|
|
sizeof(struct rfile),
|
|
rf_free,
|
|
rf_dump,
|
|
NULL,
|
|
NULL
|
|
};
|
|
|
|
void *
|
|
tracked_fopen(pool *p, char *name, char *mode)
|
|
{
|
|
FILE *f = fopen(name, mode);
|
|
|
|
if (f)
|
|
{
|
|
struct rfile *r = ralloc(p, &rf_class);
|
|
r->f = f;
|
|
}
|
|
return f;
|
|
}
|
|
|
|
/**
|
|
* DOC: Timers
|
|
*
|
|
* Timers are resources which represent a wish of a module to call
|
|
* a function at the specified time. The platform dependent code
|
|
* doesn't guarantee exact timing, only that a timer function
|
|
* won't be called before the requested time.
|
|
*
|
|
* In BIRD, time is represented by values of the &bird_clock_t type
|
|
* which are integral numbers interpreted as a relative number of seconds since
|
|
* some fixed time point in past. The current time can be read
|
|
* from variable @now with reasonable accuracy and is monotonic. There is also
|
|
* a current 'absolute' time in variable @now_real reported by OS.
|
|
*
|
|
* Each timer is described by a &timer structure containing a pointer
|
|
* to the handler function (@hook), data private to this function (@data),
|
|
* time the function should be called at (@expires, 0 for inactive timers),
|
|
* for the other fields see |timer.h|.
|
|
*/
|
|
|
|
#define NEAR_TIMER_LIMIT 4
|
|
|
|
static list near_timers, far_timers;
|
|
static bird_clock_t first_far_timer = TIME_INFINITY;
|
|
|
|
/* now must be different from 0, because 0 is a special value in timer->expires */
|
|
bird_clock_t now = 1, now_real;
|
|
|
|
static void
|
|
update_times_plain(void)
|
|
{
|
|
bird_clock_t new_time = time(NULL);
|
|
int delta = new_time - now_real;
|
|
|
|
if ((delta >= 0) && (delta < 60))
|
|
now += delta;
|
|
else if (now_real != 0)
|
|
log(L_WARN "Time jump, delta %d s", delta);
|
|
|
|
now_real = new_time;
|
|
}
|
|
|
|
static void
|
|
update_times_gettime(void)
|
|
{
|
|
struct timespec ts;
|
|
int rv;
|
|
|
|
rv = clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
if (rv != 0)
|
|
die("clock_gettime: %m");
|
|
|
|
if (ts.tv_sec != now) {
|
|
if (ts.tv_sec < now)
|
|
log(L_ERR "Monotonic timer is broken");
|
|
|
|
now = ts.tv_sec;
|
|
now_real = time(NULL);
|
|
}
|
|
}
|
|
|
|
/* Nonzero when clock_gettime(CLOCK_MONOTONIC) works; set by init_times() */
static int clock_monotonic_available;

/* Refresh @now and @now_real, preferring the monotonic clock when available */
static inline void
update_times(void)
{
  if (!clock_monotonic_available)
  {
    update_times_plain();
    return;
  }

  update_times_gettime();
}
|
|
|
|
static inline void
|
|
init_times(void)
|
|
{
|
|
struct timespec ts;
|
|
clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
|
|
if (!clock_monotonic_available)
|
|
log(L_WARN "Monotonic timer is missing");
|
|
}
|
|
|
|
|
|
static void
|
|
tm_free(resource *r)
|
|
{
|
|
timer *t = (timer *) r;
|
|
|
|
tm_stop(t);
|
|
}
|
|
|
|
static void
|
|
tm_dump(resource *r)
|
|
{
|
|
timer *t = (timer *) r;
|
|
|
|
debug("(code %p, data %p, ", t->hook, t->data);
|
|
if (t->randomize)
|
|
debug("rand %d, ", t->randomize);
|
|
if (t->recurrent)
|
|
debug("recur %d, ", t->recurrent);
|
|
if (t->expires)
|
|
debug("expires in %d sec)\n", t->expires - now);
|
|
else
|
|
debug("inactive)\n");
|
|
}
|
|
|
|
static struct resclass tm_class = {
|
|
"Timer",
|
|
sizeof(timer),
|
|
tm_free,
|
|
tm_dump,
|
|
NULL,
|
|
NULL
|
|
};
|
|
|
|
/**
|
|
* tm_new - create a timer
|
|
* @p: pool
|
|
*
|
|
* This function creates a new timer resource and returns
|
|
* a pointer to it. To use the timer, you need to fill in
|
|
* the structure fields and call tm_start() to start timing.
|
|
*/
|
|
timer *
|
|
tm_new(pool *p)
|
|
{
|
|
timer *t = ralloc(p, &tm_class);
|
|
return t;
|
|
}
|
|
|
|
static inline void
|
|
tm_insert_near(timer *t)
|
|
{
|
|
node *n = HEAD(near_timers);
|
|
|
|
while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
|
|
n = n->next;
|
|
insert_node(&t->n, n->prev);
|
|
}
|
|
|
|
/**
|
|
* tm_start - start a timer
|
|
* @t: timer
|
|
* @after: number of seconds the timer should be run after
|
|
*
|
|
* This function schedules the hook function of the timer to
|
|
* be called after @after seconds. If the timer has been already
|
|
* started, its @expires time is replaced by the new value.
|
|
*
|
|
* If you have set the @randomize field of @t, the timeout
|
|
* will be increased by a random number of seconds chosen
|
|
* uniformly from range 0 .. @randomize.
|
|
*
|
|
* You can call tm_start() from the handler function of the timer
|
|
* to request another run of the timer. Also, you can set the @recurrent
|
|
* field to have the timer re-added automatically with the same timeout.
|
|
*/
|
|
void
|
|
tm_start(timer *t, unsigned after)
|
|
{
|
|
bird_clock_t when;
|
|
|
|
if (t->randomize)
|
|
after += random() % (t->randomize + 1);
|
|
when = now + after;
|
|
if (t->expires == when)
|
|
return;
|
|
if (t->expires)
|
|
rem_node(&t->n);
|
|
t->expires = when;
|
|
if (after <= NEAR_TIMER_LIMIT)
|
|
tm_insert_near(t);
|
|
else
|
|
{
|
|
if (!first_far_timer || first_far_timer > when)
|
|
first_far_timer = when;
|
|
add_tail(&far_timers, &t->n);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* tm_stop - stop a timer
|
|
* @t: timer
|
|
*
|
|
* This function stops a timer. If the timer is already stopped,
|
|
* nothing happens.
|
|
*/
|
|
void
|
|
tm_stop(timer *t)
|
|
{
|
|
if (t->expires)
|
|
{
|
|
rem_node(&t->n);
|
|
t->expires = 0;
|
|
}
|
|
}
|
|
|
|
static void
|
|
tm_dump_them(char *name, list *l)
|
|
{
|
|
node *n;
|
|
timer *t;
|
|
|
|
debug("%s timers:\n", name);
|
|
WALK_LIST(n, *l)
|
|
{
|
|
t = SKIP_BACK(timer, n, n);
|
|
debug("%p ", t);
|
|
tm_dump(&t->r);
|
|
}
|
|
debug("\n");
|
|
}
|
|
|
|
void
|
|
tm_dump_all(void)
|
|
{
|
|
tm_dump_them("Near", &near_timers);
|
|
tm_dump_them("Far", &far_timers);
|
|
}
|
|
|
|
static inline time_t
|
|
tm_first_shot(void)
|
|
{
|
|
time_t x = first_far_timer;
|
|
|
|
if (!EMPTY_LIST(near_timers))
|
|
{
|
|
timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
|
|
if (t->expires < x)
|
|
x = t->expires;
|
|
}
|
|
return x;
|
|
}
|
|
|
|
static void
|
|
tm_shot(void)
|
|
{
|
|
timer *t;
|
|
node *n, *m;
|
|
|
|
if (first_far_timer <= now)
|
|
{
|
|
bird_clock_t limit = now + NEAR_TIMER_LIMIT;
|
|
first_far_timer = TIME_INFINITY;
|
|
n = HEAD(far_timers);
|
|
while (m = n->next)
|
|
{
|
|
t = SKIP_BACK(timer, n, n);
|
|
if (t->expires <= limit)
|
|
{
|
|
rem_node(n);
|
|
tm_insert_near(t);
|
|
}
|
|
else if (t->expires < first_far_timer)
|
|
first_far_timer = t->expires;
|
|
n = m;
|
|
}
|
|
}
|
|
while ((n = HEAD(near_timers)) -> next)
|
|
{
|
|
int delay;
|
|
t = SKIP_BACK(timer, n, n);
|
|
if (t->expires > now)
|
|
break;
|
|
rem_node(n);
|
|
delay = t->expires - now;
|
|
t->expires = 0;
|
|
if (t->recurrent)
|
|
{
|
|
int i = t->recurrent - delay;
|
|
if (i < 0)
|
|
i = 0;
|
|
tm_start(t, i);
|
|
}
|
|
t->hook(t);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* tm_parse_datetime - parse a date and time
|
|
* @x: datetime string
|
|
*
|
|
* tm_parse_datetime() takes a textual representation of
|
|
* a date and time (dd-mm-yyyy hh:mm:ss)
|
|
* and converts it to the corresponding value of type &bird_clock_t.
|
|
*/
|
|
bird_clock_t
|
|
tm_parse_datetime(char *x)
|
|
{
|
|
struct tm tm;
|
|
int n;
|
|
time_t t;
|
|
|
|
if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
|
|
return tm_parse_date(x);
|
|
tm.tm_mon--;
|
|
tm.tm_year -= 1900;
|
|
t = mktime(&tm);
|
|
if (t == (time_t) -1)
|
|
return 0;
|
|
return t;
|
|
}
|
|
/**
|
|
* tm_parse_date - parse a date
|
|
* @x: date string
|
|
*
|
|
* tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
|
|
* and converts it to the corresponding value of type &bird_clock_t.
|
|
*/
|
|
bird_clock_t
|
|
tm_parse_date(char *x)
|
|
{
|
|
struct tm tm;
|
|
int n;
|
|
time_t t;
|
|
|
|
if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
|
|
return 0;
|
|
tm.tm_mon--;
|
|
tm.tm_year -= 1900;
|
|
tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
|
|
t = mktime(&tm);
|
|
if (t == (time_t) -1)
|
|
return 0;
|
|
return t;
|
|
}
|
|
|
|
static void
|
|
tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
|
|
{
|
|
static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
|
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
|
|
|
|
if (delta < 20*3600)
|
|
bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
|
|
else if (delta < 360*86400)
|
|
bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
|
|
else
|
|
bsprintf(x, "%d", tm->tm_year+1900);
|
|
}
|
|
|
|
#include "conf/conf.h"
|
|
|
|
/**
|
|
* tm_format_datetime - convert date and time to textual representation
|
|
* @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
|
|
* @t: time
|
|
*
|
|
* This function formats the given relative time value @t to a textual
|
|
* date/time representation in real time, using the format given by @fmt_spec.
|
|
*/
|
|
void
|
|
tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
|
|
{
|
|
const char *fmt_used;
|
|
struct tm *tm;
|
|
bird_clock_t delta = now - t;
|
|
t = now_real - delta;
|
|
tm = localtime(&t);
|
|
|
|
if (fmt_spec->fmt1 == NULL)
|
|
return tm_format_reltime(x, tm, delta);
|
|
|
|
if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
|
|
fmt_used = fmt_spec->fmt1;
|
|
else
|
|
fmt_used = fmt_spec->fmt2;
|
|
|
|
int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
|
|
if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
|
|
strcpy(x, "<too-long>");
|
|
}
|
|
|
|
/**
|
|
* DOC: Sockets
|
|
*
|
|
* Socket resources represent network connections. Their data structure (&socket)
|
|
* contains a lot of fields defining the exact type of the socket, the local and
|
|
* remote addresses and ports, pointers to socket buffers and finally pointers to
|
|
* hook functions to be called when new data have arrived to the receive buffer
|
|
* (@rx_hook), when the contents of the transmit buffer have been transmitted
|
|
* (@tx_hook) and when an error or connection close occurs (@err_hook).
|
|
*
|
|
* Freeing of sockets from inside socket hooks is perfectly safe.
|
|
*/
|
|
|
|
/* Use the IPPROTO_* values where the system headers lack the SOL_* names */
#if !defined(SOL_IP)
#define SOL_IP IPPROTO_IP
#endif

#if !defined(SOL_IPV6)
#define SOL_IPV6 IPPROTO_IPV6
#endif
|
|
|
|
/* All inserted sockets, see sk_insert() */
static list sock_list;

/* Socket cursors; sk_free() advances them when the socket they point to is freed */
static struct birdsock *current_sock;
static struct birdsock *stored_sock;

/* Set to 1 whenever a socket is inserted into or removed from sock_list */
static int sock_recalc_fdsets_p;
|
|
|
|
/* Return the socket following @s in its list, or NULL when @s is the last one */
static inline sock *
sk_next(sock *s)
{
  node *succ = s->n.next;

  return succ->next ? SKIP_BACK(sock, n, succ) : NULL;
}
|
|
|
|
static void
|
|
sk_alloc_bufs(sock *s)
|
|
{
|
|
if (!s->rbuf && s->rbsize)
|
|
s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
|
|
s->rpos = s->rbuf;
|
|
if (!s->tbuf && s->tbsize)
|
|
s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
|
|
s->tpos = s->ttx = s->tbuf;
|
|
}
|
|
|
|
static void
|
|
sk_free_bufs(sock *s)
|
|
{
|
|
if (s->rbuf_alloc)
|
|
{
|
|
xfree(s->rbuf_alloc);
|
|
s->rbuf = s->rbuf_alloc = NULL;
|
|
}
|
|
if (s->tbuf_alloc)
|
|
{
|
|
xfree(s->tbuf_alloc);
|
|
s->tbuf = s->tbuf_alloc = NULL;
|
|
}
|
|
}
|
|
|
|
static void
|
|
sk_free(resource *r)
|
|
{
|
|
sock *s = (sock *) r;
|
|
|
|
sk_free_bufs(s);
|
|
if (s->fd >= 0)
|
|
{
|
|
close(s->fd);
|
|
if (s == current_sock)
|
|
current_sock = sk_next(s);
|
|
if (s == stored_sock)
|
|
stored_sock = sk_next(s);
|
|
rem_node(&s->n);
|
|
sock_recalc_fdsets_p = 1;
|
|
}
|
|
}
|
|
|
|
void
|
|
sk_reallocate(sock *s)
|
|
{
|
|
sk_free_bufs(s);
|
|
sk_alloc_bufs(s);
|
|
}
|
|
|
|
static void
|
|
sk_dump(resource *r)
|
|
{
|
|
sock *s = (sock *) r;
|
|
static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
|
|
|
|
debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
|
|
sk_type_names[s->type],
|
|
s->data,
|
|
s->saddr,
|
|
s->sport,
|
|
s->daddr,
|
|
s->dport,
|
|
s->tos,
|
|
s->ttl,
|
|
s->iface ? s->iface->name : "none");
|
|
}
|
|
|
|
static struct resclass sk_class = {
|
|
"Socket",
|
|
sizeof(sock),
|
|
sk_free,
|
|
sk_dump,
|
|
NULL,
|
|
NULL
|
|
};
|
|
|
|
/*
 * SOCKADDR_DEFINE declares a sockaddr pointer @sa and its length @len and
 * initializes both for address family @af. sockaddr_init() does the
 * initialization: the storage comes from alloca(), so it lives only until
 * the calling function returns.
 */
#define SOCKADDR_DEFINE(sa, len, af) \
  struct sockaddr *sa; int len; sockaddr_init(sa, len, af)

#define sockaddr_init(sa, len, af) \
  do { len=sockaddr_size(af); sa=alloca(len); sa->sa_family=af; } while(0)
|
|
|
|
/* Size of the sockaddr structure for @af; every family other than AF_INET is treated as IPv6 */
static inline int
sockaddr_size(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
|
|
|
|
static inline void
|
|
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, unsigned port)
|
|
{
|
|
sa->sin_port = htons(port);
|
|
#ifdef HAVE_SIN_LEN
|
|
sa->sin_len = sizeof(struct sockaddr_in);
|
|
#endif
|
|
ipa_put_in4(&sa->sin_addr, a);
|
|
}
|
|
|
|
static inline void
|
|
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, unsigned port)
|
|
{
|
|
sa->sin6_port = htons(port);
|
|
sa->sin6_flowinfo = 0;
|
|
#ifdef SIN6_LEN
|
|
sa->sin6_len = sizeof(struct sockaddr_in6);
|
|
#endif
|
|
ipa_put_in6(&sa->sin6_addr, a);
|
|
sa->sin6_scope_id = (ifa && ipa_is_link_local(a)) ? ifa->index : 0;
|
|
}
|
|
|
|
void
|
|
sockaddr_fill(struct sockaddr *sa, ip_addr a, struct iface *ifa, unsigned port)
|
|
{
|
|
if (sa->sa_family == AF_INET)
|
|
sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port);
|
|
else if (sa->sa_family == AF_INET6)
|
|
sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
|
|
else
|
|
bug("%s called for wrong AF (%d)", "sockaddr_fill", sa->sa_family);
|
|
}
|
|
|
|
static inline void
|
|
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, unsigned *port)
|
|
{
|
|
if (port)
|
|
*port = ntohs(sa->sin_port);
|
|
*a = ipa_get_in4(&sa->sin_addr);
|
|
}
|
|
|
|
static inline void
|
|
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, unsigned *port)
|
|
{
|
|
if (port)
|
|
*port = ntohs(sa->sin6_port);
|
|
*a = ipa_get_in6(&sa->sin6_addr);
|
|
|
|
if (ifa && ipa_is_link_local(*a))
|
|
*ifa = if_find_by_index(sa->sin6_scope_id);
|
|
}
|
|
|
|
void
|
|
sockaddr_read(struct sockaddr *sa, ip_addr *a, struct iface **ifa, unsigned *port, int check)
|
|
{
|
|
if (sa->sa_family == AF_INET)
|
|
sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port);
|
|
else if (sa->sa_family == AF_INET6)
|
|
sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
|
|
else if (check)
|
|
bug("%s called for wrong AF (%d)", "sockaddr_read", sa->sa_family);
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* sk_new - create a socket
|
|
* @p: pool
|
|
*
|
|
* This function creates a new socket resource. If you want to use it,
|
|
* you need to fill in all the required fields of the structure and
|
|
* call sk_open() to do the actual opening of the socket.
|
|
*
|
|
* The real function name is sock_new(), sk_new() is a macro wrapper
|
|
* to avoid collision with OpenSSL.
|
|
*/
|
|
sock *
|
|
sock_new(pool *p)
|
|
{
|
|
sock *s = ralloc(p, &sk_class);
|
|
s->pool = p;
|
|
// s->saddr = s->daddr = IPA_NONE;
|
|
s->tos = s->ttl = -1;
|
|
s->fd = -1;
|
|
return s;
|
|
}
|
|
|
|
static void
|
|
sk_insert(sock *s)
|
|
{
|
|
add_tail(&sock_list, &s->n);
|
|
sock_recalc_fdsets_p = 1;
|
|
}
|
|
|
|
|
|
|
|
/* PKTINFO handling is also standardized in IPv6 */
|
|
|
|
/*
|
|
* RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
|
|
* type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
|
|
* don't have IPV6_RECVPKTINFO we suppose the OS implements the older
|
|
* RFC and we use IPV6_PKTINFO.
|
|
*/
|
|
#if !defined(IPV6_RECVPKTINFO)
#define IPV6_RECVPKTINFO IPV6_PKTINFO
#endif
|
|
|
|
static inline char *
|
|
sk_request_pktinfo6(sock *s)
|
|
{
|
|
int ok = 1;
|
|
if (s->flags & SKF_LADDR_RX)
|
|
if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)
|
|
return "IPV6_RECVPKTINFO";
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static void
|
|
sk_process_rx_cmsgs(sock *s, struct msghdr *msg)
|
|
{
|
|
struct cmsghdr *cm;
|
|
|
|
if (!(s->flags & SKF_LADDR_RX))
|
|
return;
|
|
|
|
s->laddr = IPA_NONE;
|
|
s->lifindex = 0;
|
|
|
|
for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
|
|
{
|
|
if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
|
|
{
|
|
struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
|
|
s->laddr = ipa_get_in6(&pi->ipi6_addr);
|
|
s->lifindex = pi->ipi6_ifindex;
|
|
}
|
|
|
|
sk_process_rx_cmsg4(s, cm);
|
|
}
|
|
}
|
|
|
|
/*
|
|
static void
|
|
sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
|
|
{
|
|
struct cmsghdr *cm;
|
|
struct in6_pktinfo *pi;
|
|
|
|
if (!(s->flags & SKF_LADDR_TX))
|
|
return;
|
|
|
|
msg->msg_control = cbuf;
|
|
msg->msg_controllen = cbuflen;
|
|
|
|
cm = CMSG_FIRSTHDR(msg);
|
|
cm->cmsg_level = IPPROTO_IPV6;
|
|
cm->cmsg_type = IPV6_PKTINFO;
|
|
cm->cmsg_len = CMSG_LEN(sizeof(*pi));
|
|
|
|
pi = (struct in6_pktinfo *) CMSG_DATA(cm);
|
|
set_inaddr(&pi->ipi6_addr, s->saddr);
|
|
pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
|
|
|
|
msg->msg_controllen = cm->cmsg_len;
|
|
return;
|
|
}
|
|
*/
|
|
|
|
#define ERR(x) do { err = x; goto bad; } while(0)
|
|
#define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
|
|
|
|
static char *
|
|
sk_setup(sock *s)
|
|
{
|
|
int one = 1;
|
|
int fd = s->fd;
|
|
char *err = NULL;
|
|
|
|
if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
|
|
ERR("fcntl(O_NONBLOCK)");
|
|
|
|
if (!s->af)
|
|
return NULL;
|
|
|
|
if (sk_is_ipv4(s) && (s->tos >= 0))
|
|
if (setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
|
|
WARN("IP_TOS");
|
|
|
|
if (sk_is_ipv6(s) && (s->flags & SKF_V6ONLY))
|
|
if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0)
|
|
WARN("IPV6_V6ONLY");
|
|
|
|
// XXXX better error handling
|
|
if (s->ttl >= 0)
|
|
sk_set_ttl(s, s->ttl);
|
|
|
|
if (sk_is_ipv4(s))
|
|
err = sk_request_pktinfo4(s);
|
|
else
|
|
err = sk_request_pktinfo6(s);
|
|
bad:
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* sk_set_ttl - set transmit TTL for given socket.
|
|
* @s: socket
|
|
* @ttl: TTL value
|
|
*
|
|
* Set TTL for already opened connections when TTL was not set before.
|
|
* Useful for accepted connections when different ones should have
|
|
* different TTL.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_set_ttl(sock *s, int ttl)
|
|
{
|
|
char *err;
|
|
|
|
if (sk_is_ipv4(s))
|
|
{
|
|
if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
|
|
ERR("IP_TTL");
|
|
|
|
#ifdef CONFIG_UNIX_DONTROUTE
|
|
int one = 1;
|
|
if (ttl == 1)
|
|
if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
|
|
ERR("SO_DONTROUTE");
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
|
|
ERR("IPV6_UNICAST_HOPS");
|
|
}
|
|
|
|
s->ttl = ttl;
|
|
return 0;
|
|
|
|
bad:
|
|
log(L_ERR "sk_set_ttl: %s: %m", err);
|
|
return -1;
|
|
}
|
|
|
|
/**
|
|
* sk_set_min_ttl - set minimal accepted TTL for given socket.
|
|
* @s: socket
|
|
* @ttl: TTL value
|
|
*
|
|
* Can be used in TTL security implementation
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_set_min_ttl(sock *s, int ttl)
|
|
{
|
|
char *err;
|
|
|
|
if (sk_is_ipv4(s))
|
|
{
|
|
if (setsockopt(s->fd, IPPROTO_IP, IP_MINTTL, &ttl, sizeof(ttl)) < 0)
|
|
ERR("IP_MINTTL");
|
|
}
|
|
else
|
|
{
|
|
if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) < 0)
|
|
ERR("IPV6_MINHOPCOUNT");
|
|
}
|
|
|
|
return 0;
|
|
|
|
bad:
|
|
if (errno == ENOPROTOOPT)
|
|
log(L_ERR "Kernel does not support %s TTL security", sk_is_ipv4(s) ? "IPv4" : "IPv6");
|
|
else
|
|
log(L_ERR "sk_set_min_ttl: %s: %m", err);
|
|
return -1;
|
|
}
|
|
|
|
|
|
/**
|
|
* sk_set_md5_auth - add / remove MD5 security association for given socket.
|
|
* @s: socket
|
|
* @a: IP address of the other side
|
|
* @ifa: Interface for link-local IP address
|
|
* @passwd: password used for MD5 authentication
|
|
*
|
|
* In TCP MD5 handling code in kernel, there is a set of pairs
|
|
* (address, password) used to choose password according to
|
|
* address of the other side. This function is useful for
|
|
* listening socket, for active sockets it is enough to set
|
|
* s->password field.
|
|
*
|
|
* When called with passwd != NULL, the new pair is added,
|
|
* When called with passwd == NULL, the existing pair is removed.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
|
|
{
|
|
SOCKADDR_DEFINE(sa, sa_len, s->af);
|
|
sockaddr_fill(sa, a, ifa, 0);
|
|
return sk_set_md5_auth_int(s, sa, sa_len, passwd);
|
|
}
|
|
|
|
int
|
|
sk_set_broadcast(sock *s, int enable)
|
|
{
|
|
if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &enable, sizeof(enable)) < 0)
|
|
{
|
|
log(L_ERR "sk_set_broadcast: SO_BROADCAST: %m");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
sk_set_ipv6_checksum(sock *s, int offset)
|
|
{
|
|
if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
|
|
{
|
|
log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
sk_set_icmp6_filter(sock *s, int p1, int p2)
|
|
{
|
|
/* a bit of lame interface, but it is here only for Radv */
|
|
struct icmp6_filter f;
|
|
|
|
ICMP6_FILTER_SETBLOCKALL(&f);
|
|
ICMP6_FILTER_SETPASS(p1, &f);
|
|
ICMP6_FILTER_SETPASS(p2, &f);
|
|
|
|
if (setsockopt(s->fd, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
|
|
{
|
|
log(L_ERR "sk_set_icmp6_filter: ICMP6_FILTER: %m");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static inline void
|
|
fill_mreq6(struct ipv6_mreq *m, struct iface *ifa, ip_addr maddr)
|
|
{
|
|
bzero(m, sizeof(*m));
|
|
|
|
#ifdef CONFIG_IPV6_GLIBC_20
|
|
m->ipv6mr_ifindex = ifa->index;
|
|
#else
|
|
m->ipv6mr_interface = ifa->index;
|
|
#endif
|
|
|
|
ipa_put_in6(&m->ipv6mr_multiaddr, maddr);
|
|
}
|
|
|
|
static inline char *
|
|
sk_setup_multicast6(sock *s)
|
|
{
|
|
int zero = 0;
|
|
int index = s->iface->index;
|
|
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
|
|
return "IPV6_MULTICAST_HOPS";
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
|
|
return "IPV6_MULTICAST_LOOP";
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
|
|
return "IPV6_MULTICAST_IF";
|
|
|
|
/* Is this necessary? */
|
|
return sk_bind_to_iface(s);
|
|
}
|
|
|
|
int
|
|
sk_setup_multicast(sock *s)
|
|
{
|
|
char *err;
|
|
|
|
ASSERT(s->iface && s->iface->addr);
|
|
|
|
if (sk_is_ipv4(s))
|
|
err = sk_setup_multicast4(s);
|
|
else
|
|
err = sk_setup_multicast6(s);
|
|
|
|
if (err)
|
|
{
|
|
log(L_ERR "sk_setup_multicast: %s: %m", err);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline char *
|
|
sk_join_group6(sock *s, ip_addr maddr)
|
|
{
|
|
struct ipv6_mreq m;
|
|
|
|
fill_mreq6(&m, s->iface, maddr);
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &m, sizeof(m)) < 0)
|
|
return "IPV6_JOIN_GROUP";
|
|
|
|
return NULL;
|
|
}
|
|
|
|
int
|
|
sk_join_group(sock *s, ip_addr maddr)
|
|
{
|
|
char *err;
|
|
|
|
if (sk_is_ipv4(s))
|
|
err = sk_join_group4(s, maddr);
|
|
else
|
|
err = sk_join_group6(s, maddr);
|
|
|
|
if (err)
|
|
{
|
|
log(L_ERR "sk_join_group: %s: %m", err);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline char *
|
|
sk_leave_group6(sock *s, ip_addr maddr)
|
|
{
|
|
struct ipv6_mreq m;
|
|
|
|
fill_mreq6(&m, s->iface, maddr);
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &m, sizeof(m)) < 0)
|
|
return "IPV6_LEAVE_GROUP";
|
|
|
|
return NULL;
|
|
}
|
|
|
|
int
|
|
sk_leave_group(sock *s, ip_addr maddr)
|
|
{
|
|
char *err;
|
|
|
|
if (sk_is_ipv4(s))
|
|
err = sk_leave_group4(s, maddr);
|
|
else
|
|
err = sk_leave_group6(s, maddr);
|
|
|
|
if (err)
|
|
{
|
|
log(L_ERR "sk_leave_group: %s: %m", err);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
sk_tcp_connected(sock *s)
|
|
{
|
|
SOCKADDR_DEFINE(sa, sa_len, s->af);
|
|
|
|
if (getsockname(s->fd, sa, &sa_len) == 0)
|
|
sockaddr_read(sa, &s->saddr, &s->iface, &s->sport, 1);
|
|
|
|
s->type = SK_TCP;
|
|
sk_alloc_bufs(s);
|
|
s->tx_hook(s);
|
|
}
|
|
|
|
static int
|
|
sk_passive_connected(sock *s, struct sockaddr *sa, int sa_len, int type)
|
|
{
|
|
int fd = accept(s->fd, sa, &sa_len);
|
|
if (fd >= 0)
|
|
{
|
|
sock *t = sk_new(s->pool);
|
|
char *err;
|
|
t->type = type;
|
|
t->fd = fd;
|
|
t->af = s->af;
|
|
t->ttl = s->ttl;
|
|
t->tos = s->tos;
|
|
t->rbsize = s->rbsize;
|
|
t->tbsize = s->tbsize;
|
|
if (type == SK_TCP)
|
|
{
|
|
SOCKADDR_DEFINE(lsa, lsa_len, t->af);
|
|
if (getsockname(fd, lsa, &lsa_len) == 0) // XXXX
|
|
sockaddr_read(lsa, &t->saddr, &t->iface, &t->sport, 1);
|
|
|
|
sockaddr_read(sa, &t->daddr, &t->iface, &t->dport, 1);
|
|
}
|
|
if (err = sk_setup(t))
|
|
{
|
|
log(L_ERR "sk_passive_connected: %s: %m", err);
|
|
rfree(t);
|
|
return 1;
|
|
}
|
|
sk_insert(t);
|
|
sk_alloc_bufs(t);
|
|
s->rx_hook(t, 0);
|
|
return 1;
|
|
}
|
|
else if (errno != EINTR && errno != EAGAIN)
|
|
{
|
|
s->err_hook(s, errno);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* sk_open - open a socket
|
|
* @s: socket
|
|
*
|
|
* This function takes a socket resource created by sk_new() and
|
|
* initialized by the user and binds a corresponding network connection
|
|
* to it.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
int
|
|
sk_open(sock *s)
|
|
{
|
|
int type = s->type;
|
|
int has_src = ipa_nonzero(s->saddr) || s->sport;
|
|
char *err;
|
|
|
|
switch (type)
|
|
{
|
|
case SK_TCP_ACTIVE:
|
|
s->ttx = ""; /* Force s->ttx != s->tpos */
|
|
/* Fall thru */
|
|
case SK_TCP_PASSIVE:
|
|
s->af = (s->flags & SKF_V4ONLY) ? AF_INET : AF_INET6;
|
|
s->fd = socket(s->af, SOCK_STREAM, IPPROTO_TCP);
|
|
break;
|
|
case SK_UDP:
|
|
s->af = (s->flags & SKF_V4ONLY) ? AF_INET : AF_INET6;
|
|
s->fd = socket(s->af, SOCK_DGRAM, IPPROTO_UDP);
|
|
break;
|
|
case SK_IP:
|
|
s->af = (s->flags & SKF_V4ONLY) ? AF_INET : AF_INET6;
|
|
s->fd = socket(s->af, SOCK_RAW, s->dport);
|
|
break;
|
|
case SK_MAGIC:
|
|
if (err = sk_setup(s))
|
|
goto bad;
|
|
sk_insert(s);
|
|
return 0;
|
|
default:
|
|
bug("sk_open() called for invalid sock type %d", type);
|
|
}
|
|
|
|
int fd = s->fd;
|
|
if (fd < 0)
|
|
ERR("socket");
|
|
|
|
if (err = sk_setup(s))
|
|
goto bad;
|
|
|
|
SOCKADDR_DEFINE(sa, sa_len, s->af);
|
|
|
|
if (has_src)
|
|
{
|
|
int port;
|
|
|
|
if (type == SK_IP)
|
|
port = 0;
|
|
else
|
|
{
|
|
port = s->sport;
|
|
|
|
int one = 1;
|
|
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
|
|
ERR("SO_REUSEADDR");
|
|
}
|
|
|
|
sockaddr_fill(sa, s->saddr, s->iface, port);
|
|
if (bind(fd, sa, sa_len) < 0)
|
|
ERR("bind");
|
|
}
|
|
sockaddr_fill(sa, s->daddr, s->iface, s->dport);
|
|
|
|
if (s->password)
|
|
if (sk_set_md5_auth_int(s, sa, sa_len, s->password) < 0)
|
|
goto bad_no_log;
|
|
|
|
switch (type)
|
|
{
|
|
case SK_TCP_ACTIVE:
|
|
if (connect(fd, sa, sa_len) >= 0)
|
|
sk_tcp_connected(s);
|
|
else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
|
|
errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
|
|
ERR("connect");
|
|
break;
|
|
case SK_TCP_PASSIVE:
|
|
if (listen(fd, 8))
|
|
ERR("listen");
|
|
break;
|
|
default:
|
|
sk_alloc_bufs(s);
|
|
|
|
#ifdef IP_PMTUDISC
|
|
if (sk_is_ipv4(s))
|
|
{
|
|
int dont = IP_PMTUDISC_DONT;
|
|
if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
|
|
ERR("IP_PMTUDISC");
|
|
}
|
|
#endif
|
|
|
|
#ifdef IPV6_MTU_DISCOVER
|
|
if (sk_is_ipv6(s))
|
|
{
|
|
int dont = IPV6_PMTUDISC_DONT;
|
|
if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
|
|
ERR("IPV6_MTU_DISCOVER");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
sk_insert(s);
|
|
return 0;
|
|
|
|
bad:
|
|
log(L_ERR "sk_open: %s: %m", err);
|
|
bad_no_log:
|
|
close(s->fd);
|
|
s->af = 0;
|
|
s->fd = -1;
|
|
return -1;
|
|
}
|
|
|
|
void
|
|
sk_open_unix(sock *s, char *name)
|
|
{
|
|
int fd;
|
|
struct sockaddr_un sa;
|
|
char *err;
|
|
|
|
fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
|
if (fd < 0)
|
|
ERR("socket");
|
|
s->fd = fd;
|
|
if (err = sk_setup(s))
|
|
goto bad;
|
|
unlink(name);
|
|
|
|
/* Path length checked in test_old_bird() */
|
|
sa.sun_family = AF_UNIX;
|
|
strcpy(sa.sun_path, name);
|
|
if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
|
|
ERR("bind");
|
|
if (listen(fd, 8))
|
|
ERR("listen");
|
|
sk_insert(s);
|
|
return;
|
|
|
|
bad:
|
|
log(L_ERR "sk_open_unix: %s: %m", err);
|
|
die("Unable to create control socket %s", name);
|
|
}
|
|
|
|
/* Mark the transmit buffer of @s as empty: both positions go back to its start */
static inline void
reset_tx_buffer(sock *s)
{
  s->tpos = s->tbuf;
  s->ttx = s->tbuf;
}
|
|
|
|
static int
|
|
sk_maybe_write(sock *s)
|
|
{
|
|
int e;
|
|
|
|
switch (s->type)
|
|
{
|
|
case SK_TCP:
|
|
case SK_MAGIC:
|
|
case SK_UNIX:
|
|
while (s->ttx != s->tpos)
|
|
{
|
|
e = write(s->fd, s->ttx, s->tpos - s->ttx);
|
|
if (e < 0)
|
|
{
|
|
if (errno != EINTR && errno != EAGAIN)
|
|
{
|
|
reset_tx_buffer(s);
|
|
/* EPIPE is just a connection close notification during TX */
|
|
s->err_hook(s, (errno != EPIPE) ? errno : 0);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
s->ttx += e;
|
|
}
|
|
reset_tx_buffer(s);
|
|
return 1;
|
|
case SK_UDP:
|
|
case SK_IP:
|
|
{
|
|
if (s->tbuf == s->tpos)
|
|
return 1;
|
|
|
|
SOCKADDR_DEFINE(sa, sa_len, s->af);
|
|
sockaddr_fill(sa, s->daddr, s->iface, s->dport);
|
|
|
|
struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
|
|
// byte cmsg_buf[CMSG_TX_SPACE];
|
|
|
|
struct msghdr msg = {
|
|
.msg_name = sa,
|
|
.msg_namelen = sa_len,
|
|
.msg_iov = &iov,
|
|
.msg_iovlen = 1};
|
|
|
|
// sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
|
|
e = sendmsg(s->fd, &msg, 0);
|
|
|
|
if (e < 0)
|
|
{
|
|
if (errno != EINTR && errno != EAGAIN)
|
|
{
|
|
reset_tx_buffer(s);
|
|
s->err_hook(s, errno);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
reset_tx_buffer(s);
|
|
return 1;
|
|
}
|
|
default:
|
|
bug("sk_maybe_write: unknown socket type %d", s->type);
|
|
}
|
|
}
|
|
|
|
int
|
|
sk_rx_ready(sock *s)
|
|
{
|
|
fd_set rd, wr;
|
|
struct timeval timo;
|
|
int rv;
|
|
|
|
FD_ZERO(&rd);
|
|
FD_ZERO(&wr);
|
|
FD_SET(s->fd, &rd);
|
|
|
|
timo.tv_sec = 0;
|
|
timo.tv_usec = 0;
|
|
|
|
redo:
|
|
rv = select(s->fd+1, &rd, &wr, NULL, &timo);
|
|
|
|
if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
|
|
goto redo;
|
|
|
|
return rv;
|
|
}
|
|
|
|
/**
|
|
* sk_send - send data to a socket
|
|
* @s: socket
|
|
* @len: number of bytes to send
|
|
*
|
|
* This function sends @len bytes of data prepared in the
|
|
* transmit buffer of the socket @s to the network connection.
|
|
* If the packet can be sent immediately, it does so and returns
|
|
* 1, else it queues the packet for later processing, returns 0
|
|
* and calls the @tx_hook of the socket when the tranmission
|
|
* takes place.
|
|
*/
|
|
int
|
|
sk_send(sock *s, unsigned len)
|
|
{
|
|
s->ttx = s->tbuf;
|
|
s->tpos = s->tbuf + len;
|
|
return sk_maybe_write(s);
|
|
}
|
|
|
|
/**
|
|
* sk_send_to - send data to a specific destination
|
|
* @s: socket
|
|
* @len: number of bytes to send
|
|
* @addr: IP address to send the packet to
|
|
* @port: port to send the packet to
|
|
*
|
|
* This is a sk_send() replacement for connection-less packet sockets
|
|
* which allows destination of the packet to be chosen dynamically.
|
|
*/
|
|
int
|
|
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
|
|
{
|
|
s->daddr = addr;
|
|
s->dport = port;
|
|
s->ttx = s->tbuf;
|
|
s->tpos = s->tbuf + len;
|
|
return sk_maybe_write(s);
|
|
}
|
|
|
|
/*
|
|
int
|
|
sk_send_full(sock *s, unsigned len, struct iface *ifa,
|
|
ip_addr saddr, ip_addr daddr, unsigned dport)
|
|
{
|
|
s->iface = ifa;
|
|
s->saddr = saddr;
|
|
s->daddr = daddr;
|
|
s->dport = dport;
|
|
s->ttx = s->tbuf;
|
|
s->tpos = s->tbuf + len;
|
|
return sk_maybe_write(s);
|
|
}
|
|
*/
|
|
|
|
static int
|
|
sk_read(sock *s)
|
|
{
|
|
switch (s->type)
|
|
{
|
|
case SK_TCP_PASSIVE:
|
|
{
|
|
SOCKADDR_DEFINE(sa, sa_len, s->af);
|
|
return sk_passive_connected(s, sa, sa_len, SK_TCP);
|
|
}
|
|
case SK_UNIX_PASSIVE:
|
|
{
|
|
return sk_passive_connected(s, NULL, 0, SK_UNIX);
|
|
}
|
|
case SK_TCP:
|
|
case SK_UNIX:
|
|
{
|
|
int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
|
|
|
|
if (c < 0)
|
|
{
|
|
if (errno != EINTR && errno != EAGAIN)
|
|
s->err_hook(s, errno);
|
|
}
|
|
else if (!c)
|
|
s->err_hook(s, 0);
|
|
else
|
|
{
|
|
s->rpos += c;
|
|
if (s->rx_hook(s, s->rpos - s->rbuf))
|
|
{
|
|
/* We need to be careful since the socket could have been deleted by the hook */
|
|
if (current_sock == s)
|
|
s->rpos = s->rbuf;
|
|
}
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
case SK_MAGIC:
|
|
return s->rx_hook(s, 0);
|
|
default:
|
|
{
|
|
SOCKADDR_DEFINE(sa, sa_len, s->af);
|
|
int e;
|
|
|
|
struct iovec iov = {s->rbuf, s->rbsize};
|
|
byte cmsg_buf[CMSG_RX_SPACE];
|
|
|
|
struct msghdr msg = {
|
|
.msg_name = sa,
|
|
.msg_namelen = sa_len,
|
|
.msg_iov = &iov,
|
|
.msg_iovlen = 1,
|
|
.msg_control = cmsg_buf,
|
|
.msg_controllen = sizeof(cmsg_buf),
|
|
.msg_flags = 0};
|
|
|
|
e = recvmsg(s->fd, &msg, 0);
|
|
|
|
if (e < 0)
|
|
{
|
|
if (errno != EINTR && errno != EAGAIN)
|
|
s->err_hook(s, errno);
|
|
return 0;
|
|
}
|
|
s->rpos = s->rbuf + e;
|
|
sockaddr_read(sa, &s->faddr, NULL, &s->fport, 1);
|
|
sk_process_rx_cmsgs(s, &msg);
|
|
|
|
s->rx_hook(s, e);
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int
|
|
sk_write(sock *s)
|
|
{
|
|
switch (s->type)
|
|
{
|
|
case SK_TCP_ACTIVE:
|
|
{
|
|
SOCKADDR_DEFINE(sa, sa_len, s->af);
|
|
sockaddr_fill(sa, s->daddr, s->iface, s->dport);
|
|
|
|
if (connect(s->fd, sa, sa_len) >= 0 || errno == EISCONN)
|
|
sk_tcp_connected(s);
|
|
else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
|
|
s->err_hook(s, errno);
|
|
return 0;
|
|
}
|
|
default:
|
|
if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
|
|
{
|
|
s->tx_hook(s);
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
void
|
|
sk_dump_all(void)
|
|
{
|
|
node *n;
|
|
sock *s;
|
|
|
|
debug("Open sockets:\n");
|
|
WALK_LIST(n, sock_list)
|
|
{
|
|
s = SKIP_BACK(sock, n, n);
|
|
debug("%p ", s);
|
|
sk_dump(&s->r);
|
|
}
|
|
debug("\n");
|
|
}
|
|
|
|
/* Undefine the ERR and WARN helper macros */
#undef ERR
#undef WARN

/*
 *	Main I/O Loop
 */

/*
 * Flags requesting asynchronous actions. io_loop() checks them before it
 * enters select(), calls async_config() or async_dump() respectively and
 * clears the flag again. They are set elsewhere (presumably from signal
 * handlers -- TODO confirm).
 */
volatile int async_config_flag;		/* Asynchronous reconfiguration scheduled */
volatile int async_dump_flag;		/* Asynchronous dump scheduled */
|
|
|
|
void
|
|
io_init(void)
|
|
{
|
|
init_list(&near_timers);
|
|
init_list(&far_timers);
|
|
init_list(&sock_list);
|
|
init_list(&global_event_list);
|
|
krt_io_init();
|
|
init_times();
|
|
update_times();
|
|
srandom((int) now_real);
|
|
}
|
|
|
|
static int short_loops = 0;
|
|
#define SHORT_LOOP_MAX 10
|
|
|
|
void
|
|
io_loop(void)
|
|
{
|
|
fd_set rd, wr;
|
|
struct timeval timo;
|
|
time_t tout;
|
|
int hi, events;
|
|
sock *s;
|
|
node *n;
|
|
|
|
sock_recalc_fdsets_p = 1;
|
|
for(;;)
|
|
{
|
|
events = ev_run_list(&global_event_list);
|
|
update_times();
|
|
tout = tm_first_shot();
|
|
if (tout <= now)
|
|
{
|
|
tm_shot();
|
|
continue;
|
|
}
|
|
timo.tv_sec = events ? 0 : tout - now;
|
|
timo.tv_usec = 0;
|
|
|
|
if (sock_recalc_fdsets_p)
|
|
{
|
|
sock_recalc_fdsets_p = 0;
|
|
FD_ZERO(&rd);
|
|
FD_ZERO(&wr);
|
|
}
|
|
|
|
hi = 0;
|
|
WALK_LIST(n, sock_list)
|
|
{
|
|
s = SKIP_BACK(sock, n, n);
|
|
if (s->rx_hook)
|
|
{
|
|
FD_SET(s->fd, &rd);
|
|
if (s->fd > hi)
|
|
hi = s->fd;
|
|
}
|
|
else
|
|
FD_CLR(s->fd, &rd);
|
|
if (s->tx_hook && s->ttx != s->tpos)
|
|
{
|
|
FD_SET(s->fd, &wr);
|
|
if (s->fd > hi)
|
|
hi = s->fd;
|
|
}
|
|
else
|
|
FD_CLR(s->fd, &wr);
|
|
}
|
|
|
|
/*
|
|
* Yes, this is racy. But even if the signal comes before this test
|
|
* and entering select(), it gets caught on the next timer tick.
|
|
*/
|
|
|
|
if (async_config_flag)
|
|
{
|
|
async_config();
|
|
async_config_flag = 0;
|
|
continue;
|
|
}
|
|
if (async_dump_flag)
|
|
{
|
|
async_dump();
|
|
async_dump_flag = 0;
|
|
continue;
|
|
}
|
|
if (async_shutdown_flag)
|
|
{
|
|
async_shutdown();
|
|
async_shutdown_flag = 0;
|
|
continue;
|
|
}
|
|
|
|
/* And finally enter select() to find active sockets */
|
|
hi = select(hi+1, &rd, &wr, NULL, &timo);
|
|
|
|
if (hi < 0)
|
|
{
|
|
if (errno == EINTR || errno == EAGAIN)
|
|
continue;
|
|
die("select: %m");
|
|
}
|
|
if (hi)
|
|
{
|
|
/* guaranteed to be non-empty */
|
|
current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
|
|
|
|
while (current_sock)
|
|
{
|
|
sock *s = current_sock;
|
|
int e;
|
|
int steps;
|
|
|
|
steps = MAX_STEPS;
|
|
if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
|
|
do
|
|
{
|
|
steps--;
|
|
e = sk_read(s);
|
|
if (s != current_sock)
|
|
goto next;
|
|
}
|
|
while (e && s->rx_hook && steps);
|
|
|
|
steps = MAX_STEPS;
|
|
if (FD_ISSET(s->fd, &wr))
|
|
do
|
|
{
|
|
steps--;
|
|
e = sk_write(s);
|
|
if (s != current_sock)
|
|
goto next;
|
|
}
|
|
while (e && steps);
|
|
current_sock = sk_next(s);
|
|
next: ;
|
|
}
|
|
|
|
short_loops++;
|
|
if (events && (short_loops < SHORT_LOOP_MAX))
|
|
continue;
|
|
short_loops = 0;
|
|
|
|
int count = 0;
|
|
current_sock = stored_sock;
|
|
if (current_sock == NULL)
|
|
current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
|
|
|
|
while (current_sock && count < MAX_RX_STEPS)
|
|
{
|
|
sock *s = current_sock;
|
|
int e;
|
|
|
|
if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
|
|
{
|
|
count++;
|
|
e = sk_read(s);
|
|
if (s != current_sock)
|
|
goto next2;
|
|
}
|
|
current_sock = sk_next(s);
|
|
next2: ;
|
|
}
|
|
|
|
stored_sock = current_sock;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Check whether something already listens on the UNIX-domain socket
 * @path by trying to connect to it. If the connection succeeds, the
 * daemon dies with "I found another BIRD running."; it also dies when
 * the probing socket cannot be created or when @path does not fit into
 * sun_path. Otherwise the probing socket is closed and the function
 * returns.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un addr;
  int probe = socket(AF_UNIX, SOCK_STREAM, 0);

  if (probe < 0)
    die("Cannot create socket: %m");

  /* Leave room for the terminating zero byte copied by strcpy() below */
  if (strlen(path) >= sizeof(addr.sun_path))
    die("Socket path too long");

  memset(&addr, 0, sizeof(addr));
  addr.sun_family = AF_UNIX;
  strcpy(addr.sun_path, path);

  if (connect(probe, (struct sockaddr *) &addr, SUN_LEN(&addr)) == 0)
    die("I found another BIRD running.");

  close(probe);
}
|
|
|
|
|