mirror of
https://gitlab.nic.cz/labs/bird.git
synced 2024-11-08 12:18:42 +00:00
2619 lines
55 KiB
C
2619 lines
55 KiB
C
/*
|
|
* BIRD Internet Routing Daemon -- Unix I/O
|
|
*
|
|
* (c) 1998--2004 Martin Mares <mj@ucw.cz>
|
|
* (c) 2004 Ondrej Filip <feela@network.cz>
|
|
*
|
|
* Can be freely distributed and used under the terms of the GNU GPL.
|
|
*/
|
|
|
|
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
|
|
if _GNU_SOURCE is not defined. */
|
|
#ifndef _GNU_SOURCE
|
|
#define _GNU_SOURCE
|
|
#endif
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/time.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/uio.h>
|
|
#include <sys/un.h>
|
|
#include <poll.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <errno.h>
|
|
#include <net/if.h>
|
|
#include <netinet/in.h>
|
|
#include <netinet/tcp.h>
|
|
#include <netinet/udp.h>
|
|
#include <netinet/icmp6.h>
|
|
#include <netdb.h>
|
|
|
|
#include "nest/bird.h"
|
|
#include "lib/lists.h"
|
|
#include "lib/resource.h"
|
|
#include "lib/socket.h"
|
|
#include "lib/event.h"
|
|
#include "lib/locking.h"
|
|
#include "lib/timer.h"
|
|
#include "lib/string.h"
|
|
#include "nest/iface.h"
|
|
#include "conf/conf.h"
|
|
|
|
#include "sysdep/unix/unix.h"
|
|
#include "sysdep/unix/io-loop.h"
|
|
#include CONFIG_INCLUDE_SYSIO_H
|
|
|
|
/* Maximum number of calls of tx handler for one socket in one
|
|
* poll iteration. Should be small enough to not monopolize CPU by
|
|
* one protocol instance.
|
|
*/
|
|
#define MAX_STEPS 4
|
|
|
|
/* Maximum number of calls of rx handler for all sockets in one poll
|
|
iteration. RX callbacks are often much more costly so we limit
|
|
this to gen small latencies */
|
|
#define MAX_RX_STEPS 4
|
|
|
|
|
|
/*
|
|
* Tracked Files
|
|
*/
|
|
|
|
struct rfile {
|
|
resource r;
|
|
struct stat stat;
|
|
int fd;
|
|
off_t limit;
|
|
_Atomic off_t pos;
|
|
void *mapping;
|
|
};
|
|
|
|
struct rfile rf_stderr = {
|
|
.fd = 2,
|
|
};
|
|
|
|
static void
|
|
rf_free(resource *r)
|
|
{
|
|
struct rfile *a = (struct rfile *) r;
|
|
|
|
if (a->mapping)
|
|
munmap(a->mapping, a->limit);
|
|
|
|
close(a->fd);
|
|
}
|
|
|
|
static void
|
|
rf_dump(resource *r, unsigned indent UNUSED)
|
|
{
|
|
struct rfile *a = (struct rfile *) r;
|
|
|
|
debug("(fd %d)\n", a->fd);
|
|
}
|
|
|
|
static struct resclass rf_class = {
|
|
"FILE",
|
|
sizeof(struct rfile),
|
|
rf_free,
|
|
rf_dump,
|
|
NULL,
|
|
NULL
|
|
};
|
|
|
|
static int
|
|
rf_open_get_fd(const char *name, enum rf_mode mode)
|
|
{
|
|
int omode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
|
|
int flags;
|
|
|
|
switch (mode)
|
|
{
|
|
case RF_APPEND:
|
|
flags = O_WRONLY | O_CREAT | O_APPEND;
|
|
break;
|
|
|
|
case RF_FIXED:
|
|
flags = O_RDWR | O_CREAT;
|
|
break;
|
|
|
|
default:
|
|
bug("rf_open() must have the mode set");
|
|
}
|
|
|
|
return open(name, flags, omode);
|
|
}
|
|
|
|
static void
|
|
rf_stat(struct rfile *r)
|
|
{
|
|
if (fstat(r->fd, &r->stat) < 0)
|
|
die("fstat() failed: %m");
|
|
}
|
|
|
|
struct rfile *
|
|
rf_open(pool *p, const char *name, enum rf_mode mode, off_t limit)
|
|
{
|
|
int fd = rf_open_get_fd(name, mode);
|
|
if (fd < 0)
|
|
return NULL; /* The caller takes care of printing %m. */
|
|
|
|
struct rfile *r = ralloc(p, &rf_class);
|
|
r->fd = fd;
|
|
r->limit = limit;
|
|
|
|
switch (mode)
|
|
{
|
|
case RF_APPEND:
|
|
rf_stat(r);
|
|
atomic_store_explicit(&r->pos, S_ISREG(r->stat.st_mode) ? r->stat.st_size : 0, memory_order_relaxed);
|
|
break;
|
|
|
|
case RF_FIXED:
|
|
if ((ftruncate(fd, limit) < 0)
|
|
|| ((r->mapping = mmap(NULL, limit, PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED))
|
|
{
|
|
int erf = errno;
|
|
r->mapping = NULL;
|
|
rfree(r);
|
|
errno = erf;
|
|
return NULL;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
bug("rf_open() must have the mode set");
|
|
}
|
|
|
|
|
|
return r;
|
|
}
|
|
|
|
off_t
|
|
rf_size(struct rfile *r)
|
|
{
|
|
return atomic_load_explicit(&r->pos, memory_order_relaxed);
|
|
}
|
|
|
|
int
|
|
rf_same(struct rfile *a, struct rfile *b)
|
|
{
|
|
rf_stat(a);
|
|
rf_stat(b);
|
|
|
|
return
|
|
(a->limit == b->limit) &&
|
|
(a->stat.st_mode == b->stat.st_mode) &&
|
|
(a->stat.st_dev == b->stat.st_dev) &&
|
|
(a->stat.st_ino == b->stat.st_ino);
|
|
}
|
|
|
|
void
|
|
rf_write_crude(struct rfile *r, const char *buf, int sz)
|
|
{
|
|
if (r->mapping)
|
|
memcpy(r->mapping, buf, sz);
|
|
else
|
|
write(r->fd, buf, sz);
|
|
}
|
|
|
|
|
|
int
|
|
rf_writev(struct rfile *r, struct iovec *iov, int iov_count)
|
|
{
|
|
off_t size = 0;
|
|
for (int i = 0; i < iov_count; i++)
|
|
size += iov[i].iov_len;
|
|
|
|
if (r->mapping)
|
|
{
|
|
/* Update the pointer */
|
|
off_t target = atomic_fetch_add_explicit(&r->pos, size, memory_order_relaxed) % r->limit;
|
|
|
|
/* Write the line */
|
|
for (int i = 0; i < iov_count; i++)
|
|
{
|
|
/* Take care of wrapping; this should really happen only once */
|
|
off_t rsz;
|
|
while ((rsz = r->limit - target) < (off_t) iov[i].iov_len)
|
|
{
|
|
memcpy(r->mapping + target, iov[i].iov_base, rsz);
|
|
iov[i].iov_base += rsz;
|
|
iov[i].iov_len -= rsz;
|
|
target = 0;
|
|
}
|
|
|
|
memcpy(r->mapping + target, iov[i].iov_base, iov[i].iov_len);
|
|
target += iov[i].iov_len;
|
|
}
|
|
return 1;
|
|
}
|
|
else if (r->limit && (atomic_fetch_add_explicit(&r->pos, size, memory_order_relaxed) + size > r->limit))
|
|
{
|
|
atomic_fetch_sub_explicit(&r->pos, size, memory_order_relaxed);
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
while (size > 0)
|
|
{
|
|
/* Try to write */
|
|
ssize_t e = writev(r->fd, iov, iov_count);
|
|
if (e < 0)
|
|
if (errno == EINTR)
|
|
continue;
|
|
else
|
|
return 1; /* FIXME: What should we do when we suddenly can't write? */
|
|
|
|
/* It is expected that we always write the whole bunch at once */
|
|
if (e == size)
|
|
return 1;
|
|
|
|
/* Block split should not happen (we write small enough messages)
|
|
* but if it happens, let's try to write the rest of the log */
|
|
size -= e;
|
|
while (e > 0)
|
|
{
|
|
if ((ssize_t) iov[0].iov_len > e)
|
|
{
|
|
/* Some bytes are remaining in the first chunk */
|
|
iov[0].iov_len -= e;
|
|
iov[0].iov_base += e;
|
|
break;
|
|
}
|
|
|
|
/* First chunk written completely, get rid of it */
|
|
e -= iov[0].iov_len;
|
|
iov++;
|
|
iov_count--;
|
|
ASSERT_DIE(iov_count > 0);
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Time clock
|
|
*/
|
|
|
|
btime boot_time;
|
|
|
|
|
|
void
|
|
times_update(void)
|
|
{
|
|
struct timespec ts;
|
|
int rv;
|
|
|
|
btime old_time = current_time();
|
|
btime old_real_time = current_real_time();
|
|
|
|
rv = clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
if (rv < 0)
|
|
die("Monotonic clock is missing");
|
|
|
|
if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40)))
|
|
log(L_WARN "Monotonic clock is crazy");
|
|
|
|
btime new_time = ts.tv_sec S + ts.tv_nsec NS;
|
|
|
|
if (new_time < old_time)
|
|
log(L_ERR "Monotonic clock is broken");
|
|
|
|
rv = clock_gettime(CLOCK_REALTIME, &ts);
|
|
if (rv < 0)
|
|
die("clock_gettime: %m");
|
|
|
|
btime new_real_time = ts.tv_sec S + ts.tv_nsec NS;
|
|
|
|
if (!atomic_compare_exchange_strong_explicit(
|
|
&last_time,
|
|
&old_time,
|
|
new_time,
|
|
memory_order_acq_rel,
|
|
memory_order_relaxed))
|
|
DBG("Time update collision: last_time");
|
|
|
|
if (!atomic_compare_exchange_strong_explicit(
|
|
&real_time,
|
|
&old_real_time,
|
|
new_real_time,
|
|
memory_order_acq_rel,
|
|
memory_order_relaxed))
|
|
DBG("Time update collision: real_time");
|
|
}
|
|
|
|
btime
|
|
current_time_now(void)
|
|
{
|
|
struct timespec ts;
|
|
int rv;
|
|
|
|
rv = clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
if (rv < 0)
|
|
die("clock_gettime: %m");
|
|
|
|
return ts.tv_sec S + ts.tv_nsec NS;
|
|
}
|
|
|
|
/**
|
|
* DOC: Sockets
|
|
*
|
|
* Socket resources represent network connections. Their data structure (&socket)
|
|
* contains a lot of fields defining the exact type of the socket, the local and
|
|
* remote addresses and ports, pointers to socket buffers and finally pointers to
|
|
* hook functions to be called when new data have arrived to the receive buffer
|
|
* (@rx_hook), when the contents of the transmit buffer have been transmitted
|
|
* (@tx_hook) and when an error or connection close occurs (@err_hook).
|
|
*
|
|
* Freeing of sockets from inside socket hooks is perfectly safe.
|
|
*/
|
|
|
|
#ifndef SOL_IP
|
|
#define SOL_IP IPPROTO_IP
|
|
#endif
|
|
|
|
#ifndef SOL_IPV6
|
|
#define SOL_IPV6 IPPROTO_IPV6
|
|
#endif
|
|
|
|
#ifndef SOL_ICMPV6
|
|
#define SOL_ICMPV6 IPPROTO_ICMPV6
|
|
#endif
|
|
|
|
|
|
/*
|
|
* Sockaddr helper functions
|
|
*/
|
|
|
|
static inline int UNUSED sockaddr_length(int af)
|
|
{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
|
|
|
|
static inline void
|
|
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
|
|
{
|
|
memset(sa, 0, sizeof(struct sockaddr_in));
|
|
#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
|
|
sa->sin_len = sizeof(struct sockaddr_in);
|
|
#endif
|
|
sa->sin_family = AF_INET;
|
|
sa->sin_port = htons(port);
|
|
sa->sin_addr = ipa_to_in4(a);
|
|
}
|
|
|
|
static inline void
|
|
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
|
|
{
|
|
memset(sa, 0, sizeof(struct sockaddr_in6));
|
|
#ifdef SIN6_LEN
|
|
sa->sin6_len = sizeof(struct sockaddr_in6);
|
|
#endif
|
|
sa->sin6_family = AF_INET6;
|
|
sa->sin6_port = htons(port);
|
|
sa->sin6_flowinfo = 0;
|
|
sa->sin6_addr = ipa_to_in6(a);
|
|
|
|
if (ifa && ipa_is_link_local(a))
|
|
sa->sin6_scope_id = ifa->index;
|
|
}
|
|
|
|
void
|
|
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
|
|
{
|
|
if (af == AF_INET)
|
|
sockaddr_fill4((struct sockaddr_in *) sa, a, port);
|
|
else if (af == AF_INET6)
|
|
sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
|
|
else
|
|
bug("Unknown AF");
|
|
}
|
|
|
|
static inline void
|
|
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
|
|
{
|
|
*port = ntohs(sa->sin_port);
|
|
*a = ipa_from_in4(sa->sin_addr);
|
|
}
|
|
|
|
static inline void
|
|
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
|
|
{
|
|
*port = ntohs(sa->sin6_port);
|
|
*a = ipa_from_in6(sa->sin6_addr);
|
|
|
|
if (ifa && ipa_is_link_local(*a))
|
|
*ifa = if_find_by_index(sa->sin6_scope_id);
|
|
}
|
|
|
|
int
|
|
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
|
|
{
|
|
if (sa->sa.sa_family != af)
|
|
goto fail;
|
|
|
|
if (af == AF_INET)
|
|
sockaddr_read4((struct sockaddr_in *) sa, a, port);
|
|
else if (af == AF_INET6)
|
|
sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
|
|
else
|
|
goto fail;
|
|
|
|
return 0;
|
|
|
|
fail:
|
|
*a = IPA_NONE;
|
|
*port = 0;
|
|
return -1;
|
|
}
|
|
|
|
|
|
/*
|
|
* IPv6 multicast syscalls
|
|
*/
|
|
|
|
/* Fortunately standardized in RFC 3493 */
|
|
|
|
#define INIT_MREQ6(maddr,ifa) \
|
|
{ .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
|
|
|
|
static inline int
|
|
sk_setup_multicast6(sock *s)
|
|
{
|
|
int index = s->iface->index;
|
|
int ttl = s->ttl;
|
|
int n = 0;
|
|
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
|
|
ERR("IPV6_MULTICAST_IF");
|
|
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
|
|
ERR("IPV6_MULTICAST_HOPS");
|
|
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
|
|
ERR("IPV6_MULTICAST_LOOP");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
sk_join_group6(sock *s, ip_addr maddr)
|
|
{
|
|
struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
|
|
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
|
|
ERR("IPV6_JOIN_GROUP");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
sk_leave_group6(sock *s, ip_addr maddr)
|
|
{
|
|
struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
|
|
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
|
|
ERR("IPV6_LEAVE_GROUP");
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* IPv6 packet control messages
|
|
*/
|
|
|
|
/* Also standardized, in RFC 3542 */
|
|
|
|
/*
|
|
* RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
|
|
* type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
|
|
* don't have IPV6_RECVPKTINFO we suppose the OS implements the older
|
|
* RFC and we use IPV6_PKTINFO.
|
|
*/
|
|
#ifndef IPV6_RECVPKTINFO
|
|
#define IPV6_RECVPKTINFO IPV6_PKTINFO
|
|
#endif
|
|
/*
|
|
* Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
|
|
*/
|
|
#ifndef IPV6_RECVHOPLIMIT
|
|
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
|
|
#endif
|
|
|
|
|
|
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
|
|
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
|
|
|
|
static inline int
|
|
sk_request_cmsg6_pktinfo(sock *s)
|
|
{
|
|
int y = 1;
|
|
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
|
|
ERR("IPV6_RECVPKTINFO");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
sk_request_cmsg6_ttl(sock *s)
|
|
{
|
|
int y = 1;
|
|
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
|
|
ERR("IPV6_RECVHOPLIMIT");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline void
|
|
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
|
|
{
|
|
if (cm->cmsg_type == IPV6_PKTINFO)
|
|
{
|
|
struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
|
|
s->laddr = ipa_from_in6(pi->ipi6_addr);
|
|
s->lifindex = pi->ipi6_ifindex;
|
|
}
|
|
}
|
|
|
|
static inline void
|
|
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
|
|
{
|
|
if (cm->cmsg_type == IPV6_HOPLIMIT)
|
|
s->rcv_ttl = * (int *) CMSG_DATA(cm);
|
|
}
|
|
|
|
static inline void
|
|
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
|
|
{
|
|
struct cmsghdr *cm;
|
|
struct in6_pktinfo *pi;
|
|
int controllen = 0;
|
|
|
|
msg->msg_control = cbuf;
|
|
msg->msg_controllen = cbuflen;
|
|
|
|
cm = CMSG_FIRSTHDR(msg);
|
|
cm->cmsg_level = SOL_IPV6;
|
|
cm->cmsg_type = IPV6_PKTINFO;
|
|
cm->cmsg_len = CMSG_LEN(sizeof(*pi));
|
|
controllen += CMSG_SPACE(sizeof(*pi));
|
|
|
|
pi = (struct in6_pktinfo *) CMSG_DATA(cm);
|
|
pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
|
|
pi->ipi6_addr = ipa_to_in6(s->saddr);
|
|
|
|
msg->msg_controllen = controllen;
|
|
}
|
|
|
|
|
|
/*
|
|
* Miscellaneous socket syscalls
|
|
*/
|
|
|
|
static inline int
|
|
sk_set_ttl4(sock *s, int ttl)
|
|
{
|
|
if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
|
|
ERR("IP_TTL");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
sk_set_ttl6(sock *s, int ttl)
|
|
{
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
|
|
ERR("IPV6_UNICAST_HOPS");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
sk_set_tos4(sock *s, int tos)
|
|
{
|
|
if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
|
|
ERR("IP_TOS");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
sk_set_tos6(sock *s, int tos)
|
|
{
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
|
|
ERR("IPV6_TCLASS");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
sk_set_high_port(sock *s UNUSED)
|
|
{
|
|
/* Port range setting is optional, ignore it if not supported */
|
|
|
|
#ifdef IP_PORTRANGE
|
|
if (sk_is_ipv4(s))
|
|
{
|
|
int range = IP_PORTRANGE_HIGH;
|
|
if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
|
|
ERR("IP_PORTRANGE");
|
|
}
|
|
#endif
|
|
|
|
#ifdef IPV6_PORTRANGE
|
|
if (sk_is_ipv6(s))
|
|
{
|
|
int range = IPV6_PORTRANGE_HIGH;
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
|
|
ERR("IPV6_PORTRANGE");
|
|
}
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline byte *
|
|
sk_skip_ip_header(byte *pkt, int *len)
|
|
{
|
|
if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
|
|
return NULL;
|
|
|
|
int hlen = (*pkt & 0x0f) * 4;
|
|
if ((hlen < 20) || (hlen > *len))
|
|
return NULL;
|
|
|
|
*len -= hlen;
|
|
return pkt + hlen;
|
|
}
|
|
|
|
byte *
|
|
sk_rx_buffer(sock *s, int *len)
|
|
{
|
|
if (sk_is_ipv4(s) && (s->type == SK_IP))
|
|
return sk_skip_ip_header(s->rbuf, len);
|
|
else
|
|
return s->rbuf;
|
|
}
|
|
|
|
|
|
/*
|
|
* Public socket functions
|
|
*/
|
|
|
|
/**
|
|
* sk_setup_multicast - enable multicast for given socket
|
|
* @s: socket
|
|
*
|
|
* Prepare transmission of multicast packets for given datagram socket.
|
|
* The socket must have defined @iface.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_setup_multicast(sock *s)
|
|
{
|
|
ASSERT(s->iface);
|
|
|
|
if (sk_is_ipv4(s))
|
|
return sk_setup_multicast4(s);
|
|
else
|
|
return sk_setup_multicast6(s);
|
|
}
|
|
|
|
/**
|
|
* sk_join_group - join multicast group for given socket
|
|
* @s: socket
|
|
* @maddr: multicast address
|
|
*
|
|
* Join multicast group for given datagram socket and associated interface.
|
|
* The socket must have defined @iface.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_join_group(sock *s, ip_addr maddr)
|
|
{
|
|
if (sk_is_ipv4(s))
|
|
return sk_join_group4(s, maddr);
|
|
else
|
|
return sk_join_group6(s, maddr);
|
|
}
|
|
|
|
/**
|
|
* sk_leave_group - leave multicast group for given socket
|
|
* @s: socket
|
|
* @maddr: multicast address
|
|
*
|
|
* Leave multicast group for given datagram socket and associated interface.
|
|
* The socket must have defined @iface.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_leave_group(sock *s, ip_addr maddr)
|
|
{
|
|
if (sk_is_ipv4(s))
|
|
return sk_leave_group4(s, maddr);
|
|
else
|
|
return sk_leave_group6(s, maddr);
|
|
}
|
|
|
|
/**
|
|
* sk_setup_broadcast - enable broadcast for given socket
|
|
* @s: socket
|
|
*
|
|
* Allow reception and transmission of broadcast packets for given datagram
|
|
* socket. The socket must have defined @iface. For transmission, packets should
|
|
* be send to @brd address of @iface.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_setup_broadcast(sock *s)
|
|
{
|
|
int y = 1;
|
|
|
|
if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
|
|
ERR("SO_BROADCAST");
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* sk_set_ttl - set transmit TTL for given socket
|
|
* @s: socket
|
|
* @ttl: TTL value
|
|
*
|
|
* Set TTL for already opened connections when TTL was not set before. Useful
|
|
* for accepted connections when different ones should have different TTL.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_set_ttl(sock *s, int ttl)
|
|
{
|
|
s->ttl = ttl;
|
|
|
|
if (sk_is_ipv4(s))
|
|
return sk_set_ttl4(s, ttl);
|
|
else
|
|
return sk_set_ttl6(s, ttl);
|
|
}
|
|
|
|
/**
|
|
* sk_set_min_ttl - set minimal accepted TTL for given socket
|
|
* @s: socket
|
|
* @ttl: TTL value
|
|
*
|
|
* Set minimal accepted TTL for given socket. Can be used for TTL security.
|
|
* implementations.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_set_min_ttl(sock *s, int ttl)
|
|
{
|
|
if (sk_is_ipv4(s))
|
|
return sk_set_min_ttl4(s, ttl);
|
|
else
|
|
return sk_set_min_ttl6(s, ttl);
|
|
}
|
|
|
|
#if 0
|
|
/**
|
|
* sk_set_md5_auth - add / remove MD5 security association for given socket
|
|
* @s: socket
|
|
* @local: IP address of local side
|
|
* @remote: IP address of remote side
|
|
* @ifa: Interface for link-local IP address
|
|
* @passwd: Password used for MD5 authentication
|
|
* @setkey: Update also system SA/SP database
|
|
*
|
|
* In TCP MD5 handling code in kernel, there is a set of security associations
|
|
* used for choosing password and other authentication parameters according to
|
|
* the local and remote address. This function is useful for listening socket,
|
|
* for active sockets it may be enough to set s->password field.
|
|
*
|
|
* When called with passwd != NULL, the new pair is added,
|
|
* When called with passwd == NULL, the existing pair is removed.
|
|
*
|
|
* Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
|
|
* stored in global SA/SP database (but the behavior also must be enabled on
|
|
* per-socket basis). In case of multiple sockets to the same neighbor, the
|
|
* socket-specific state must be configured for each socket while global state
|
|
* just once per src-dst pair. The @setkey argument controls whether the global
|
|
* state (SA/SP database) is also updated.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
|
|
{ DUMMY; }
|
|
#endif
|
|
|
|
/**
|
|
* sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
|
|
* @s: socket
|
|
* @offset: offset
|
|
*
|
|
* Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
|
|
* kernel will automatically fill it for outgoing packets and check it for
|
|
* incoming packets. Should not be used on ICMPv6 sockets, where the position is
|
|
* known to the kernel.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
|
|
int
|
|
sk_set_ipv6_checksum(sock *s, int offset)
|
|
{
|
|
if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
|
|
ERR("IPV6_CHECKSUM");
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
sk_set_icmp6_filter(sock *s, int p1, int p2)
|
|
{
|
|
/* a bit of lame interface, but it is here only for Radv */
|
|
struct icmp6_filter f;
|
|
|
|
ICMP6_FILTER_SETBLOCKALL(&f);
|
|
ICMP6_FILTER_SETPASS(p1, &f);
|
|
ICMP6_FILTER_SETPASS(p2, &f);
|
|
|
|
if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
|
|
ERR("ICMP6_FILTER");
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
sk_log_error(sock *s, const char *p)
|
|
{
|
|
log(L_ERR "%s: Socket error: %s%#m", p, s->err);
|
|
}
|
|
|
|
|
|
/*
|
|
* Actual struct birdsock code
|
|
*/
|
|
|
|
sock *
|
|
sk_next(sock *s)
|
|
{
|
|
if (!s->n.next->next)
|
|
return NULL;
|
|
else
|
|
return SKIP_BACK(sock, n, s->n.next);
|
|
}
|
|
|
|
static void
|
|
sk_alloc_bufs(sock *s)
|
|
{
|
|
if (!s->rbuf && s->rbsize)
|
|
s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
|
|
s->rpos = s->rbuf;
|
|
if (!s->tbuf && s->tbsize)
|
|
s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
|
|
s->tpos = s->ttx = s->tbuf;
|
|
}
|
|
|
|
static void
|
|
sk_free_bufs(sock *s)
|
|
{
|
|
if (s->rbuf_alloc)
|
|
{
|
|
xfree(s->rbuf_alloc);
|
|
s->rbuf = s->rbuf_alloc = NULL;
|
|
}
|
|
if (s->tbuf_alloc)
|
|
{
|
|
xfree(s->tbuf_alloc);
|
|
s->tbuf = s->tbuf_alloc = NULL;
|
|
}
|
|
}
|
|
|
|
#ifdef HAVE_LIBSSH
|
|
static void
|
|
sk_ssh_free(sock *s)
|
|
{
|
|
struct ssh_sock *ssh = s->ssh;
|
|
|
|
if (s->ssh == NULL)
|
|
return;
|
|
|
|
s->ssh = NULL;
|
|
|
|
if (ssh->channel)
|
|
{
|
|
ssh_channel_close(ssh->channel);
|
|
ssh_channel_free(ssh->channel);
|
|
ssh->channel = NULL;
|
|
}
|
|
|
|
if (ssh->session)
|
|
{
|
|
ssh_disconnect(ssh->session);
|
|
ssh_free(ssh->session);
|
|
ssh->session = NULL;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
static void
|
|
sk_free(resource *r)
|
|
{
|
|
sock *s = (sock *) r;
|
|
|
|
sk_free_bufs(s);
|
|
|
|
#ifdef HAVE_LIBSSH
|
|
if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
|
|
sk_ssh_free(s);
|
|
#endif
|
|
|
|
if (s->loop)
|
|
birdloop_remove_socket(s->loop, s);
|
|
|
|
if (s->fd >= 0 && s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
|
|
close(s->fd);
|
|
|
|
s->fd = -1;
|
|
}
|
|
|
|
void
|
|
sk_set_rbsize(sock *s, uint val)
|
|
{
|
|
ASSERT(s->rbuf_alloc == s->rbuf);
|
|
|
|
if (s->rbsize == val)
|
|
return;
|
|
|
|
s->rbsize = val;
|
|
xfree(s->rbuf_alloc);
|
|
s->rbuf_alloc = xmalloc(val);
|
|
s->rpos = s->rbuf = s->rbuf_alloc;
|
|
}
|
|
|
|
void
|
|
sk_set_tbsize(sock *s, uint val)
|
|
{
|
|
ASSERT(s->tbuf_alloc == s->tbuf);
|
|
|
|
if (s->tbsize == val)
|
|
return;
|
|
|
|
byte *old_tbuf = s->tbuf;
|
|
|
|
s->tbsize = val;
|
|
s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
|
|
s->tpos = s->tbuf + (s->tpos - old_tbuf);
|
|
s->ttx = s->tbuf + (s->ttx - old_tbuf);
|
|
}
|
|
|
|
void
|
|
sk_set_tbuf(sock *s, void *tbuf)
|
|
{
|
|
s->tbuf = tbuf ?: s->tbuf_alloc;
|
|
s->ttx = s->tpos = s->tbuf;
|
|
}
|
|
|
|
void
|
|
sk_reallocate(sock *s)
|
|
{
|
|
sk_free_bufs(s);
|
|
sk_alloc_bufs(s);
|
|
}
|
|
|
|
static void
|
|
sk_dump(resource *r, unsigned indent UNUSED)
|
|
{
|
|
sock *s = (sock *) r;
|
|
static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
|
|
|
|
debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
|
|
sk_type_names[s->type],
|
|
s->data,
|
|
s->saddr,
|
|
s->sport,
|
|
s->daddr,
|
|
s->dport,
|
|
s->tos,
|
|
s->ttl,
|
|
s->iface ? s->iface->name : "none");
|
|
}
|
|
|
|
static struct resclass sk_class = {
|
|
"Socket",
|
|
sizeof(sock),
|
|
sk_free,
|
|
sk_dump,
|
|
NULL,
|
|
NULL
|
|
};
|
|
|
|
/**
|
|
* sk_new - create a socket
|
|
* @p: pool
|
|
*
|
|
* This function creates a new socket resource. If you want to use it,
|
|
* you need to fill in all the required fields of the structure and
|
|
* call sk_open() to do the actual opening of the socket.
|
|
*
|
|
* The real function name is sock_new(), sk_new() is a macro wrapper
|
|
* to avoid collision with OpenSSL.
|
|
*/
|
|
sock *
|
|
sock_new(pool *p)
|
|
{
|
|
sock *s = ralloc(p, &sk_class);
|
|
s->pool = p;
|
|
// s->saddr = s->daddr = IPA_NONE;
|
|
s->tos = s->priority = s->ttl = -1;
|
|
s->fd = -1;
|
|
return s;
|
|
}
|
|
|
|
static int
|
|
sk_setup(sock *s)
|
|
{
|
|
int y = 1;
|
|
int fd = s->fd;
|
|
|
|
if (s->type == SK_SSH_ACTIVE)
|
|
return 0;
|
|
|
|
if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
|
|
ERR("O_NONBLOCK");
|
|
|
|
if (!s->af)
|
|
return 0;
|
|
|
|
if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
|
|
s->flags |= SKF_PKTINFO;
|
|
|
|
#ifdef CONFIG_USE_HDRINCL
|
|
if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
|
|
{
|
|
s->flags &= ~SKF_PKTINFO;
|
|
s->flags |= SKF_HDRINCL;
|
|
if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
|
|
ERR("IP_HDRINCL");
|
|
}
|
|
#endif
|
|
|
|
if (s->vrf && (s->vrf != &default_vrf) && !s->iface)
|
|
{
|
|
/* Bind socket to associated VRF interface.
|
|
This is Linux-specific, but so is SO_BINDTODEVICE. */
|
|
#ifdef SO_BINDTODEVICE
|
|
struct ifreq ifr = {};
|
|
strcpy(ifr.ifr_name, s->vrf->name);
|
|
if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
|
|
ERR("SO_BINDTODEVICE");
|
|
#endif
|
|
}
|
|
|
|
if (s->iface)
|
|
{
|
|
#ifdef SO_BINDTODEVICE
|
|
struct ifreq ifr = {};
|
|
strcpy(ifr.ifr_name, s->iface->name);
|
|
if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
|
|
ERR("SO_BINDTODEVICE");
|
|
#endif
|
|
|
|
#ifdef CONFIG_UNIX_DONTROUTE
|
|
if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
|
|
ERR("SO_DONTROUTE");
|
|
#endif
|
|
}
|
|
|
|
if (sk_is_ipv4(s))
|
|
{
|
|
if (s->flags & SKF_LADDR_RX)
|
|
if (sk_request_cmsg4_pktinfo(s) < 0)
|
|
return -1;
|
|
|
|
if (s->flags & SKF_TTL_RX)
|
|
if (sk_request_cmsg4_ttl(s) < 0)
|
|
return -1;
|
|
|
|
if ((s->type == SK_UDP) || (s->type == SK_IP))
|
|
if (sk_disable_mtu_disc4(s) < 0)
|
|
return -1;
|
|
|
|
if (s->ttl >= 0)
|
|
if (sk_set_ttl4(s, s->ttl) < 0)
|
|
return -1;
|
|
|
|
if (s->tos >= 0)
|
|
if (sk_set_tos4(s, s->tos) < 0)
|
|
return -1;
|
|
}
|
|
|
|
if (sk_is_ipv6(s))
|
|
{
|
|
if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
|
|
if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
|
|
ERR("IPV6_V6ONLY");
|
|
|
|
if (s->flags & SKF_LADDR_RX)
|
|
if (sk_request_cmsg6_pktinfo(s) < 0)
|
|
return -1;
|
|
|
|
if (s->flags & SKF_TTL_RX)
|
|
if (sk_request_cmsg6_ttl(s) < 0)
|
|
return -1;
|
|
|
|
if ((s->type == SK_UDP) || (s->type == SK_IP))
|
|
if (sk_disable_mtu_disc6(s) < 0)
|
|
return -1;
|
|
|
|
if (s->ttl >= 0)
|
|
if (sk_set_ttl6(s, s->ttl) < 0)
|
|
return -1;
|
|
|
|
if (s->tos >= 0)
|
|
if (sk_set_tos6(s, s->tos) < 0)
|
|
return -1;
|
|
}
|
|
|
|
/* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
|
|
if (s->priority >= 0)
|
|
if (sk_set_priority(s, s->priority) < 0)
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
sk_connect(sock *s)
|
|
{
|
|
sockaddr sa;
|
|
sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
|
|
return connect(s->fd, &sa.sa, SA_LEN(sa));
|
|
}
|
|
|
|
static void
|
|
sk_tcp_connected(sock *s)
|
|
{
|
|
sockaddr sa;
|
|
int sa_len = sizeof(sa);
|
|
|
|
if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
|
|
(sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
|
|
log(L_WARN "SOCK: Cannot get local IP address for TCP>");
|
|
|
|
s->type = SK_TCP;
|
|
sk_alloc_bufs(s);
|
|
s->tx_hook(s);
|
|
}
|
|
|
|
#ifdef HAVE_LIBSSH
|
|
static void
|
|
sk_ssh_connected(sock *s)
|
|
{
|
|
sk_alloc_bufs(s);
|
|
s->type = SK_SSH;
|
|
s->tx_hook(s);
|
|
}
|
|
#endif
|
|
|
|
static int
|
|
sk_passive_connected(sock *s, int type)
|
|
{
|
|
sockaddr loc_sa, rem_sa;
|
|
int loc_sa_len = sizeof(loc_sa);
|
|
int rem_sa_len = sizeof(rem_sa);
|
|
|
|
int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
|
|
if (fd < 0)
|
|
{
|
|
if ((errno != EINTR) && (errno != EAGAIN))
|
|
s->err_hook(s, errno);
|
|
return 0;
|
|
}
|
|
|
|
struct domain_generic *sock_lock = DG_IS_LOCKED(s->pool->domain) ? NULL : s->pool->domain;
|
|
if (sock_lock)
|
|
DG_LOCK(sock_lock);
|
|
|
|
sock *t = sk_new(s->pool);
|
|
t->type = type;
|
|
t->data = s->data;
|
|
t->af = s->af;
|
|
t->fd = fd;
|
|
t->ttl = s->ttl;
|
|
t->tos = s->tos;
|
|
t->vrf = s->vrf;
|
|
t->rbsize = s->rbsize;
|
|
t->tbsize = s->tbsize;
|
|
|
|
if (type == SK_TCP)
|
|
{
|
|
if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
|
|
(sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
|
|
log(L_WARN "SOCK: Cannot get local IP address for TCP<");
|
|
|
|
if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
|
|
log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
|
|
}
|
|
|
|
if (sk_setup(t) < 0)
|
|
{
|
|
/* FIXME: Call err_hook instead ? */
|
|
log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
|
|
|
|
/* FIXME: handle it better in rfree() */
|
|
close(t->fd);
|
|
t->fd = -1;
|
|
sk_close(t);
|
|
t = NULL;
|
|
}
|
|
else
|
|
{
|
|
birdloop_add_socket(s->loop, t);
|
|
sk_alloc_bufs(t);
|
|
}
|
|
|
|
if (sock_lock)
|
|
DG_UNLOCK(sock_lock);
|
|
|
|
if (t)
|
|
s->rx_hook(t, 0);
|
|
|
|
return 1;
|
|
}
|
|
|
|
#ifdef HAVE_LIBSSH
|
|
/*
|
|
* Return SSH_OK or SSH_AGAIN or SSH_ERROR
|
|
*/
|
|
static int
|
|
sk_ssh_connect(sock *s)
|
|
{
|
|
s->fd = ssh_get_fd(s->ssh->session);
|
|
|
|
/* Big fall thru automata */
|
|
switch (s->ssh->state)
|
|
{
|
|
case SK_SSH_CONNECT:
|
|
{
|
|
switch (ssh_connect(s->ssh->session))
|
|
{
|
|
case SSH_AGAIN:
|
|
/* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
|
|
* after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
|
|
* documented but our code relies on that.
|
|
*/
|
|
return SSH_AGAIN;
|
|
|
|
case SSH_OK:
|
|
break;
|
|
|
|
default:
|
|
return SSH_ERROR;
|
|
}
|
|
} /* fallthrough */
|
|
|
|
case SK_SSH_SERVER_KNOWN:
|
|
{
|
|
s->ssh->state = SK_SSH_SERVER_KNOWN;
|
|
|
|
if (s->ssh->server_hostkey_path)
|
|
{
|
|
int server_identity_is_ok = 1;
|
|
|
|
#ifdef HAVE_SSH_OLD_SERVER_VALIDATION_API
|
|
#define ssh_session_is_known_server ssh_is_server_known
|
|
#define SSH_KNOWN_HOSTS_OK SSH_SERVER_KNOWN_OK
|
|
#define SSH_KNOWN_HOSTS_UNKNOWN SSH_SERVER_NOT_KNOWN
|
|
#define SSH_KNOWN_HOSTS_CHANGED SSH_SERVER_KNOWN_CHANGED
|
|
#define SSH_KNOWN_HOSTS_NOT_FOUND SSH_SERVER_FILE_NOT_FOUND
|
|
#define SSH_KNOWN_HOSTS_ERROR SSH_SERVER_ERROR
|
|
#define SSH_KNOWN_HOSTS_OTHER SSH_SERVER_FOUND_OTHER
|
|
#endif
|
|
|
|
/* Check server identity */
|
|
switch (ssh_session_is_known_server(s->ssh->session))
|
|
{
|
|
#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
|
|
case SSH_KNOWN_HOSTS_OK:
|
|
/* The server is known and has not changed. */
|
|
break;
|
|
|
|
case SSH_KNOWN_HOSTS_UNKNOWN:
|
|
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
|
|
server_identity_is_ok = 0;
|
|
break;
|
|
|
|
case SSH_KNOWN_HOSTS_CHANGED:
|
|
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
|
|
server_identity_is_ok = 0;
|
|
break;
|
|
|
|
case SSH_KNOWN_HOSTS_NOT_FOUND:
|
|
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
|
|
server_identity_is_ok = 0;
|
|
break;
|
|
|
|
case SSH_KNOWN_HOSTS_ERROR:
|
|
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
|
|
server_identity_is_ok = 0;
|
|
break;
|
|
|
|
case SSH_KNOWN_HOSTS_OTHER:
|
|
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \
|
|
"It is a possible attack.");
|
|
server_identity_is_ok = 0;
|
|
break;
|
|
}
|
|
|
|
if (!server_identity_is_ok)
|
|
return SSH_ERROR;
|
|
}
|
|
} /* fallthrough */
|
|
|
|
case SK_SSH_USERAUTH:
|
|
{
|
|
s->ssh->state = SK_SSH_USERAUTH;
|
|
switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
|
|
{
|
|
case SSH_AUTH_AGAIN:
|
|
return SSH_AGAIN;
|
|
|
|
case SSH_AUTH_SUCCESS:
|
|
break;
|
|
|
|
default:
|
|
return SSH_ERROR;
|
|
}
|
|
} /* fallthrough */
|
|
|
|
case SK_SSH_CHANNEL:
|
|
{
|
|
s->ssh->state = SK_SSH_CHANNEL;
|
|
s->ssh->channel = ssh_channel_new(s->ssh->session);
|
|
if (s->ssh->channel == NULL)
|
|
return SSH_ERROR;
|
|
} /* fallthrough */
|
|
|
|
case SK_SSH_SESSION:
|
|
{
|
|
s->ssh->state = SK_SSH_SESSION;
|
|
switch (ssh_channel_open_session(s->ssh->channel))
|
|
{
|
|
case SSH_AGAIN:
|
|
return SSH_AGAIN;
|
|
|
|
case SSH_OK:
|
|
break;
|
|
|
|
default:
|
|
return SSH_ERROR;
|
|
}
|
|
} /* fallthrough */
|
|
|
|
case SK_SSH_SUBSYSTEM:
|
|
{
|
|
s->ssh->state = SK_SSH_SUBSYSTEM;
|
|
if (s->ssh->subsystem)
|
|
{
|
|
switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
|
|
{
|
|
case SSH_AGAIN:
|
|
return SSH_AGAIN;
|
|
|
|
case SSH_OK:
|
|
break;
|
|
|
|
default:
|
|
return SSH_ERROR;
|
|
}
|
|
}
|
|
} /* fallthrough */
|
|
|
|
case SK_SSH_ESTABLISHED:
|
|
s->ssh->state = SK_SSH_ESTABLISHED;
|
|
}
|
|
|
|
return SSH_OK;
|
|
}
|
|
|
|
/*
|
|
* Return file descriptor number if success
|
|
* Return -1 if failed
|
|
*/
|
|
static int
|
|
sk_open_ssh(sock *s)
|
|
{
|
|
if (!s->ssh)
|
|
bug("sk_open() sock->ssh is not allocated");
|
|
|
|
ssh_session sess = ssh_new();
|
|
if (sess == NULL)
|
|
ERR2("Cannot create a ssh session");
|
|
s->ssh->session = sess;
|
|
|
|
const int verbosity = SSH_LOG_NOLOG;
|
|
ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
|
|
ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
|
|
ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
|
|
/* TODO: Add SSH_OPTIONS_BINDADDR */
|
|
ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
|
|
|
|
if (s->ssh->server_hostkey_path)
|
|
ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
|
|
|
|
if (s->ssh->client_privkey_path)
|
|
ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
|
|
|
|
ssh_set_blocking(sess, 0);
|
|
|
|
switch (sk_ssh_connect(s))
|
|
{
|
|
case SSH_AGAIN:
|
|
break;
|
|
|
|
case SSH_OK:
|
|
sk_ssh_connected(s);
|
|
break;
|
|
|
|
case SSH_ERROR:
|
|
ERR2(ssh_get_error(sess));
|
|
break;
|
|
}
|
|
|
|
return ssh_get_fd(sess);
|
|
|
|
err:
|
|
return -1;
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* sk_open - open a socket
|
|
* @loop: loop
|
|
* @s: socket
|
|
*
|
|
* This function takes a socket resource created by sk_new() and
|
|
* initialized by the user and binds a corresponding network connection
|
|
* to it.
|
|
*
|
|
* Result: 0 for success, -1 for an error.
|
|
*/
|
|
int
|
|
sk_open(sock *s, struct birdloop *loop)
|
|
{
|
|
int af = AF_UNSPEC;
|
|
int fd = -1;
|
|
int do_bind = 0;
|
|
int bind_port = 0;
|
|
ip_addr bind_addr = IPA_NONE;
|
|
sockaddr sa;
|
|
|
|
if (s->type <= SK_IP)
|
|
{
|
|
/*
|
|
* For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
|
|
* explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
|
|
* But the specifications have to be consistent.
|
|
*/
|
|
|
|
switch (s->subtype)
|
|
{
|
|
case 0:
|
|
ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
|
|
(ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
|
|
af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
|
|
break;
|
|
|
|
case SK_IPV4:
|
|
ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
|
|
ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
|
|
af = AF_INET;
|
|
break;
|
|
|
|
case SK_IPV6:
|
|
ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
|
|
ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
|
|
af = AF_INET6;
|
|
break;
|
|
|
|
default:
|
|
bug("Invalid subtype %d", s->subtype);
|
|
}
|
|
}
|
|
|
|
switch (s->type)
|
|
{
|
|
case SK_TCP_ACTIVE:
|
|
s->ttx = ""; /* Force s->ttx != s->tpos */
|
|
/* Fall thru */
|
|
case SK_TCP_PASSIVE:
|
|
fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
|
|
bind_port = s->sport;
|
|
bind_addr = s->saddr;
|
|
do_bind = bind_port || ipa_nonzero(bind_addr);
|
|
break;
|
|
|
|
#ifdef HAVE_LIBSSH
|
|
case SK_SSH_ACTIVE:
|
|
s->ttx = ""; /* Force s->ttx != s->tpos */
|
|
fd = sk_open_ssh(s);
|
|
break;
|
|
#endif
|
|
|
|
case SK_UDP:
|
|
fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
|
|
bind_port = s->sport;
|
|
bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
|
|
do_bind = 1;
|
|
break;
|
|
|
|
case SK_IP:
|
|
fd = socket(af, SOCK_RAW, s->dport);
|
|
bind_port = 0;
|
|
bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
|
|
do_bind = ipa_nonzero(bind_addr);
|
|
break;
|
|
|
|
case SK_MAGIC:
|
|
af = 0;
|
|
fd = s->fd;
|
|
break;
|
|
|
|
default:
|
|
bug("sk_open() called for invalid sock type %d", s->type);
|
|
}
|
|
|
|
if (fd < 0)
|
|
ERR("socket");
|
|
|
|
s->af = af;
|
|
s->fd = fd;
|
|
|
|
if (sk_setup(s) < 0)
|
|
goto err;
|
|
|
|
if (do_bind)
|
|
{
|
|
if (bind_port)
|
|
{
|
|
int y = 1;
|
|
|
|
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
|
|
ERR2("SO_REUSEADDR");
|
|
|
|
#ifdef CONFIG_NO_IFACE_BIND
|
|
/* Workaround missing ability to bind to an iface */
|
|
if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
|
|
{
|
|
if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
|
|
ERR2("SO_REUSEPORT");
|
|
}
|
|
#endif
|
|
}
|
|
else
|
|
if (s->flags & SKF_HIGH_PORT)
|
|
if (sk_set_high_port(s) < 0)
|
|
log(L_WARN "Socket error: %s%#m", s->err);
|
|
|
|
if (s->flags & SKF_FREEBIND)
|
|
if (sk_set_freebind(s) < 0)
|
|
log(L_WARN "Socket error: %s%#m", s->err);
|
|
|
|
sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
|
|
if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
|
|
ERR2("bind");
|
|
}
|
|
|
|
if (s->password)
|
|
if (sk_set_md5_auth(s, s->saddr, s->daddr, -1, s->iface, s->password, 0) < 0)
|
|
goto err;
|
|
|
|
switch (s->type)
|
|
{
|
|
case SK_TCP_ACTIVE:
|
|
if (sk_connect(s) >= 0)
|
|
sk_tcp_connected(s);
|
|
else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
|
|
errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
|
|
ERR2("connect");
|
|
break;
|
|
|
|
case SK_TCP_PASSIVE:
|
|
if (listen(fd, 8) < 0)
|
|
ERR2("listen");
|
|
break;
|
|
|
|
case SK_UDP:
|
|
if (s->flags & SKF_CONNECT)
|
|
if (sk_connect(s) < 0)
|
|
ERR2("connect");
|
|
|
|
sk_alloc_bufs(s);
|
|
break;
|
|
|
|
case SK_SSH_ACTIVE:
|
|
case SK_MAGIC:
|
|
break;
|
|
|
|
default:
|
|
sk_alloc_bufs(s);
|
|
}
|
|
|
|
birdloop_add_socket(loop, s);
|
|
return 0;
|
|
|
|
err:
|
|
close(fd);
|
|
s->fd = -1;
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
sk_open_unix(sock *s, struct birdloop *loop, char *name)
|
|
{
|
|
struct sockaddr_un sa;
|
|
int fd;
|
|
|
|
/* We are sloppy during error (leak fd and not set s->err), but we die anyway */
|
|
|
|
fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
|
if (fd < 0)
|
|
return -1;
|
|
|
|
if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
|
|
return -1;
|
|
|
|
/* Path length checked in test_old_bird() but we may need unix sockets for other reasons in future */
|
|
ASSERT_DIE(strlen(name) < sizeof(sa.sun_path));
|
|
|
|
sa.sun_family = AF_UNIX;
|
|
strcpy(sa.sun_path, name);
|
|
|
|
if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
|
|
return -1;
|
|
|
|
if (listen(fd, 8) < 0)
|
|
return -1;
|
|
|
|
s->fd = fd;
|
|
birdloop_add_socket(loop, s);
|
|
return 0;
|
|
}
|
|
|
|
|
|
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
|
|
CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
|
|
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
|
|
|
|
static void
|
|
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
|
|
{
|
|
if (sk_is_ipv4(s))
|
|
sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
|
|
else
|
|
sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
|
|
}
|
|
|
|
static void
|
|
sk_process_cmsgs(sock *s, struct msghdr *msg)
|
|
{
|
|
struct cmsghdr *cm;
|
|
|
|
s->laddr = IPA_NONE;
|
|
s->lifindex = 0;
|
|
s->rcv_ttl = -1;
|
|
|
|
for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
|
|
{
|
|
if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
|
|
{
|
|
sk_process_cmsg4_pktinfo(s, cm);
|
|
sk_process_cmsg4_ttl(s, cm);
|
|
}
|
|
|
|
if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
|
|
{
|
|
sk_process_cmsg6_pktinfo(s, cm);
|
|
sk_process_cmsg6_ttl(s, cm);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static inline int
|
|
sk_sendmsg(sock *s)
|
|
{
|
|
struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
|
|
byte cmsg_buf[CMSG_TX_SPACE];
|
|
sockaddr dst;
|
|
int flags = 0;
|
|
|
|
sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
|
|
|
|
struct msghdr msg = {
|
|
.msg_name = &dst.sa,
|
|
.msg_namelen = SA_LEN(dst),
|
|
.msg_iov = &iov,
|
|
.msg_iovlen = 1
|
|
};
|
|
|
|
#ifdef CONFIG_DONTROUTE_UNICAST
|
|
/* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we
|
|
cannot use it for other cases (e.g. when TTL security is used). */
|
|
if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1))
|
|
flags = MSG_DONTROUTE;
|
|
#endif
|
|
|
|
#ifdef CONFIG_USE_HDRINCL
|
|
byte hdr[20];
|
|
struct iovec iov2[2] = { {hdr, 20}, iov };
|
|
|
|
if (s->flags & SKF_HDRINCL)
|
|
{
|
|
sk_prepare_ip_header(s, hdr, iov.iov_len);
|
|
msg.msg_iov = iov2;
|
|
msg.msg_iovlen = 2;
|
|
}
|
|
#endif
|
|
|
|
if (s->flags & SKF_PKTINFO)
|
|
sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
|
|
|
|
return sendmsg(s->fd, &msg, flags);
|
|
}
|
|
|
|
static inline int
|
|
sk_recvmsg(sock *s)
|
|
{
|
|
struct iovec iov = {s->rbuf, s->rbsize};
|
|
byte cmsg_buf[CMSG_RX_SPACE];
|
|
sockaddr src;
|
|
|
|
struct msghdr msg = {
|
|
.msg_name = &src.sa,
|
|
.msg_namelen = sizeof(src), // XXXX ??
|
|
.msg_iov = &iov,
|
|
.msg_iovlen = 1,
|
|
.msg_control = cmsg_buf,
|
|
.msg_controllen = sizeof(cmsg_buf),
|
|
.msg_flags = 0
|
|
};
|
|
|
|
int rv = recvmsg(s->fd, &msg, 0);
|
|
if (rv < 0)
|
|
return rv;
|
|
|
|
//ifdef IPV4
|
|
// if (cf_type == SK_IP)
|
|
// rv = ipv4_skip_header(pbuf, rv);
|
|
//endif
|
|
|
|
sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
|
|
sk_process_cmsgs(s, &msg);
|
|
|
|
if (msg.msg_flags & MSG_TRUNC)
|
|
s->flags |= SKF_TRUNCATED;
|
|
else
|
|
s->flags &= ~SKF_TRUNCATED;
|
|
|
|
return rv;
|
|
}
|
|
|
|
|
|
static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
|
|
|
|
_Bool
|
|
sk_tx_pending(sock *s)
|
|
{
|
|
return s->ttx != s->tpos;
|
|
}
|
|
|
|
|
|
static int
|
|
sk_maybe_write(sock *s)
|
|
{
|
|
int e;
|
|
|
|
switch (s->type)
|
|
{
|
|
case SK_TCP:
|
|
case SK_MAGIC:
|
|
case SK_UNIX:
|
|
while (sk_tx_pending(s))
|
|
{
|
|
e = write(s->fd, s->ttx, s->tpos - s->ttx);
|
|
|
|
if (e < 0)
|
|
{
|
|
if (errno != EINTR && errno != EAGAIN)
|
|
{
|
|
reset_tx_buffer(s);
|
|
/* EPIPE is just a connection close notification during TX */
|
|
s->err_hook(s, (errno != EPIPE) ? errno : 0);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
s->ttx += e;
|
|
}
|
|
reset_tx_buffer(s);
|
|
return 1;
|
|
|
|
#ifdef HAVE_LIBSSH
|
|
case SK_SSH:
|
|
while (sk_tx_pending(s))
|
|
{
|
|
e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
|
|
|
|
if (e < 0)
|
|
{
|
|
s->err = ssh_get_error(s->ssh->session);
|
|
s->err_hook(s, ssh_get_error_code(s->ssh->session));
|
|
|
|
reset_tx_buffer(s);
|
|
/* EPIPE is just a connection close notification during TX */
|
|
s->err_hook(s, (errno != EPIPE) ? errno : 0);
|
|
return -1;
|
|
}
|
|
s->ttx += e;
|
|
}
|
|
reset_tx_buffer(s);
|
|
return 1;
|
|
#endif
|
|
|
|
case SK_UDP:
|
|
case SK_IP:
|
|
{
|
|
if (s->tbuf == s->tpos)
|
|
return 1;
|
|
|
|
e = sk_sendmsg(s);
|
|
|
|
if (e < 0)
|
|
{
|
|
if (errno != EINTR && errno != EAGAIN)
|
|
{
|
|
reset_tx_buffer(s);
|
|
s->err_hook(s, errno);
|
|
return -1;
|
|
}
|
|
|
|
if (!s->tx_hook)
|
|
reset_tx_buffer(s);
|
|
return 0;
|
|
}
|
|
reset_tx_buffer(s);
|
|
return 1;
|
|
}
|
|
|
|
default:
|
|
bug("sk_maybe_write: unknown socket type %d", s->type);
|
|
}
|
|
}
|
|
|
|
int
|
|
sk_rx_ready(sock *s)
|
|
{
|
|
int rv;
|
|
struct pollfd pfd = { .fd = s->fd };
|
|
pfd.events |= POLLIN;
|
|
|
|
redo:
|
|
rv = poll(&pfd, 1, 0);
|
|
|
|
if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
|
|
goto redo;
|
|
|
|
return rv;
|
|
}
|
|
|
|
/**
|
|
* sk_send - send data to a socket
|
|
* @s: socket
|
|
* @len: number of bytes to send
|
|
*
|
|
* This function sends @len bytes of data prepared in the
|
|
* transmit buffer of the socket @s to the network connection.
|
|
* If the packet can be sent immediately, it does so and returns
|
|
* 1, else it queues the packet for later processing, returns 0
|
|
* and calls the @tx_hook of the socket when the tranmission
|
|
* takes place.
|
|
*/
|
|
int
|
|
sk_send(sock *s, unsigned len)
|
|
{
|
|
s->ttx = s->tbuf;
|
|
s->tpos = s->tbuf + len;
|
|
|
|
int e = sk_maybe_write(s);
|
|
if (e == 0) /* Trigger thread poll reload to poll this socket's write. */
|
|
socket_changed(s);
|
|
|
|
return e;
|
|
}
|
|
|
|
/**
|
|
* sk_send_to - send data to a specific destination
|
|
* @s: socket
|
|
* @len: number of bytes to send
|
|
* @addr: IP address to send the packet to
|
|
* @port: port to send the packet to
|
|
*
|
|
* This is a sk_send() replacement for connection-less packet sockets
|
|
* which allows destination of the packet to be chosen dynamically.
|
|
* Raw IP sockets should use 0 for @port.
|
|
*/
|
|
int
|
|
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
|
|
{
|
|
s->daddr = addr;
|
|
if (port)
|
|
s->dport = port;
|
|
|
|
s->ttx = s->tbuf;
|
|
s->tpos = s->tbuf + len;
|
|
return sk_maybe_write(s);
|
|
}
|
|
|
|
/*
|
|
int
|
|
sk_send_full(sock *s, unsigned len, struct iface *ifa,
|
|
ip_addr saddr, ip_addr daddr, unsigned dport)
|
|
{
|
|
s->iface = ifa;
|
|
s->saddr = saddr;
|
|
s->daddr = daddr;
|
|
s->dport = dport;
|
|
s->ttx = s->tbuf;
|
|
s->tpos = s->tbuf + len;
|
|
return sk_maybe_write(s);
|
|
}
|
|
*/
|
|
|
|
static void
|
|
call_rx_hook(sock *s, int size)
|
|
{
|
|
if (s->rx_hook(s, size))
|
|
{
|
|
/* We need to be careful since the socket could have been deleted by the hook */
|
|
if (s->loop->sock_active == s)
|
|
s->rpos = s->rbuf;
|
|
}
|
|
}
|
|
|
|
#ifdef HAVE_LIBSSH
|
|
static int
|
|
sk_read_ssh(sock *s)
|
|
{
|
|
ssh_channel rchans[2] = { s->ssh->channel, NULL };
|
|
struct timeval timev = { 1, 0 };
|
|
|
|
if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
|
|
return 1; /* Try again */
|
|
|
|
if (ssh_channel_is_eof(s->ssh->channel) != 0)
|
|
{
|
|
/* The remote side is closing the connection */
|
|
s->err_hook(s, 0);
|
|
return 0;
|
|
}
|
|
|
|
if (rchans[0] == NULL)
|
|
return 0; /* No data is available on the socket */
|
|
|
|
const uint used_bytes = s->rpos - s->rbuf;
|
|
const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
|
|
if (read_bytes > 0)
|
|
{
|
|
/* Received data */
|
|
s->rpos += read_bytes;
|
|
call_rx_hook(s, used_bytes + read_bytes);
|
|
return 1;
|
|
}
|
|
else if (read_bytes == 0)
|
|
{
|
|
if (ssh_channel_is_eof(s->ssh->channel) != 0)
|
|
{
|
|
/* The remote side is closing the connection */
|
|
s->err_hook(s, 0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
s->err = ssh_get_error(s->ssh->session);
|
|
s->err_hook(s, ssh_get_error_code(s->ssh->session));
|
|
}
|
|
|
|
return 0; /* No data is available on the socket */
|
|
}
|
|
#endif
|
|
|
|
/* sk_read() and sk_write() are called from BFD's event loop */
|
|
|
|
static inline int
|
|
sk_read_noflush(sock *s, int revents)
|
|
{
|
|
switch (s->type)
|
|
{
|
|
case SK_TCP_PASSIVE:
|
|
return sk_passive_connected(s, SK_TCP);
|
|
|
|
case SK_UNIX_PASSIVE:
|
|
return sk_passive_connected(s, SK_UNIX);
|
|
|
|
case SK_TCP:
|
|
case SK_UNIX:
|
|
{
|
|
int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
|
|
|
|
if (c < 0)
|
|
{
|
|
if (errno != EINTR && errno != EAGAIN)
|
|
s->err_hook(s, errno);
|
|
else if (errno == EAGAIN && !(revents & POLLIN))
|
|
{
|
|
log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
|
|
s->err_hook(s, 0);
|
|
}
|
|
}
|
|
else if (!c)
|
|
s->err_hook(s, 0);
|
|
else
|
|
{
|
|
s->rpos += c;
|
|
call_rx_hook(s, s->rpos - s->rbuf);
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef HAVE_LIBSSH
|
|
case SK_SSH:
|
|
return sk_read_ssh(s);
|
|
#endif
|
|
|
|
case SK_MAGIC:
|
|
return s->rx_hook(s, 0);
|
|
|
|
default:
|
|
{
|
|
int e = sk_recvmsg(s);
|
|
|
|
if (e < 0)
|
|
{
|
|
if (errno != EINTR && errno != EAGAIN)
|
|
s->err_hook(s, errno);
|
|
return 0;
|
|
}
|
|
|
|
s->rpos = s->rbuf + e;
|
|
s->rx_hook(s, e);
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
int
|
|
sk_read(sock *s, int revents)
|
|
{
|
|
int e = sk_read_noflush(s, revents);
|
|
tmp_flush();
|
|
return e;
|
|
}
|
|
|
|
static inline int
|
|
sk_write_noflush(sock *s)
|
|
{
|
|
switch (s->type)
|
|
{
|
|
case SK_TCP_ACTIVE:
|
|
{
|
|
if (sk_connect(s) >= 0 || errno == EISCONN)
|
|
sk_tcp_connected(s);
|
|
else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
|
|
s->err_hook(s, errno);
|
|
return 0;
|
|
}
|
|
|
|
#ifdef HAVE_LIBSSH
|
|
case SK_SSH_ACTIVE:
|
|
{
|
|
switch (sk_ssh_connect(s))
|
|
{
|
|
case SSH_OK:
|
|
sk_ssh_connected(s);
|
|
break;
|
|
|
|
case SSH_AGAIN:
|
|
return 1;
|
|
|
|
case SSH_ERROR:
|
|
s->err = ssh_get_error(s->ssh->session);
|
|
s->err_hook(s, ssh_get_error_code(s->ssh->session));
|
|
break;
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
default:
|
|
if (sk_tx_pending(s) && sk_maybe_write(s) > 0)
|
|
{
|
|
if (s->tx_hook)
|
|
s->tx_hook(s);
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
int
|
|
sk_write(sock *s)
|
|
{
|
|
int e = sk_write_noflush(s);
|
|
tmp_flush();
|
|
return e;
|
|
}
|
|
|
|
int sk_is_ipv4(sock *s)
|
|
{ return s->af == AF_INET; }
|
|
|
|
int sk_is_ipv6(sock *s)
|
|
{ return s->af == AF_INET6; }
|
|
|
|
void
|
|
sk_err(sock *s, int revents)
|
|
{
|
|
int se = 0, sse = sizeof(se);
|
|
if ((s->type != SK_MAGIC) && (revents & POLLERR))
|
|
if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
|
|
{
|
|
log(L_ERR "IO: Socket error: SO_ERROR: %m");
|
|
se = 0;
|
|
}
|
|
|
|
s->err_hook(s, se);
|
|
tmp_flush();
|
|
}
|
|
|
|
void
|
|
sk_dump_all(void)
|
|
{
|
|
node *n;
|
|
sock *s;
|
|
|
|
debug("Open sockets:\n");
|
|
WALK_LIST(n, main_birdloop.sock_list)
|
|
{
|
|
s = SKIP_BACK(sock, n, n);
|
|
debug("%p ", s);
|
|
sk_dump(&s->r, 3);
|
|
}
|
|
debug("\n");
|
|
}
|
|
|
|
|
|
/*
|
|
* Internal event log and watchdog
|
|
*/
|
|
|
|
#define EVENT_LOG_LENGTH 32
|
|
|
|
struct event_log_entry
|
|
{
|
|
void *hook;
|
|
void *data;
|
|
btime timestamp;
|
|
btime duration;
|
|
};
|
|
|
|
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
|
|
static struct event_log_entry *event_open;
|
|
static int event_log_pos, event_log_num, watchdog_active;
|
|
static btime last_io_time;
|
|
static btime loop_time;
|
|
|
|
static void
|
|
io_update_time(void)
|
|
{
|
|
last_io_time = current_time();
|
|
|
|
if (event_open)
|
|
{
|
|
event_open->duration = last_io_time - event_open->timestamp;
|
|
|
|
struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
|
|
if (event_open->duration > gr->latency_limit)
|
|
log(L_WARN "Event 0x%p 0x%p took %u.%03u ms",
|
|
event_open->hook, event_open->data, (uint) (event_open->duration TO_MS), (uint) (event_open->duration % 1000));
|
|
|
|
event_open = NULL;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* io_log_event - mark approaching event into event log
|
|
* @hook: event hook address
|
|
* @data: event data address
|
|
*
|
|
* Store info (hook, data, timestamp) about the following internal event into
|
|
* a circular event log (@event_log). When latency tracking is enabled, the log
|
|
* entry is kept open (in @event_open) so the duration can be filled later.
|
|
*/
|
|
void
|
|
io_log_event(void *hook, void *data, uint flag)
|
|
{
|
|
struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
|
|
if (gr->latency_debug & flag)
|
|
io_update_time();
|
|
|
|
struct event_log_entry *en = event_log + event_log_pos;
|
|
|
|
en->hook = hook;
|
|
en->data = data;
|
|
en->timestamp = last_io_time;
|
|
en->duration = 0;
|
|
|
|
event_log_num++;
|
|
event_log_pos++;
|
|
event_log_pos %= EVENT_LOG_LENGTH;
|
|
|
|
event_open = (gr->latency_debug & flag) ? en : NULL;
|
|
}
|
|
|
|
static inline void
|
|
io_close_event(void)
|
|
{
|
|
if (event_open)
|
|
io_update_time();
|
|
}
|
|
|
|
void
|
|
io_log_dump(void)
|
|
{
|
|
int i;
|
|
|
|
log(L_DEBUG "Event log:");
|
|
for (i = 0; i < EVENT_LOG_LENGTH; i++)
|
|
{
|
|
struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
|
|
if (en->hook)
|
|
log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
|
|
(int) ((last_io_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
|
|
}
|
|
}
|
|
|
|
void
|
|
watchdog_sigalrm(int sig UNUSED)
|
|
{
|
|
/* Update last_io_time and duration, but skip latency check */
|
|
struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
|
|
gr->latency_limit = 0xffffffff;
|
|
io_update_time();
|
|
|
|
debug_safe("Watchdog timer timed out\n");
|
|
|
|
/* We want core dump */
|
|
abort();
|
|
}
|
|
|
|
static inline void
|
|
watchdog_start1(void)
|
|
{
|
|
io_update_time();
|
|
|
|
loop_time = last_io_time;
|
|
}
|
|
|
|
static inline void
|
|
watchdog_start(void)
|
|
{
|
|
io_update_time();
|
|
|
|
loop_time = last_io_time;
|
|
event_log_num = 0;
|
|
|
|
struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
|
|
if (gr->watchdog_timeout)
|
|
{
|
|
alarm(gr->watchdog_timeout);
|
|
watchdog_active = 1;
|
|
}
|
|
}
|
|
|
|
static inline void
|
|
watchdog_stop(void)
|
|
{
|
|
io_update_time();
|
|
|
|
if (watchdog_active)
|
|
{
|
|
alarm(0);
|
|
watchdog_active = 0;
|
|
}
|
|
|
|
btime duration = last_io_time - loop_time;
|
|
struct global_runtime *gr = atomic_load_explicit(&global_runtime, memory_order_relaxed);
|
|
if (duration > gr->watchdog_warning)
|
|
log(L_WARN "I/O loop cycle took %u.%03u ms for %d events",
|
|
(uint) (duration TO_MS), (uint) (duration % 1000), event_log_num);
|
|
}
|
|
|
|
|
|
/*
|
|
* Main I/O Loop
|
|
*/
|
|
|
|
void
|
|
io_init(void)
|
|
{
|
|
init_list(&main_birdloop.sock_list);
|
|
ev_init_list(&global_event_list, &main_birdloop, "Global event list");
|
|
ev_init_list(&global_work_list, &main_birdloop, "Global work list");
|
|
ev_init_list(&main_birdloop.event_list, &main_birdloop, "Global fast event list");
|
|
krt_io_init();
|
|
// XXX init_times();
|
|
// XXX update_times();
|
|
boot_time = current_time();
|
|
|
|
u64 now = (u64) current_real_time();
|
|
srandom((uint) (now ^ (now >> 32)));
|
|
}
|
|
|
|
static int short_loops = 0;
|
|
#define SHORT_LOOP_MAX 10
|
|
#define WORK_EVENTS_MAX 10
|
|
|
|
sock *stored_sock;
|
|
|
|
void
|
|
io_loop(void)
|
|
{
|
|
int poll_tout, timeout;
|
|
int events, pout;
|
|
timer *t;
|
|
struct pfd pfd;
|
|
BUFFER_INIT(pfd.pfd, &root_pool, 16);
|
|
BUFFER_INIT(pfd.loop, &root_pool, 16);
|
|
|
|
watchdog_start1();
|
|
for(;;)
|
|
{
|
|
times_update();
|
|
ev_run_list(&global_event_list);
|
|
ev_run_list_limited(&global_work_list, WORK_EVENTS_MAX);
|
|
ev_run_list(&main_birdloop.event_list);
|
|
timers_fire(&main_birdloop.time, 1);
|
|
io_close_event();
|
|
|
|
events =
|
|
!ev_list_empty(&global_event_list) ||
|
|
!ev_list_empty(&global_work_list) ||
|
|
!ev_list_empty(&main_birdloop.event_list);
|
|
|
|
poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
|
|
if (t = timers_first(&main_birdloop.time))
|
|
{
|
|
times_update();
|
|
timeout = (tm_remains(t) TO_MS) + 1;
|
|
poll_tout = MIN(poll_tout, timeout);
|
|
}
|
|
|
|
BUFFER_FLUSH(pfd.pfd);
|
|
BUFFER_FLUSH(pfd.loop);
|
|
|
|
pipe_pollin(&main_birdloop.thread->wakeup, &pfd);
|
|
sockets_prepare(&main_birdloop, &pfd);
|
|
|
|
/*
|
|
* Yes, this is racy. But even if the signal comes before this test
|
|
* and entering poll(), it gets caught on the next timer tick.
|
|
*/
|
|
|
|
if (async_config_flag)
|
|
{
|
|
io_log_event(async_config, NULL, DL_EVENTS);
|
|
async_config();
|
|
async_config_flag = 0;
|
|
continue;
|
|
}
|
|
if (async_dump_flag)
|
|
{
|
|
io_log_event(async_dump, NULL, DL_EVENTS);
|
|
async_dump();
|
|
async_dump_flag = 0;
|
|
continue;
|
|
}
|
|
if (async_shutdown_flag)
|
|
{
|
|
io_log_event(async_shutdown, NULL, DL_EVENTS);
|
|
async_shutdown();
|
|
async_shutdown_flag = 0;
|
|
continue;
|
|
}
|
|
|
|
/* And finally enter poll() to find active sockets */
|
|
watchdog_stop();
|
|
birdloop_leave(&main_birdloop);
|
|
pout = poll(pfd.pfd.data, pfd.pfd.used, poll_tout);
|
|
birdloop_enter(&main_birdloop);
|
|
watchdog_start();
|
|
|
|
if (pout < 0)
|
|
{
|
|
if (errno == EINTR || errno == EAGAIN)
|
|
continue;
|
|
bug("poll: %m");
|
|
}
|
|
if (pout)
|
|
{
|
|
if (pfd.pfd.data[0].revents & POLLIN)
|
|
{
|
|
/* IO loop reload requested */
|
|
pipe_drain(&main_birdloop.thread->wakeup);
|
|
atomic_fetch_and_explicit(&main_birdloop.thread_transition, ~LTT_PING, memory_order_acq_rel);
|
|
continue;
|
|
}
|
|
|
|
times_update();
|
|
|
|
/* guaranteed to be non-empty */
|
|
main_birdloop.sock_active = SKIP_BACK(sock, n, HEAD(main_birdloop.sock_list));
|
|
|
|
while (main_birdloop.sock_active)
|
|
{
|
|
sock *s = main_birdloop.sock_active;
|
|
if (s->index != -1)
|
|
{
|
|
int e;
|
|
int steps;
|
|
|
|
steps = MAX_STEPS;
|
|
if (s->fast_rx && (pfd.pfd.data[s->index].revents & POLLIN) && s->rx_hook)
|
|
do
|
|
{
|
|
steps--;
|
|
io_log_event(s->rx_hook, s->data, DL_SOCKETS);
|
|
e = sk_read(s, pfd.pfd.data[s->index].revents);
|
|
}
|
|
while (e && (main_birdloop.sock_active == s) && s->rx_hook && steps);
|
|
|
|
if (s != main_birdloop.sock_active)
|
|
continue;
|
|
|
|
steps = MAX_STEPS;
|
|
if (pfd.pfd.data[s->index].revents & POLLOUT)
|
|
do
|
|
{
|
|
steps--;
|
|
io_log_event(s->tx_hook, s->data, DL_SOCKETS);
|
|
e = sk_write(s);
|
|
}
|
|
while (e && (main_birdloop.sock_active == s) && steps);
|
|
|
|
if (s != main_birdloop.sock_active)
|
|
continue;
|
|
}
|
|
|
|
main_birdloop.sock_active = sk_next(s);
|
|
}
|
|
|
|
short_loops++;
|
|
if (events && (short_loops < SHORT_LOOP_MAX))
|
|
continue;
|
|
short_loops = 0;
|
|
|
|
int count = 0;
|
|
main_birdloop.sock_active = stored_sock;
|
|
if (main_birdloop.sock_active == NULL)
|
|
main_birdloop.sock_active = SKIP_BACK(sock, n, HEAD(main_birdloop.sock_list));
|
|
|
|
while (main_birdloop.sock_active && count < MAX_RX_STEPS)
|
|
{
|
|
sock *s = main_birdloop.sock_active;
|
|
if (s->index == -1)
|
|
goto next2;
|
|
|
|
if (!s->fast_rx && (pfd.pfd.data[s->index].revents & POLLIN) && s->rx_hook)
|
|
{
|
|
count++;
|
|
io_log_event(s->rx_hook, s->data, DL_SOCKETS);
|
|
sk_read(s, pfd.pfd.data[s->index].revents);
|
|
if (s != main_birdloop.sock_active)
|
|
continue;
|
|
}
|
|
|
|
if (pfd.pfd.data[s->index].revents & (POLLHUP | POLLERR))
|
|
{
|
|
sk_err(s, pfd.pfd.data[s->index].revents);
|
|
if (s != main_birdloop.sock_active)
|
|
continue;
|
|
}
|
|
|
|
next2: ;
|
|
main_birdloop.sock_active = sk_next(s);
|
|
}
|
|
|
|
|
|
stored_sock = main_birdloop.sock_active;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
test_old_bird(char *path)
|
|
{
|
|
int fd;
|
|
struct sockaddr_un sa;
|
|
|
|
fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
|
if (fd < 0)
|
|
die("Cannot create socket: %m");
|
|
if (strlen(path) >= sizeof(sa.sun_path))
|
|
die("Socket path too long");
|
|
bzero(&sa, sizeof(sa));
|
|
sa.sun_family = AF_UNIX;
|
|
strcpy(sa.sun_path, path);
|
|
if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
|
|
die("I found another BIRD running.");
|
|
close(fd);
|
|
}
|
|
|
|
|
|
/*
|
|
* DNS resolver
|
|
*/
|
|
|
|
ip_addr
|
|
resolve_hostname(const char *host, int type, const char **err_msg)
|
|
{
|
|
struct addrinfo *res;
|
|
struct addrinfo hints = {
|
|
.ai_family = AF_UNSPEC,
|
|
.ai_socktype = (type == SK_UDP) ? SOCK_DGRAM : SOCK_STREAM,
|
|
.ai_flags = AI_ADDRCONFIG,
|
|
};
|
|
|
|
*err_msg = NULL;
|
|
|
|
int err_code = getaddrinfo(host, NULL, &hints, &res);
|
|
if (err_code != 0)
|
|
{
|
|
*err_msg = gai_strerror(err_code);
|
|
return IPA_NONE;
|
|
}
|
|
|
|
ip_addr addr = IPA_NONE;
|
|
uint unused;
|
|
|
|
sockaddr_read((sockaddr *) res->ai_addr, res->ai_family, &addr, NULL, &unused);
|
|
freeaddrinfo(res);
|
|
|
|
return addr;
|
|
}
|