From bb183c9d0c237a4d6d1caeccb478a3571677ecca Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 22 Aug 2024 18:55:53 +0200 Subject: [PATCH] Sysdep: moved socket code into a separate file --- sysdep/unix/Makefile | 2 +- sysdep/unix/io.c | 1878 +---------------------------------------- sysdep/unix/socket.c | 1924 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1929 insertions(+), 1875 deletions(-) create mode 100644 sysdep/unix/socket.c diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index efbf3c70..2681d288 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -1,4 +1,4 @@ -src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c +src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c socket.c obj := $(src-o-files) $(all-daemon) $(cf-local) diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 892044c5..3dce3d8d 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -47,7 +47,6 @@ #include "sysdep/unix/unix.h" #include "sysdep/unix/io-loop.h" -#include CONFIG_INCLUDE_SYSIO_H /* Maximum number of calls of tx handler for one socket in one * poll iteration. Should be small enough to not monopolize CPU by @@ -344,1879 +343,6 @@ current_time_now(void) return ts.tv_sec S + ts.tv_nsec NS; } -/** - * DOC: Sockets - * - * Socket resources represent network connections. Their data structure (&socket) - * contains a lot of fields defining the exact type of the socket, the local and - * remote addresses and ports, pointers to socket buffers and finally pointers to - * hook functions to be called when new data have arrived to the receive buffer - * (@rx_hook), when the contents of the transmit buffer have been transmitted - * (@tx_hook) and when an error or connection close occurs (@err_hook). - * - * Freeing of sockets from inside socket hooks is perfectly safe. - */ - -#ifndef SOL_IP -#define SOL_IP IPPROTO_IP -#endif - -#ifndef SOL_IPV6 -#define SOL_IPV6 IPPROTO_IPV6 -#endif - -#ifndef SOL_ICMPV6 -#define SOL_ICMPV6 IPPROTO_ICMPV6 -#endif - - -/* - * Sockaddr helper functions - */ - -static inline int UNUSED sockaddr_length(int af) -{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); } - -static inline void -sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port) -{ - memset(sa, 0, sizeof(struct sockaddr_in)); -#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN - sa->sin_len = sizeof(struct sockaddr_in); -#endif - sa->sin_family = AF_INET; - sa->sin_port = htons(port); - sa->sin_addr = ipa_to_in4(a); -} - -static inline void -sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port) -{ - memset(sa, 0, sizeof(struct sockaddr_in6)); -#ifdef SIN6_LEN - sa->sin6_len = sizeof(struct sockaddr_in6); -#endif - sa->sin6_family = AF_INET6; - sa->sin6_port = htons(port); - sa->sin6_flowinfo = 0; - sa->sin6_addr = ipa_to_in6(a); - - if (ifa && ipa_is_link_local(a)) - sa->sin6_scope_id = ifa->index; -} - -void -sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port) -{ - if (af == AF_INET) - sockaddr_fill4((struct sockaddr_in *) sa, a, port); - else if (af == AF_INET6) - sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port); - else - bug("Unknown AF"); -} - -static inline void -sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port) -{ - *port = ntohs(sa->sin_port); - *a = ipa_from_in4(sa->sin_addr); -} - -static inline void -sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port) -{ - *port = ntohs(sa->sin6_port); - *a = ipa_from_in6(sa->sin6_addr); - - if (ifa && ipa_is_link_local(*a)) - *ifa = if_find_by_index(sa->sin6_scope_id); -} - -int -sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port) -{ - if (sa->sa.sa_family != af) - goto fail; - - if (af == AF_INET) - sockaddr_read4((struct sockaddr_in *) sa, a, port); - else if (af == AF_INET6) - sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port); - else - goto fail; - - return 0; - - fail: - *a = IPA_NONE; - *port = 0; - return -1; -} - - -/* - * IPv6 multicast syscalls - */ - -/* Fortunately standardized in RFC 3493 */ - -#define INIT_MREQ6(maddr,ifa) \ - { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index } - -static inline int -sk_setup_multicast6(sock *s) -{ - int index = s->iface->index; - int ttl = s->ttl; - int n = 0; - - if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) - ERR("IPV6_MULTICAST_IF"); - - if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0) - ERR("IPV6_MULTICAST_HOPS"); - - if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0) - ERR("IPV6_MULTICAST_LOOP"); - - return 0; -} - -static inline int -sk_join_group6(sock *s, ip_addr maddr) -{ - struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); - - if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0) - ERR("IPV6_JOIN_GROUP"); - - return 0; -} - -static inline int -sk_leave_group6(sock *s, ip_addr maddr) -{ - struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); - - if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0) - ERR("IPV6_LEAVE_GROUP"); - - return 0; -} - - -/* - * IPv6 packet control messages - */ - -/* Also standardized, in RFC 3542 */ - -/* - * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg - * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we - * don't have IPV6_RECVPKTINFO we suppose the OS implements the older - * RFC and we use IPV6_PKTINFO. - */ -#ifndef IPV6_RECVPKTINFO -#define IPV6_RECVPKTINFO IPV6_PKTINFO -#endif -/* - * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT. - */ -#ifndef IPV6_RECVHOPLIMIT -#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT -#endif - - -#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo)) -#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int)) - -static inline int -sk_request_cmsg6_pktinfo(sock *s) -{ - int y = 1; - - if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0) - ERR("IPV6_RECVPKTINFO"); - - return 0; -} - -static inline int -sk_request_cmsg6_ttl(sock *s) -{ - int y = 1; - - if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0) - ERR("IPV6_RECVHOPLIMIT"); - - return 0; -} - -static inline void -sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm) -{ - if (cm->cmsg_type == IPV6_PKTINFO) - { - struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm); - s->laddr = ipa_from_in6(pi->ipi6_addr); - s->lifindex = pi->ipi6_ifindex; - } -} - -static inline void -sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm) -{ - if (cm->cmsg_type == IPV6_HOPLIMIT) - s->rcv_ttl = * (int *) CMSG_DATA(cm); -} - -static inline void -sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) -{ - struct cmsghdr *cm; - struct in6_pktinfo *pi; - int controllen = 0; - - msg->msg_control = cbuf; - msg->msg_controllen = cbuflen; - - cm = CMSG_FIRSTHDR(msg); - cm->cmsg_level = SOL_IPV6; - cm->cmsg_type = IPV6_PKTINFO; - cm->cmsg_len = CMSG_LEN(sizeof(*pi)); - controllen += CMSG_SPACE(sizeof(*pi)); - - pi = (struct in6_pktinfo *) CMSG_DATA(cm); - pi->ipi6_ifindex = s->iface ? s->iface->index : 0; - pi->ipi6_addr = ipa_to_in6(s->saddr); - - msg->msg_controllen = controllen; -} - - -/* - * Miscellaneous socket syscalls - */ - -static inline int -sk_set_ttl4(sock *s, int ttl) -{ - if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0) - ERR("IP_TTL"); - - return 0; -} - -static inline int -sk_set_ttl6(sock *s, int ttl) -{ - if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0) - ERR("IPV6_UNICAST_HOPS"); - - return 0; -} - -static inline int -sk_set_tos4(sock *s, int tos) -{ - if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0) - ERR("IP_TOS"); - - return 0; -} - -static inline int -sk_set_tos6(sock *s, int tos) -{ - if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0) - ERR("IPV6_TCLASS"); - - return 0; -} - -static inline int -sk_set_high_port(sock *s UNUSED) -{ - /* Port range setting is optional, ignore it if not supported */ - -#ifdef IP_PORTRANGE - if (sk_is_ipv4(s)) - { - int range = IP_PORTRANGE_HIGH; - if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0) - ERR("IP_PORTRANGE"); - } -#endif - -#ifdef IPV6_PORTRANGE - if (sk_is_ipv6(s)) - { - int range = IPV6_PORTRANGE_HIGH; - if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0) - ERR("IPV6_PORTRANGE"); - } -#endif - - return 0; -} - -static inline byte * -sk_skip_ip_header(byte *pkt, int *len) -{ - if ((*len < 20) || ((*pkt & 0xf0) != 0x40)) - return NULL; - - int hlen = (*pkt & 0x0f) * 4; - if ((hlen < 20) || (hlen > *len)) - return NULL; - - *len -= hlen; - return pkt + hlen; -} - -byte * -sk_rx_buffer(sock *s, int *len) -{ - if (sk_is_ipv4(s) && (s->type == SK_IP)) - return sk_skip_ip_header(s->rbuf, len); - else - return s->rbuf; -} - - -/* - * Public socket functions - */ - -/** - * sk_setup_multicast - enable multicast for given socket - * @s: socket - * - * Prepare transmission of multicast packets for given datagram socket. - * The socket must have defined @iface. - * - * Result: 0 for success, -1 for an error. - */ - -int -sk_setup_multicast(sock *s) -{ - ASSERT(s->iface); - - if (sk_is_ipv4(s)) - return sk_setup_multicast4(s); - else - return sk_setup_multicast6(s); -} - -/** - * sk_join_group - join multicast group for given socket - * @s: socket - * @maddr: multicast address - * - * Join multicast group for given datagram socket and associated interface. - * The socket must have defined @iface. - * - * Result: 0 for success, -1 for an error. - */ - -int -sk_join_group(sock *s, ip_addr maddr) -{ - if (sk_is_ipv4(s)) - return sk_join_group4(s, maddr); - else - return sk_join_group6(s, maddr); -} - -/** - * sk_leave_group - leave multicast group for given socket - * @s: socket - * @maddr: multicast address - * - * Leave multicast group for given datagram socket and associated interface. - * The socket must have defined @iface. - * - * Result: 0 for success, -1 for an error. - */ - -int -sk_leave_group(sock *s, ip_addr maddr) -{ - if (sk_is_ipv4(s)) - return sk_leave_group4(s, maddr); - else - return sk_leave_group6(s, maddr); -} - -/** - * sk_setup_broadcast - enable broadcast for given socket - * @s: socket - * - * Allow reception and transmission of broadcast packets for given datagram - * socket. The socket must have defined @iface. For transmission, packets should - * be send to @brd address of @iface. - * - * Result: 0 for success, -1 for an error. - */ - -int -sk_setup_broadcast(sock *s) -{ - int y = 1; - - if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0) - ERR("SO_BROADCAST"); - - return 0; -} - -/** - * sk_set_ttl - set transmit TTL for given socket - * @s: socket - * @ttl: TTL value - * - * Set TTL for already opened connections when TTL was not set before. Useful - * for accepted connections when different ones should have different TTL. - * - * Result: 0 for success, -1 for an error. - */ - -int -sk_set_ttl(sock *s, int ttl) -{ - s->ttl = ttl; - - if (sk_is_ipv4(s)) - return sk_set_ttl4(s, ttl); - else - return sk_set_ttl6(s, ttl); -} - -/** - * sk_set_min_ttl - set minimal accepted TTL for given socket - * @s: socket - * @ttl: TTL value - * - * Set minimal accepted TTL for given socket. Can be used for TTL security. - * implementations. - * - * Result: 0 for success, -1 for an error. - */ - -int -sk_set_min_ttl(sock *s, int ttl) -{ - if (sk_is_ipv4(s)) - return sk_set_min_ttl4(s, ttl); - else - return sk_set_min_ttl6(s, ttl); -} - -#if 0 -/** - * sk_set_md5_auth - add / remove MD5 security association for given socket - * @s: socket - * @local: IP address of local side - * @remote: IP address of remote side - * @ifa: Interface for link-local IP address - * @passwd: Password used for MD5 authentication - * @setkey: Update also system SA/SP database - * - * In TCP MD5 handling code in kernel, there is a set of security associations - * used for choosing password and other authentication parameters according to - * the local and remote address. This function is useful for listening socket, - * for active sockets it may be enough to set s->password field. - * - * When called with passwd != NULL, the new pair is added, - * When called with passwd == NULL, the existing pair is removed. - * - * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are - * stored in global SA/SP database (but the behavior also must be enabled on - * per-socket basis). In case of multiple sockets to the same neighbor, the - * socket-specific state must be configured for each socket while global state - * just once per src-dst pair. The @setkey argument controls whether the global - * state (SA/SP database) is also updated. - * - * Result: 0 for success, -1 for an error. - */ - -int -sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey) -{ DUMMY; } -#endif - -/** - * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket - * @s: socket - * @offset: offset - * - * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the - * kernel will automatically fill it for outgoing packets and check it for - * incoming packets. Should not be used on ICMPv6 sockets, where the position is - * known to the kernel. - * - * Result: 0 for success, -1 for an error. - */ - -int -sk_set_ipv6_checksum(sock *s, int offset) -{ - if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) - ERR("IPV6_CHECKSUM"); - - return 0; -} - -int -sk_set_icmp6_filter(sock *s, int p1, int p2) -{ - /* a bit of lame interface, but it is here only for Radv */ - struct icmp6_filter f; - - ICMP6_FILTER_SETBLOCKALL(&f); - ICMP6_FILTER_SETPASS(p1, &f); - ICMP6_FILTER_SETPASS(p2, &f); - - if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) - ERR("ICMP6_FILTER"); - - return 0; -} - -void -sk_log_error(sock *s, const char *p) -{ - log(L_ERR "%s: Socket error: %s%#m", p, s->err); -} - - -/* - * Actual struct birdsock code - */ - -sock * -sk_next(sock *s) -{ - if (!s->n.next->next) - return NULL; - else - return SKIP_BACK(sock, n, s->n.next); -} - -static void -sk_alloc_bufs(sock *s) -{ - if (!s->rbuf && s->rbsize) - s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize); - s->rpos = s->rbuf; - if (!s->tbuf && s->tbsize) - s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize); - s->tpos = s->ttx = s->tbuf; -} - -static void -sk_free_bufs(sock *s) -{ - if (s->rbuf_alloc) - { - xfree(s->rbuf_alloc); - s->rbuf = s->rbuf_alloc = NULL; - } - if (s->tbuf_alloc) - { - xfree(s->tbuf_alloc); - s->tbuf = s->tbuf_alloc = NULL; - } -} - -#ifdef HAVE_LIBSSH -static void -sk_ssh_free(sock *s) -{ - struct ssh_sock *ssh = s->ssh; - - if (s->ssh == NULL) - return; - - s->ssh = NULL; - - if (ssh->channel) - { - ssh_channel_close(ssh->channel); - ssh_channel_free(ssh->channel); - ssh->channel = NULL; - } - - if (ssh->session) - { - ssh_disconnect(ssh->session); - ssh_free(ssh->session); - ssh->session = NULL; - } -} -#endif - - -static void -sk_free(resource *r) -{ - sock *s = (sock *) r; - - sk_free_bufs(s); - -#ifdef HAVE_LIBSSH - if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE) - sk_ssh_free(s); -#endif - - if (s->loop) - birdloop_remove_socket(s->loop, s); - - if (s->fd >= 0 && s->type != SK_SSH && s->type != SK_SSH_ACTIVE) - close(s->fd); - - s->fd = -1; -} - -void -sk_set_rbsize(sock *s, uint val) -{ - ASSERT(s->rbuf_alloc == s->rbuf); - - if (s->rbsize == val) - return; - - s->rbsize = val; - xfree(s->rbuf_alloc); - s->rbuf_alloc = xmalloc(val); - s->rpos = s->rbuf = s->rbuf_alloc; -} - -void -sk_set_tbsize(sock *s, uint val) -{ - ASSERT(s->tbuf_alloc == s->tbuf); - - if (s->tbsize == val) - return; - - byte *old_tbuf = s->tbuf; - - s->tbsize = val; - s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val); - s->tpos = s->tbuf + (s->tpos - old_tbuf); - s->ttx = s->tbuf + (s->ttx - old_tbuf); -} - -void -sk_set_tbuf(sock *s, void *tbuf) -{ - s->tbuf = tbuf ?: s->tbuf_alloc; - s->ttx = s->tpos = s->tbuf; -} - -void -sk_reallocate(sock *s) -{ - sk_free_bufs(s); - sk_alloc_bufs(s); -} - -static void -sk_dump(resource *r, unsigned indent UNUSED) -{ - sock *s = (sock *) r; - static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" }; - - debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n", - sk_type_names[s->type], - s->data, - s->saddr, - s->sport, - s->daddr, - s->dport, - s->tos, - s->ttl, - s->iface ? s->iface->name : "none"); -} - -static struct resclass sk_class = { - "Socket", - sizeof(sock), - sk_free, - sk_dump, - NULL, - NULL -}; - -/** - * sk_new - create a socket - * @p: pool - * - * This function creates a new socket resource. If you want to use it, - * you need to fill in all the required fields of the structure and - * call sk_open() to do the actual opening of the socket. - * - * The real function name is sock_new(), sk_new() is a macro wrapper - * to avoid collision with OpenSSL. - */ -sock * -sock_new(pool *p) -{ - sock *s = ralloc(p, &sk_class); - s->pool = p; - // s->saddr = s->daddr = IPA_NONE; - s->tos = s->priority = s->ttl = -1; - s->fd = -1; - return s; -} - -static int -sk_setup(sock *s) -{ - int y = 1; - int fd = s->fd; - - if (s->type == SK_SSH_ACTIVE) - return 0; - - if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) - ERR("O_NONBLOCK"); - - if (!s->af) - return 0; - - if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND)) - s->flags |= SKF_PKTINFO; - -#ifdef CONFIG_USE_HDRINCL - if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO)) - { - s->flags &= ~SKF_PKTINFO; - s->flags |= SKF_HDRINCL; - if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0) - ERR("IP_HDRINCL"); - } -#endif - - if (s->vrf && (s->vrf != &default_vrf) && !s->iface) - { - /* Bind socket to associated VRF interface. - This is Linux-specific, but so is SO_BINDTODEVICE. */ -#ifdef SO_BINDTODEVICE - struct ifreq ifr = {}; - strcpy(ifr.ifr_name, s->vrf->name); - if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) - ERR("SO_BINDTODEVICE"); -#endif - } - - if (s->iface) - { -#ifdef SO_BINDTODEVICE - struct ifreq ifr = {}; - strcpy(ifr.ifr_name, s->iface->name); - if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) - ERR("SO_BINDTODEVICE"); -#endif - -#ifdef CONFIG_UNIX_DONTROUTE - if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0) - ERR("SO_DONTROUTE"); -#endif - } - - if (sk_is_ipv4(s)) - { - if (s->flags & SKF_LADDR_RX) - if (sk_request_cmsg4_pktinfo(s) < 0) - return -1; - - if (s->flags & SKF_TTL_RX) - if (sk_request_cmsg4_ttl(s) < 0) - return -1; - - if ((s->type == SK_UDP) || (s->type == SK_IP)) - if (sk_disable_mtu_disc4(s) < 0) - return -1; - - if (s->ttl >= 0) - if (sk_set_ttl4(s, s->ttl) < 0) - return -1; - - if (s->tos >= 0) - if (sk_set_tos4(s, s->tos) < 0) - return -1; - } - - if (sk_is_ipv6(s)) - { - if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP)) - if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0) - ERR("IPV6_V6ONLY"); - - if (s->flags & SKF_LADDR_RX) - if (sk_request_cmsg6_pktinfo(s) < 0) - return -1; - - if (s->flags & SKF_TTL_RX) - if (sk_request_cmsg6_ttl(s) < 0) - return -1; - - if ((s->type == SK_UDP) || (s->type == SK_IP)) - if (sk_disable_mtu_disc6(s) < 0) - return -1; - - if (s->ttl >= 0) - if (sk_set_ttl6(s, s->ttl) < 0) - return -1; - - if (s->tos >= 0) - if (sk_set_tos6(s, s->tos) < 0) - return -1; - } - - /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */ - if (s->priority >= 0) - if (sk_set_priority(s, s->priority) < 0) - return -1; - - return 0; -} - -static int -sk_connect(sock *s) -{ - sockaddr sa; - sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport); - return connect(s->fd, &sa.sa, SA_LEN(sa)); -} - -static void -sk_tcp_connected(sock *s) -{ - sockaddr sa; - int sa_len = sizeof(sa); - - if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) || - (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0)) - log(L_WARN "SOCK: Cannot get local IP address for TCP>"); - - s->type = SK_TCP; - sk_alloc_bufs(s); - s->tx_hook(s); -} - -#ifdef HAVE_LIBSSH -static void -sk_ssh_connected(sock *s) -{ - sk_alloc_bufs(s); - s->type = SK_SSH; - s->tx_hook(s); -} -#endif - -static int -sk_passive_connected(sock *s, int type) -{ - sockaddr loc_sa, rem_sa; - int loc_sa_len = sizeof(loc_sa); - int rem_sa_len = sizeof(rem_sa); - - int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len); - if (fd < 0) - { - if ((errno != EINTR) && (errno != EAGAIN)) - s->err_hook(s, errno); - return 0; - } - - struct domain_generic *sock_lock = DG_IS_LOCKED(s->pool->domain) ? NULL : s->pool->domain; - if (sock_lock) - DG_LOCK(sock_lock); - - sock *t = sk_new(s->pool); - t->type = type; - t->data = s->data; - t->af = s->af; - t->fd = fd; - t->ttl = s->ttl; - t->tos = s->tos; - t->vrf = s->vrf; - t->rbsize = s->rbsize; - t->tbsize = s->tbsize; - - if (type == SK_TCP) - { - if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) || - (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0)) - log(L_WARN "SOCK: Cannot get local IP address for TCP<"); - - if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0) - log(L_WARN "SOCK: Cannot get remote IP address for TCP<"); - } - - if (sk_setup(t) < 0) - { - /* FIXME: Call err_hook instead ? */ - log(L_ERR "SOCK: Incoming connection: %s%#m", t->err); - - /* FIXME: handle it better in rfree() */ - close(t->fd); - t->fd = -1; - sk_close(t); - t = NULL; - } - else - { - birdloop_add_socket(s->loop, t); - sk_alloc_bufs(t); - } - - if (sock_lock) - DG_UNLOCK(sock_lock); - - if (t) - s->rx_hook(t, 0); - - return 1; -} - -#ifdef HAVE_LIBSSH -/* - * Return SSH_OK or SSH_AGAIN or SSH_ERROR - */ -static int -sk_ssh_connect(sock *s) -{ - s->fd = ssh_get_fd(s->ssh->session); - - /* Big fall thru automata */ - switch (s->ssh->state) - { - case SK_SSH_CONNECT: - { - switch (ssh_connect(s->ssh->session)) - { - case SSH_AGAIN: - /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1) - * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere - * documented but our code relies on that. - */ - return SSH_AGAIN; - - case SSH_OK: - break; - - default: - return SSH_ERROR; - } - } /* fallthrough */ - - case SK_SSH_SERVER_KNOWN: - { - s->ssh->state = SK_SSH_SERVER_KNOWN; - - if (s->ssh->server_hostkey_path) - { - int server_identity_is_ok = 1; - -#ifdef HAVE_SSH_OLD_SERVER_VALIDATION_API -#define ssh_session_is_known_server ssh_is_server_known -#define SSH_KNOWN_HOSTS_OK SSH_SERVER_KNOWN_OK -#define SSH_KNOWN_HOSTS_UNKNOWN SSH_SERVER_NOT_KNOWN -#define SSH_KNOWN_HOSTS_CHANGED SSH_SERVER_KNOWN_CHANGED -#define SSH_KNOWN_HOSTS_NOT_FOUND SSH_SERVER_FILE_NOT_FOUND -#define SSH_KNOWN_HOSTS_ERROR SSH_SERVER_ERROR -#define SSH_KNOWN_HOSTS_OTHER SSH_SERVER_FOUND_OTHER -#endif - - /* Check server identity */ - switch (ssh_session_is_known_server(s->ssh->session)) - { -#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args); - case SSH_KNOWN_HOSTS_OK: - /* The server is known and has not changed. */ - break; - - case SSH_KNOWN_HOSTS_UNKNOWN: - LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path); - server_identity_is_ok = 0; - break; - - case SSH_KNOWN_HOSTS_CHANGED: - LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key."); - server_identity_is_ok = 0; - break; - - case SSH_KNOWN_HOSTS_NOT_FOUND: - LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path); - server_identity_is_ok = 0; - break; - - case SSH_KNOWN_HOSTS_ERROR: - LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened"); - server_identity_is_ok = 0; - break; - - case SSH_KNOWN_HOSTS_OTHER: - LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \ - "It is a possible attack."); - server_identity_is_ok = 0; - break; - } - - if (!server_identity_is_ok) - return SSH_ERROR; - } - } /* fallthrough */ - - case SK_SSH_USERAUTH: - { - s->ssh->state = SK_SSH_USERAUTH; - switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL)) - { - case SSH_AUTH_AGAIN: - return SSH_AGAIN; - - case SSH_AUTH_SUCCESS: - break; - - default: - return SSH_ERROR; - } - } /* fallthrough */ - - case SK_SSH_CHANNEL: - { - s->ssh->state = SK_SSH_CHANNEL; - s->ssh->channel = ssh_channel_new(s->ssh->session); - if (s->ssh->channel == NULL) - return SSH_ERROR; - } /* fallthrough */ - - case SK_SSH_SESSION: - { - s->ssh->state = SK_SSH_SESSION; - switch (ssh_channel_open_session(s->ssh->channel)) - { - case SSH_AGAIN: - return SSH_AGAIN; - - case SSH_OK: - break; - - default: - return SSH_ERROR; - } - } /* fallthrough */ - - case SK_SSH_SUBSYSTEM: - { - s->ssh->state = SK_SSH_SUBSYSTEM; - if (s->ssh->subsystem) - { - switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem)) - { - case SSH_AGAIN: - return SSH_AGAIN; - - case SSH_OK: - break; - - default: - return SSH_ERROR; - } - } - } /* fallthrough */ - - case SK_SSH_ESTABLISHED: - s->ssh->state = SK_SSH_ESTABLISHED; - } - - return SSH_OK; -} - -/* - * Return file descriptor number if success - * Return -1 if failed - */ -static int -sk_open_ssh(sock *s) -{ - if (!s->ssh) - bug("sk_open() sock->ssh is not allocated"); - - ssh_session sess = ssh_new(); - if (sess == NULL) - ERR2("Cannot create a ssh session"); - s->ssh->session = sess; - - const int verbosity = SSH_LOG_NOLOG; - ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity); - ssh_options_set(sess, SSH_OPTIONS_HOST, s->host); - ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport)); - /* TODO: Add SSH_OPTIONS_BINDADDR */ - ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username); - - if (s->ssh->server_hostkey_path) - ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path); - - if (s->ssh->client_privkey_path) - ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path); - - ssh_set_blocking(sess, 0); - - switch (sk_ssh_connect(s)) - { - case SSH_AGAIN: - break; - - case SSH_OK: - sk_ssh_connected(s); - break; - - case SSH_ERROR: - ERR2(ssh_get_error(sess)); - break; - } - - return ssh_get_fd(sess); - - err: - return -1; -} -#endif - -/** - * sk_open - open a socket - * @loop: loop - * @s: socket - * - * This function takes a socket resource created by sk_new() and - * initialized by the user and binds a corresponding network connection - * to it. - * - * Result: 0 for success, -1 for an error. - */ -int -sk_open(sock *s, struct birdloop *loop) -{ - int af = AF_UNSPEC; - int fd = -1; - int do_bind = 0; - int bind_port = 0; - ip_addr bind_addr = IPA_NONE; - sockaddr sa; - - if (s->type <= SK_IP) - { - /* - * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either - * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr). - * But the specifications have to be consistent. - */ - - switch (s->subtype) - { - case 0: - ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) || - (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr))); - af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6; - break; - - case SK_IPV4: - ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr)); - ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr)); - af = AF_INET; - break; - - case SK_IPV6: - ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr)); - ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr)); - af = AF_INET6; - break; - - default: - bug("Invalid subtype %d", s->subtype); - } - } - - switch (s->type) - { - case SK_TCP_ACTIVE: - s->ttx = ""; /* Force s->ttx != s->tpos */ - /* Fall thru */ - case SK_TCP_PASSIVE: - fd = socket(af, SOCK_STREAM, IPPROTO_TCP); - bind_port = s->sport; - bind_addr = s->saddr; - do_bind = bind_port || ipa_nonzero(bind_addr); - break; - -#ifdef HAVE_LIBSSH - case SK_SSH_ACTIVE: - s->ttx = ""; /* Force s->ttx != s->tpos */ - fd = sk_open_ssh(s); - break; -#endif - - case SK_UDP: - fd = socket(af, SOCK_DGRAM, IPPROTO_UDP); - bind_port = s->sport; - bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; - do_bind = 1; - break; - - case SK_IP: - fd = socket(af, SOCK_RAW, s->dport); - bind_port = 0; - bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; - do_bind = ipa_nonzero(bind_addr); - break; - - case SK_MAGIC: - af = 0; - fd = s->fd; - break; - - default: - bug("sk_open() called for invalid sock type %d", s->type); - } - - if (fd < 0) - ERR("socket"); - - s->af = af; - s->fd = fd; - - if (sk_setup(s) < 0) - goto err; - - if (do_bind) - { - if (bind_port) - { - int y = 1; - - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0) - ERR2("SO_REUSEADDR"); - -#ifdef CONFIG_NO_IFACE_BIND - /* Workaround missing ability to bind to an iface */ - if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr)) - { - if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0) - ERR2("SO_REUSEPORT"); - } -#endif - } - else - if (s->flags & SKF_HIGH_PORT) - if (sk_set_high_port(s) < 0) - log(L_WARN "Socket error: %s%#m", s->err); - - if (s->flags & SKF_FREEBIND) - if (sk_set_freebind(s) < 0) - log(L_WARN "Socket error: %s%#m", s->err); - - sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port); - if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) - ERR2("bind"); - } - - if (s->password) - if (sk_set_md5_auth(s, s->saddr, s->daddr, -1, s->iface, s->password, 0) < 0) - goto err; - - switch (s->type) - { - case SK_TCP_ACTIVE: - if (sk_connect(s) >= 0) - sk_tcp_connected(s); - else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && - errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH) - ERR2("connect"); - break; - - case SK_TCP_PASSIVE: - if (listen(fd, 8) < 0) - ERR2("listen"); - break; - - case SK_UDP: - if (s->flags & SKF_CONNECT) - if (sk_connect(s) < 0) - ERR2("connect"); - - sk_alloc_bufs(s); - break; - - case SK_SSH_ACTIVE: - case SK_MAGIC: - break; - - default: - sk_alloc_bufs(s); - } - - birdloop_add_socket(loop, s); - return 0; - -err: - close(fd); - s->fd = -1; - return -1; -} - -int -sk_open_unix(sock *s, struct birdloop *loop, char *name) -{ - struct sockaddr_un sa; - int fd; - - /* We are sloppy during error (leak fd and not set s->err), but we die anyway */ - - fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (fd < 0) - return -1; - - if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) - return -1; - - /* Path length checked in test_old_bird() but we may need unix sockets for other reasons in future */ - ASSERT_DIE(strlen(name) < sizeof(sa.sun_path)); - - sa.sun_family = AF_UNIX; - strcpy(sa.sun_path, name); - - if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0) - return -1; - - if (listen(fd, 8) < 0) - return -1; - - s->fd = fd; - birdloop_add_socket(loop, s); - return 0; -} - - -#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \ - CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL) -#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO) - -static void -sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) -{ - if (sk_is_ipv4(s)) - sk_prepare_cmsgs4(s, msg, cbuf, cbuflen); - else - sk_prepare_cmsgs6(s, msg, cbuf, cbuflen); -} - -static void -sk_process_cmsgs(sock *s, struct msghdr *msg) -{ - struct cmsghdr *cm; - - s->laddr = IPA_NONE; - s->lifindex = 0; - s->rcv_ttl = -1; - - for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) - { - if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s)) - { - sk_process_cmsg4_pktinfo(s, cm); - sk_process_cmsg4_ttl(s, cm); - } - - if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s)) - { - sk_process_cmsg6_pktinfo(s, cm); - sk_process_cmsg6_ttl(s, cm); - } - } -} - - -static inline int -sk_sendmsg(sock *s) -{ - struct iovec iov = {s->tbuf, s->tpos - s->tbuf}; - byte cmsg_buf[CMSG_TX_SPACE]; - sockaddr dst; - int flags = 0; - - sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport); - - struct msghdr msg = { - .msg_name = &dst.sa, - .msg_namelen = SA_LEN(dst), - .msg_iov = &iov, - .msg_iovlen = 1 - }; - -#ifdef CONFIG_DONTROUTE_UNICAST - /* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we - cannot use it for other cases (e.g. when TTL security is used). */ - if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1)) - flags = MSG_DONTROUTE; -#endif - -#ifdef CONFIG_USE_HDRINCL - byte hdr[20]; - struct iovec iov2[2] = { {hdr, 20}, iov }; - - if (s->flags & SKF_HDRINCL) - { - sk_prepare_ip_header(s, hdr, iov.iov_len); - msg.msg_iov = iov2; - msg.msg_iovlen = 2; - } -#endif - - if (s->flags & SKF_PKTINFO) - sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); - - return sendmsg(s->fd, &msg, flags); -} - -static inline int -sk_recvmsg(sock *s) -{ - struct iovec iov = {s->rbuf, s->rbsize}; - byte cmsg_buf[CMSG_RX_SPACE]; - sockaddr src; - - struct msghdr msg = { - .msg_name = &src.sa, - .msg_namelen = sizeof(src), // XXXX ?? - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = cmsg_buf, - .msg_controllen = sizeof(cmsg_buf), - .msg_flags = 0 - }; - - int rv = recvmsg(s->fd, &msg, 0); - if (rv < 0) - return rv; - - //ifdef IPV4 - // if (cf_type == SK_IP) - // rv = ipv4_skip_header(pbuf, rv); - //endif - - sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport); - sk_process_cmsgs(s, &msg); - - if (msg.msg_flags & MSG_TRUNC) - s->flags |= SKF_TRUNCATED; - else - s->flags &= ~SKF_TRUNCATED; - - return rv; -} - - -static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; } - -bool -sk_tx_pending(sock *s) -{ - return s->ttx != s->tpos; -} - - -static int -sk_maybe_write(sock *s) -{ - int e; - - switch (s->type) - { - case SK_TCP: - case SK_MAGIC: - case SK_UNIX: - while (sk_tx_pending(s)) - { - e = write(s->fd, s->ttx, s->tpos - s->ttx); - - if (e < 0) - { - if (errno != EINTR && errno != EAGAIN) - { - reset_tx_buffer(s); - /* EPIPE is just a connection close notification during TX */ - s->err_hook(s, (errno != EPIPE) ? errno : 0); - return -1; - } - return 0; - } - s->ttx += e; - } - reset_tx_buffer(s); - return 1; - -#ifdef HAVE_LIBSSH - case SK_SSH: - while (sk_tx_pending(s)) - { - e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx); - - if (e < 0) - { - s->err = ssh_get_error(s->ssh->session); - s->err_hook(s, ssh_get_error_code(s->ssh->session)); - - reset_tx_buffer(s); - /* EPIPE is just a connection close notification during TX */ - s->err_hook(s, (errno != EPIPE) ? errno : 0); - return -1; - } - s->ttx += e; - } - reset_tx_buffer(s); - return 1; -#endif - - case SK_UDP: - case SK_IP: - { - if (s->tbuf == s->tpos) - return 1; - - e = sk_sendmsg(s); - - if (e < 0) - { - if (errno != EINTR && errno != EAGAIN) - { - reset_tx_buffer(s); - s->err_hook(s, errno); - return -1; - } - - if (!s->tx_hook) - reset_tx_buffer(s); - return 0; - } - reset_tx_buffer(s); - return 1; - } - - default: - bug("sk_maybe_write: unknown socket type %d", s->type); - } -} - -int -sk_rx_ready(sock *s) -{ - int rv; - struct pollfd pfd = { .fd = s->fd }; - pfd.events |= POLLIN; - - redo: - rv = poll(&pfd, 1, 0); - - if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) - goto redo; - - return rv; -} - -/** - * sk_send - send data to a socket - * @s: socket - * @len: number of bytes to send - * - * This function sends @len bytes of data prepared in the - * transmit buffer of the socket @s to the network connection. - * If the packet can be sent immediately, it does so and returns - * 1, else it queues the packet for later processing, returns 0 - * and calls the @tx_hook of the socket when the tranmission - * takes place. - */ -int -sk_send(sock *s, unsigned len) -{ - s->ttx = s->tbuf; - s->tpos = s->tbuf + len; - - int e = sk_maybe_write(s); - if (e == 0) /* Trigger thread poll reload to poll this socket's write. */ - socket_changed(s); - - return e; -} - -/** - * sk_send_to - send data to a specific destination - * @s: socket - * @len: number of bytes to send - * @addr: IP address to send the packet to - * @port: port to send the packet to - * - * This is a sk_send() replacement for connection-less packet sockets - * which allows destination of the packet to be chosen dynamically. - * Raw IP sockets should use 0 for @port. - */ -int -sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port) -{ - s->daddr = addr; - if (port) - s->dport = port; - - s->ttx = s->tbuf; - s->tpos = s->tbuf + len; - return sk_maybe_write(s); -} - -/* -int -sk_send_full(sock *s, unsigned len, struct iface *ifa, - ip_addr saddr, ip_addr daddr, unsigned dport) -{ - s->iface = ifa; - s->saddr = saddr; - s->daddr = daddr; - s->dport = dport; - s->ttx = s->tbuf; - s->tpos = s->tbuf + len; - return sk_maybe_write(s); -} -*/ - -static void -call_rx_hook(sock *s, int size) -{ - if (s->rx_hook(s, size)) - { - /* We need to be careful since the socket could have been deleted by the hook */ - if (s->loop->sock_active == s) - s->rpos = s->rbuf; - } -} - -#ifdef HAVE_LIBSSH -static int -sk_read_ssh(sock *s) -{ - ssh_channel rchans[2] = { s->ssh->channel, NULL }; - struct timeval timev = { 1, 0 }; - - if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR) - return 1; /* Try again */ - - if (ssh_channel_is_eof(s->ssh->channel) != 0) - { - /* The remote side is closing the connection */ - s->err_hook(s, 0); - return 0; - } - - if (rchans[0] == NULL) - return 0; /* No data is available on the socket */ - - const uint used_bytes = s->rpos - s->rbuf; - const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0); - if (read_bytes > 0) - { - /* Received data */ - s->rpos += read_bytes; - call_rx_hook(s, used_bytes + read_bytes); - return 1; - } - else if (read_bytes == 0) - { - if (ssh_channel_is_eof(s->ssh->channel) != 0) - { - /* The remote side is closing the connection */ - s->err_hook(s, 0); - } - } - else - { - s->err = ssh_get_error(s->ssh->session); - s->err_hook(s, ssh_get_error_code(s->ssh->session)); - } - - return 0; /* No data is available on the socket */ -} -#endif - - /* sk_read() and sk_write() are called from BFD's event loop */ - -static inline int -sk_read_noflush(sock *s, int revents) -{ - switch (s->type) - { - case SK_TCP_PASSIVE: - return sk_passive_connected(s, SK_TCP); - - case SK_UNIX_PASSIVE: - return sk_passive_connected(s, SK_UNIX); - - case SK_TCP: - case SK_UNIX: - { - int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos); - - if (c < 0) - { - if (errno != EINTR && errno != EAGAIN) - s->err_hook(s, errno); - else if (errno == EAGAIN && !(revents & POLLIN)) - { - log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents); - s->err_hook(s, 0); - } - } - else if (!c) - s->err_hook(s, 0); - else - { - s->rpos += c; - call_rx_hook(s, s->rpos - s->rbuf); - return 1; - } - return 0; - } - -#ifdef HAVE_LIBSSH - case SK_SSH: - return sk_read_ssh(s); -#endif - - case SK_MAGIC: - return s->rx_hook(s, 0); - - default: - { - int e = sk_recvmsg(s); - - if (e < 0) - { - if (errno != EINTR && errno != EAGAIN) - s->err_hook(s, errno); - return 0; - } - - s->rpos = s->rbuf + e; - s->rx_hook(s, e); - return 1; - } - } -} - -int -sk_read(sock *s, int revents) -{ - int e = sk_read_noflush(s, revents); - tmp_flush(); - return e; -} - -static inline int -sk_write_noflush(sock *s) -{ - switch (s->type) - { - case SK_TCP_ACTIVE: - { - if (sk_connect(s) >= 0 || errno == EISCONN) - sk_tcp_connected(s); - else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS) - s->err_hook(s, errno); - return 0; - } - -#ifdef HAVE_LIBSSH - case SK_SSH_ACTIVE: - { - switch (sk_ssh_connect(s)) - { - case SSH_OK: - sk_ssh_connected(s); - break; - - case SSH_AGAIN: - return 1; - - case SSH_ERROR: - s->err = ssh_get_error(s->ssh->session); - s->err_hook(s, ssh_get_error_code(s->ssh->session)); - break; - } - return 0; - } -#endif - - default: - if (sk_tx_pending(s) && sk_maybe_write(s) > 0) - { - if (s->tx_hook) - s->tx_hook(s); - return 1; - } - return 0; - } -} - -int -sk_write(sock *s) -{ - int e = sk_write_noflush(s); - tmp_flush(); - return e; -} - -int sk_is_ipv4(sock *s) -{ return s->af == AF_INET; } - -int sk_is_ipv6(sock *s) -{ return s->af == AF_INET6; } - -void -sk_err(sock *s, int revents) -{ - int se = 0, sse = sizeof(se); - if ((s->type != SK_MAGIC) && (revents & POLLERR)) - if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0) - { - log(L_ERR "IO: Socket error: SO_ERROR: %m"); - se = 0; - } - - s->err_hook(s, se); - tmp_flush(); -} - -void -sk_dump_all(void) -{ - node *n; - sock *s; - - debug("Open sockets:\n"); - WALK_LIST(n, main_birdloop.sock_list) - { - s = SKIP_BACK(sock, n, n); - debug("%p ", s); - sk_dump(&s->r, 3); - } - debug("\n"); -} - /* * Internal event log and watchdog @@ -2391,6 +517,10 @@ static int short_loops = 0; sock *stored_sock; +int sk_read(sock *s, int revents); +int sk_write(sock *s); +void sk_err(sock *s, int revents); + void io_loop(void) { diff --git a/sysdep/unix/socket.c b/sysdep/unix/socket.c new file mode 100644 index 00000000..890014be --- /dev/null +++ b/sysdep/unix/socket.c @@ -0,0 +1,1924 @@ +/* + * BIRD Internet Routing Daemon -- Unix I/O + * + * (c) 1998--2004 Martin Mares + * (c) 2004 Ondrej Filip + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/* Unfortunately, some glibc versions hide parts of RFC 3542 API + if _GNU_SOURCE is not defined. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nest/bird.h" +#include "lib/lists.h" +#include "lib/resource.h" +#include "lib/socket.h" +#include "lib/event.h" +#include "lib/locking.h" +#include "lib/timer.h" +#include "lib/string.h" +#include "nest/iface.h" +#include "conf/conf.h" + +#include "sysdep/unix/unix.h" +#include "sysdep/unix/io-loop.h" +#include CONFIG_INCLUDE_SYSIO_H + +/** + * DOC: Sockets + * + * Socket resources represent network connections. Their data structure (&socket) + * contains a lot of fields defining the exact type of the socket, the local and + * remote addresses and ports, pointers to socket buffers and finally pointers to + * hook functions to be called when new data have arrived to the receive buffer + * (@rx_hook), when the contents of the transmit buffer have been transmitted + * (@tx_hook) and when an error or connection close occurs (@err_hook). + * + * Freeing of sockets from inside socket hooks is perfectly safe. + */ + +#ifndef SOL_IP +#define SOL_IP IPPROTO_IP +#endif + +#ifndef SOL_IPV6 +#define SOL_IPV6 IPPROTO_IPV6 +#endif + +#ifndef SOL_ICMPV6 +#define SOL_ICMPV6 IPPROTO_ICMPV6 +#endif + + +/* + * Sockaddr helper functions + */ + +static inline int UNUSED sockaddr_length(int af) +{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); } + +static inline void +sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port) +{ + memset(sa, 0, sizeof(struct sockaddr_in)); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sa->sin_len = sizeof(struct sockaddr_in); +#endif + sa->sin_family = AF_INET; + sa->sin_port = htons(port); + sa->sin_addr = ipa_to_in4(a); +} + +static inline void +sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port) +{ + memset(sa, 0, sizeof(struct sockaddr_in6)); +#ifdef SIN6_LEN + sa->sin6_len = sizeof(struct sockaddr_in6); +#endif + sa->sin6_family = AF_INET6; + sa->sin6_port = htons(port); + sa->sin6_flowinfo = 0; + sa->sin6_addr = ipa_to_in6(a); + + if (ifa && ipa_is_link_local(a)) + sa->sin6_scope_id = ifa->index; +} + +void +sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port) +{ + if (af == AF_INET) + sockaddr_fill4((struct sockaddr_in *) sa, a, port); + else if (af == AF_INET6) + sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port); + else + bug("Unknown AF"); +} + +static inline void +sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port) +{ + *port = ntohs(sa->sin_port); + *a = ipa_from_in4(sa->sin_addr); +} + +static inline void +sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port) +{ + *port = ntohs(sa->sin6_port); + *a = ipa_from_in6(sa->sin6_addr); + + if (ifa && ipa_is_link_local(*a)) + *ifa = if_find_by_index(sa->sin6_scope_id); +} + +int +sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port) +{ + if (sa->sa.sa_family != af) + goto fail; + + if (af == AF_INET) + sockaddr_read4((struct sockaddr_in *) sa, a, port); + else if (af == AF_INET6) + sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port); + else + goto fail; + + return 0; + + fail: + *a = IPA_NONE; + *port = 0; + return -1; +} + + +/* + * IPv6 multicast syscalls + */ + +/* Fortunately standardized in RFC 3493 */ + +#define INIT_MREQ6(maddr,ifa) \ + { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index } + +static inline int +sk_setup_multicast6(sock *s) +{ + int index = s->iface->index; + int ttl = s->ttl; + int n = 0; + + if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) + ERR("IPV6_MULTICAST_IF"); + + if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0) + ERR("IPV6_MULTICAST_HOPS"); + + if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0) + ERR("IPV6_MULTICAST_LOOP"); + + return 0; +} + +static inline int +sk_join_group6(sock *s, ip_addr maddr) +{ + struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); + + if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0) + ERR("IPV6_JOIN_GROUP"); + + return 0; +} + +static inline int +sk_leave_group6(sock *s, ip_addr maddr) +{ + struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); + + if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0) + ERR("IPV6_LEAVE_GROUP"); + + return 0; +} + + +/* + * IPv6 packet control messages + */ + +/* Also standardized, in RFC 3542 */ + +/* + * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg + * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we + * don't have IPV6_RECVPKTINFO we suppose the OS implements the older + * RFC and we use IPV6_PKTINFO. + */ +#ifndef IPV6_RECVPKTINFO +#define IPV6_RECVPKTINFO IPV6_PKTINFO +#endif +/* + * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT. + */ +#ifndef IPV6_RECVHOPLIMIT +#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT +#endif + + +#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo)) +#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int)) + +static inline int +sk_request_cmsg6_pktinfo(sock *s) +{ + int y = 1; + + if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0) + ERR("IPV6_RECVPKTINFO"); + + return 0; +} + +static inline int +sk_request_cmsg6_ttl(sock *s) +{ + int y = 1; + + if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0) + ERR("IPV6_RECVHOPLIMIT"); + + return 0; +} + +static inline void +sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm) +{ + if (cm->cmsg_type == IPV6_PKTINFO) + { + struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm); + s->laddr = ipa_from_in6(pi->ipi6_addr); + s->lifindex = pi->ipi6_ifindex; + } +} + +static inline void +sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm) +{ + if (cm->cmsg_type == IPV6_HOPLIMIT) + s->rcv_ttl = * (int *) CMSG_DATA(cm); +} + +static inline void +sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) +{ + struct cmsghdr *cm; + struct in6_pktinfo *pi; + int controllen = 0; + + msg->msg_control = cbuf; + msg->msg_controllen = cbuflen; + + cm = CMSG_FIRSTHDR(msg); + cm->cmsg_level = SOL_IPV6; + cm->cmsg_type = IPV6_PKTINFO; + cm->cmsg_len = CMSG_LEN(sizeof(*pi)); + controllen += CMSG_SPACE(sizeof(*pi)); + + pi = (struct in6_pktinfo *) CMSG_DATA(cm); + pi->ipi6_ifindex = s->iface ? s->iface->index : 0; + pi->ipi6_addr = ipa_to_in6(s->saddr); + + msg->msg_controllen = controllen; +} + + +/* + * Miscellaneous socket syscalls + */ + +static inline int +sk_set_ttl4(sock *s, int ttl) +{ + if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0) + ERR("IP_TTL"); + + return 0; +} + +static inline int +sk_set_ttl6(sock *s, int ttl) +{ + if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0) + ERR("IPV6_UNICAST_HOPS"); + + return 0; +} + +static inline int +sk_set_tos4(sock *s, int tos) +{ + if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0) + ERR("IP_TOS"); + + return 0; +} + +static inline int +sk_set_tos6(sock *s, int tos) +{ + if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0) + ERR("IPV6_TCLASS"); + + return 0; +} + +static inline int +sk_set_high_port(sock *s UNUSED) +{ + /* Port range setting is optional, ignore it if not supported */ + +#ifdef IP_PORTRANGE + if (sk_is_ipv4(s)) + { + int range = IP_PORTRANGE_HIGH; + if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0) + ERR("IP_PORTRANGE"); + } +#endif + +#ifdef IPV6_PORTRANGE + if (sk_is_ipv6(s)) + { + int range = IPV6_PORTRANGE_HIGH; + if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0) + ERR("IPV6_PORTRANGE"); + } +#endif + + return 0; +} + +static inline byte * +sk_skip_ip_header(byte *pkt, int *len) +{ + if ((*len < 20) || ((*pkt & 0xf0) != 0x40)) + return NULL; + + int hlen = (*pkt & 0x0f) * 4; + if ((hlen < 20) || (hlen > *len)) + return NULL; + + *len -= hlen; + return pkt + hlen; +} + +byte * +sk_rx_buffer(sock *s, int *len) +{ + if (sk_is_ipv4(s) && (s->type == SK_IP)) + return sk_skip_ip_header(s->rbuf, len); + else + return s->rbuf; +} + + +/* + * Public socket functions + */ + +/** + * sk_setup_multicast - enable multicast for given socket + * @s: socket + * + * Prepare transmission of multicast packets for given datagram socket. + * The socket must have defined @iface. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_setup_multicast(sock *s) +{ + ASSERT(s->iface); + + if (sk_is_ipv4(s)) + return sk_setup_multicast4(s); + else + return sk_setup_multicast6(s); +} + +/** + * sk_join_group - join multicast group for given socket + * @s: socket + * @maddr: multicast address + * + * Join multicast group for given datagram socket and associated interface. + * The socket must have defined @iface. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_join_group(sock *s, ip_addr maddr) +{ + if (sk_is_ipv4(s)) + return sk_join_group4(s, maddr); + else + return sk_join_group6(s, maddr); +} + +/** + * sk_leave_group - leave multicast group for given socket + * @s: socket + * @maddr: multicast address + * + * Leave multicast group for given datagram socket and associated interface. + * The socket must have defined @iface. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_leave_group(sock *s, ip_addr maddr) +{ + if (sk_is_ipv4(s)) + return sk_leave_group4(s, maddr); + else + return sk_leave_group6(s, maddr); +} + +/** + * sk_setup_broadcast - enable broadcast for given socket + * @s: socket + * + * Allow reception and transmission of broadcast packets for given datagram + * socket. The socket must have defined @iface. For transmission, packets should + * be send to @brd address of @iface. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_setup_broadcast(sock *s) +{ + int y = 1; + + if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0) + ERR("SO_BROADCAST"); + + return 0; +} + +/** + * sk_set_ttl - set transmit TTL for given socket + * @s: socket + * @ttl: TTL value + * + * Set TTL for already opened connections when TTL was not set before. Useful + * for accepted connections when different ones should have different TTL. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_set_ttl(sock *s, int ttl) +{ + s->ttl = ttl; + + if (sk_is_ipv4(s)) + return sk_set_ttl4(s, ttl); + else + return sk_set_ttl6(s, ttl); +} + +/** + * sk_set_min_ttl - set minimal accepted TTL for given socket + * @s: socket + * @ttl: TTL value + * + * Set minimal accepted TTL for given socket. Can be used for TTL security. + * implementations. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_set_min_ttl(sock *s, int ttl) +{ + if (sk_is_ipv4(s)) + return sk_set_min_ttl4(s, ttl); + else + return sk_set_min_ttl6(s, ttl); +} + +#if 0 +/** + * sk_set_md5_auth - add / remove MD5 security association for given socket + * @s: socket + * @local: IP address of local side + * @remote: IP address of remote side + * @ifa: Interface for link-local IP address + * @passwd: Password used for MD5 authentication + * @setkey: Update also system SA/SP database + * + * In TCP MD5 handling code in kernel, there is a set of security associations + * used for choosing password and other authentication parameters according to + * the local and remote address. This function is useful for listening socket, + * for active sockets it may be enough to set s->password field. + * + * When called with passwd != NULL, the new pair is added, + * When called with passwd == NULL, the existing pair is removed. + * + * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are + * stored in global SA/SP database (but the behavior also must be enabled on + * per-socket basis). In case of multiple sockets to the same neighbor, the + * socket-specific state must be configured for each socket while global state + * just once per src-dst pair. The @setkey argument controls whether the global + * state (SA/SP database) is also updated. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey) +{ DUMMY; } +#endif + +/** + * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket + * @s: socket + * @offset: offset + * + * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the + * kernel will automatically fill it for outgoing packets and check it for + * incoming packets. Should not be used on ICMPv6 sockets, where the position is + * known to the kernel. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_set_ipv6_checksum(sock *s, int offset) +{ + if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) + ERR("IPV6_CHECKSUM"); + + return 0; +} + +int +sk_set_icmp6_filter(sock *s, int p1, int p2) +{ + /* a bit of lame interface, but it is here only for Radv */ + struct icmp6_filter f; + + ICMP6_FILTER_SETBLOCKALL(&f); + ICMP6_FILTER_SETPASS(p1, &f); + ICMP6_FILTER_SETPASS(p2, &f); + + if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) + ERR("ICMP6_FILTER"); + + return 0; +} + +void +sk_log_error(sock *s, const char *p) +{ + log(L_ERR "%s: Socket error: %s%#m", p, s->err); +} + + +/* + * Actual struct birdsock code + */ + +sock * +sk_next(sock *s) +{ + if (!s->n.next->next) + return NULL; + else + return SKIP_BACK(sock, n, s->n.next); +} + +static void +sk_alloc_bufs(sock *s) +{ + if (!s->rbuf && s->rbsize) + s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize); + s->rpos = s->rbuf; + if (!s->tbuf && s->tbsize) + s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize); + s->tpos = s->ttx = s->tbuf; +} + +static void +sk_free_bufs(sock *s) +{ + if (s->rbuf_alloc) + { + xfree(s->rbuf_alloc); + s->rbuf = s->rbuf_alloc = NULL; + } + if (s->tbuf_alloc) + { + xfree(s->tbuf_alloc); + s->tbuf = s->tbuf_alloc = NULL; + } +} + +#ifdef HAVE_LIBSSH +static void +sk_ssh_free(sock *s) +{ + struct ssh_sock *ssh = s->ssh; + + if (s->ssh == NULL) + return; + + s->ssh = NULL; + + if (ssh->channel) + { + ssh_channel_close(ssh->channel); + ssh_channel_free(ssh->channel); + ssh->channel = NULL; + } + + if (ssh->session) + { + ssh_disconnect(ssh->session); + ssh_free(ssh->session); + ssh->session = NULL; + } +} +#endif + + +static void +sk_free(resource *r) +{ + sock *s = (sock *) r; + + sk_free_bufs(s); + +#ifdef HAVE_LIBSSH + if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE) + sk_ssh_free(s); +#endif + + if (s->loop) + birdloop_remove_socket(s->loop, s); + + if (s->fd >= 0 && s->type != SK_SSH && s->type != SK_SSH_ACTIVE) + close(s->fd); + + s->fd = -1; +} + +void +sk_set_rbsize(sock *s, uint val) +{ + ASSERT(s->rbuf_alloc == s->rbuf); + + if (s->rbsize == val) + return; + + s->rbsize = val; + xfree(s->rbuf_alloc); + s->rbuf_alloc = xmalloc(val); + s->rpos = s->rbuf = s->rbuf_alloc; +} + +void +sk_set_tbsize(sock *s, uint val) +{ + ASSERT(s->tbuf_alloc == s->tbuf); + + if (s->tbsize == val) + return; + + byte *old_tbuf = s->tbuf; + + s->tbsize = val; + s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val); + s->tpos = s->tbuf + (s->tpos - old_tbuf); + s->ttx = s->tbuf + (s->ttx - old_tbuf); +} + +void +sk_set_tbuf(sock *s, void *tbuf) +{ + s->tbuf = tbuf ?: s->tbuf_alloc; + s->ttx = s->tpos = s->tbuf; +} + +void +sk_reallocate(sock *s) +{ + sk_free_bufs(s); + sk_alloc_bufs(s); +} + +static void +sk_dump(resource *r, unsigned indent UNUSED) +{ + sock *s = (sock *) r; + static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" }; + + debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n", + sk_type_names[s->type], + s->data, + s->saddr, + s->sport, + s->daddr, + s->dport, + s->tos, + s->ttl, + s->iface ? s->iface->name : "none"); +} + +static struct resclass sk_class = { + "Socket", + sizeof(sock), + sk_free, + sk_dump, + NULL, + NULL +}; + +/** + * sk_new - create a socket + * @p: pool + * + * This function creates a new socket resource. If you want to use it, + * you need to fill in all the required fields of the structure and + * call sk_open() to do the actual opening of the socket. + * + * The real function name is sock_new(), sk_new() is a macro wrapper + * to avoid collision with OpenSSL. + */ +sock * +sock_new(pool *p) +{ + sock *s = ralloc(p, &sk_class); + s->pool = p; + // s->saddr = s->daddr = IPA_NONE; + s->tos = s->priority = s->ttl = -1; + s->fd = -1; + return s; +} + +static int +sk_setup(sock *s) +{ + int y = 1; + int fd = s->fd; + + if (s->type == SK_SSH_ACTIVE) + return 0; + + if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) + ERR("O_NONBLOCK"); + + if (!s->af) + return 0; + + if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND)) + s->flags |= SKF_PKTINFO; + +#ifdef CONFIG_USE_HDRINCL + if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO)) + { + s->flags &= ~SKF_PKTINFO; + s->flags |= SKF_HDRINCL; + if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0) + ERR("IP_HDRINCL"); + } +#endif + + if (s->vrf && (s->vrf != &default_vrf) && !s->iface) + { + /* Bind socket to associated VRF interface. + This is Linux-specific, but so is SO_BINDTODEVICE. */ +#ifdef SO_BINDTODEVICE + struct ifreq ifr = {}; + strcpy(ifr.ifr_name, s->vrf->name); + if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) + ERR("SO_BINDTODEVICE"); +#endif + } + + if (s->iface) + { +#ifdef SO_BINDTODEVICE + struct ifreq ifr = {}; + strcpy(ifr.ifr_name, s->iface->name); + if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) + ERR("SO_BINDTODEVICE"); +#endif + +#ifdef CONFIG_UNIX_DONTROUTE + if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0) + ERR("SO_DONTROUTE"); +#endif + } + + if (sk_is_ipv4(s)) + { + if (s->flags & SKF_LADDR_RX) + if (sk_request_cmsg4_pktinfo(s) < 0) + return -1; + + if (s->flags & SKF_TTL_RX) + if (sk_request_cmsg4_ttl(s) < 0) + return -1; + + if ((s->type == SK_UDP) || (s->type == SK_IP)) + if (sk_disable_mtu_disc4(s) < 0) + return -1; + + if (s->ttl >= 0) + if (sk_set_ttl4(s, s->ttl) < 0) + return -1; + + if (s->tos >= 0) + if (sk_set_tos4(s, s->tos) < 0) + return -1; + } + + if (sk_is_ipv6(s)) + { + if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP)) + if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0) + ERR("IPV6_V6ONLY"); + + if (s->flags & SKF_LADDR_RX) + if (sk_request_cmsg6_pktinfo(s) < 0) + return -1; + + if (s->flags & SKF_TTL_RX) + if (sk_request_cmsg6_ttl(s) < 0) + return -1; + + if ((s->type == SK_UDP) || (s->type == SK_IP)) + if (sk_disable_mtu_disc6(s) < 0) + return -1; + + if (s->ttl >= 0) + if (sk_set_ttl6(s, s->ttl) < 0) + return -1; + + if (s->tos >= 0) + if (sk_set_tos6(s, s->tos) < 0) + return -1; + } + + /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */ + if (s->priority >= 0) + if (sk_set_priority(s, s->priority) < 0) + return -1; + + return 0; +} + +static int +sk_connect(sock *s) +{ + sockaddr sa; + sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport); + return connect(s->fd, &sa.sa, SA_LEN(sa)); +} + +static void +sk_tcp_connected(sock *s) +{ + sockaddr sa; + int sa_len = sizeof(sa); + + if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) || + (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0)) + log(L_WARN "SOCK: Cannot get local IP address for TCP>"); + + s->type = SK_TCP; + sk_alloc_bufs(s); + s->tx_hook(s); +} + +#ifdef HAVE_LIBSSH +static void +sk_ssh_connected(sock *s) +{ + sk_alloc_bufs(s); + s->type = SK_SSH; + s->tx_hook(s); +} +#endif + +static int +sk_passive_connected(sock *s, int type) +{ + sockaddr loc_sa, rem_sa; + int loc_sa_len = sizeof(loc_sa); + int rem_sa_len = sizeof(rem_sa); + + int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len); + if (fd < 0) + { + if ((errno != EINTR) && (errno != EAGAIN)) + s->err_hook(s, errno); + return 0; + } + + struct domain_generic *sock_lock = DG_IS_LOCKED(s->pool->domain) ? NULL : s->pool->domain; + if (sock_lock) + DG_LOCK(sock_lock); + + sock *t = sk_new(s->pool); + t->type = type; + t->data = s->data; + t->af = s->af; + t->fd = fd; + t->ttl = s->ttl; + t->tos = s->tos; + t->vrf = s->vrf; + t->rbsize = s->rbsize; + t->tbsize = s->tbsize; + + if (type == SK_TCP) + { + if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) || + (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0)) + log(L_WARN "SOCK: Cannot get local IP address for TCP<"); + + if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0) + log(L_WARN "SOCK: Cannot get remote IP address for TCP<"); + } + + if (sk_setup(t) < 0) + { + /* FIXME: Call err_hook instead ? */ + log(L_ERR "SOCK: Incoming connection: %s%#m", t->err); + + /* FIXME: handle it better in rfree() */ + close(t->fd); + t->fd = -1; + sk_close(t); + t = NULL; + } + else + { + birdloop_add_socket(s->loop, t); + sk_alloc_bufs(t); + } + + if (sock_lock) + DG_UNLOCK(sock_lock); + + if (t) + s->rx_hook(t, 0); + + return 1; +} + +#ifdef HAVE_LIBSSH +/* + * Return SSH_OK or SSH_AGAIN or SSH_ERROR + */ +static int +sk_ssh_connect(sock *s) +{ + s->fd = ssh_get_fd(s->ssh->session); + + /* Big fall thru automata */ + switch (s->ssh->state) + { + case SK_SSH_CONNECT: + { + switch (ssh_connect(s->ssh->session)) + { + case SSH_AGAIN: + /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1) + * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere + * documented but our code relies on that. + */ + return SSH_AGAIN; + + case SSH_OK: + break; + + default: + return SSH_ERROR; + } + } /* fallthrough */ + + case SK_SSH_SERVER_KNOWN: + { + s->ssh->state = SK_SSH_SERVER_KNOWN; + + if (s->ssh->server_hostkey_path) + { + int server_identity_is_ok = 1; + +#ifdef HAVE_SSH_OLD_SERVER_VALIDATION_API +#define ssh_session_is_known_server ssh_is_server_known +#define SSH_KNOWN_HOSTS_OK SSH_SERVER_KNOWN_OK +#define SSH_KNOWN_HOSTS_UNKNOWN SSH_SERVER_NOT_KNOWN +#define SSH_KNOWN_HOSTS_CHANGED SSH_SERVER_KNOWN_CHANGED +#define SSH_KNOWN_HOSTS_NOT_FOUND SSH_SERVER_FILE_NOT_FOUND +#define SSH_KNOWN_HOSTS_ERROR SSH_SERVER_ERROR +#define SSH_KNOWN_HOSTS_OTHER SSH_SERVER_FOUND_OTHER +#endif + + /* Check server identity */ + switch (ssh_session_is_known_server(s->ssh->session)) + { +#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args); + case SSH_KNOWN_HOSTS_OK: + /* The server is known and has not changed. */ + break; + + case SSH_KNOWN_HOSTS_UNKNOWN: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path); + server_identity_is_ok = 0; + break; + + case SSH_KNOWN_HOSTS_CHANGED: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key."); + server_identity_is_ok = 0; + break; + + case SSH_KNOWN_HOSTS_NOT_FOUND: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path); + server_identity_is_ok = 0; + break; + + case SSH_KNOWN_HOSTS_ERROR: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened"); + server_identity_is_ok = 0; + break; + + case SSH_KNOWN_HOSTS_OTHER: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \ + "It is a possible attack."); + server_identity_is_ok = 0; + break; + } + + if (!server_identity_is_ok) + return SSH_ERROR; + } + } /* fallthrough */ + + case SK_SSH_USERAUTH: + { + s->ssh->state = SK_SSH_USERAUTH; + switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL)) + { + case SSH_AUTH_AGAIN: + return SSH_AGAIN; + + case SSH_AUTH_SUCCESS: + break; + + default: + return SSH_ERROR; + } + } /* fallthrough */ + + case SK_SSH_CHANNEL: + { + s->ssh->state = SK_SSH_CHANNEL; + s->ssh->channel = ssh_channel_new(s->ssh->session); + if (s->ssh->channel == NULL) + return SSH_ERROR; + } /* fallthrough */ + + case SK_SSH_SESSION: + { + s->ssh->state = SK_SSH_SESSION; + switch (ssh_channel_open_session(s->ssh->channel)) + { + case SSH_AGAIN: + return SSH_AGAIN; + + case SSH_OK: + break; + + default: + return SSH_ERROR; + } + } /* fallthrough */ + + case SK_SSH_SUBSYSTEM: + { + s->ssh->state = SK_SSH_SUBSYSTEM; + if (s->ssh->subsystem) + { + switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem)) + { + case SSH_AGAIN: + return SSH_AGAIN; + + case SSH_OK: + break; + + default: + return SSH_ERROR; + } + } + } /* fallthrough */ + + case SK_SSH_ESTABLISHED: + s->ssh->state = SK_SSH_ESTABLISHED; + } + + return SSH_OK; +} + +/* + * Return file descriptor number if success + * Return -1 if failed + */ +static int +sk_open_ssh(sock *s) +{ + if (!s->ssh) + bug("sk_open() sock->ssh is not allocated"); + + ssh_session sess = ssh_new(); + if (sess == NULL) + ERR2("Cannot create a ssh session"); + s->ssh->session = sess; + + const int verbosity = SSH_LOG_NOLOG; + ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity); + ssh_options_set(sess, SSH_OPTIONS_HOST, s->host); + ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport)); + /* TODO: Add SSH_OPTIONS_BINDADDR */ + ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username); + + if (s->ssh->server_hostkey_path) + ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path); + + if (s->ssh->client_privkey_path) + ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path); + + ssh_set_blocking(sess, 0); + + switch (sk_ssh_connect(s)) + { + case SSH_AGAIN: + break; + + case SSH_OK: + sk_ssh_connected(s); + break; + + case SSH_ERROR: + ERR2(ssh_get_error(sess)); + break; + } + + return ssh_get_fd(sess); + + err: + return -1; +} +#endif + +/** + * sk_open - open a socket + * @loop: loop + * @s: socket + * + * This function takes a socket resource created by sk_new() and + * initialized by the user and binds a corresponding network connection + * to it. + * + * Result: 0 for success, -1 for an error. + */ +int +sk_open(sock *s, struct birdloop *loop) +{ + int af = AF_UNSPEC; + int fd = -1; + int do_bind = 0; + int bind_port = 0; + ip_addr bind_addr = IPA_NONE; + sockaddr sa; + + if (s->type <= SK_IP) + { + /* + * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either + * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr). + * But the specifications have to be consistent. + */ + + switch (s->subtype) + { + case 0: + ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) || + (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr))); + af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6; + break; + + case SK_IPV4: + ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr)); + ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr)); + af = AF_INET; + break; + + case SK_IPV6: + ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr)); + ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr)); + af = AF_INET6; + break; + + default: + bug("Invalid subtype %d", s->subtype); + } + } + + switch (s->type) + { + case SK_TCP_ACTIVE: + s->ttx = ""; /* Force s->ttx != s->tpos */ + /* Fall thru */ + case SK_TCP_PASSIVE: + fd = socket(af, SOCK_STREAM, IPPROTO_TCP); + bind_port = s->sport; + bind_addr = s->saddr; + do_bind = bind_port || ipa_nonzero(bind_addr); + break; + +#ifdef HAVE_LIBSSH + case SK_SSH_ACTIVE: + s->ttx = ""; /* Force s->ttx != s->tpos */ + fd = sk_open_ssh(s); + break; +#endif + + case SK_UDP: + fd = socket(af, SOCK_DGRAM, IPPROTO_UDP); + bind_port = s->sport; + bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; + do_bind = 1; + break; + + case SK_IP: + fd = socket(af, SOCK_RAW, s->dport); + bind_port = 0; + bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; + do_bind = ipa_nonzero(bind_addr); + break; + + case SK_MAGIC: + af = 0; + fd = s->fd; + break; + + default: + bug("sk_open() called for invalid sock type %d", s->type); + } + + if (fd < 0) + ERR("socket"); + + s->af = af; + s->fd = fd; + + if (sk_setup(s) < 0) + goto err; + + if (do_bind) + { + if (bind_port) + { + int y = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0) + ERR2("SO_REUSEADDR"); + +#ifdef CONFIG_NO_IFACE_BIND + /* Workaround missing ability to bind to an iface */ + if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr)) + { + if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0) + ERR2("SO_REUSEPORT"); + } +#endif + } + else + if (s->flags & SKF_HIGH_PORT) + if (sk_set_high_port(s) < 0) + log(L_WARN "Socket error: %s%#m", s->err); + + if (s->flags & SKF_FREEBIND) + if (sk_set_freebind(s) < 0) + log(L_WARN "Socket error: %s%#m", s->err); + + sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port); + if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) + ERR2("bind"); + } + + if (s->password) + if (sk_set_md5_auth(s, s->saddr, s->daddr, -1, s->iface, s->password, 0) < 0) + goto err; + + switch (s->type) + { + case SK_TCP_ACTIVE: + if (sk_connect(s) >= 0) + sk_tcp_connected(s); + else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && + errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH) + ERR2("connect"); + break; + + case SK_TCP_PASSIVE: + if (listen(fd, 8) < 0) + ERR2("listen"); + break; + + case SK_UDP: + if (s->flags & SKF_CONNECT) + if (sk_connect(s) < 0) + ERR2("connect"); + + sk_alloc_bufs(s); + break; + + case SK_SSH_ACTIVE: + case SK_MAGIC: + break; + + default: + sk_alloc_bufs(s); + } + + birdloop_add_socket(loop, s); + return 0; + +err: + close(fd); + s->fd = -1; + return -1; +} + +int +sk_open_unix(sock *s, struct birdloop *loop, char *name) +{ + struct sockaddr_un sa; + int fd; + + /* We are sloppy during error (leak fd and not set s->err), but we die anyway */ + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return -1; + + if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) + return -1; + + /* Path length checked in test_old_bird() but we may need unix sockets for other reasons in future */ + ASSERT_DIE(strlen(name) < sizeof(sa.sun_path)); + + sa.sun_family = AF_UNIX; + strcpy(sa.sun_path, name); + + if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0) + return -1; + + if (listen(fd, 8) < 0) + return -1; + + s->fd = fd; + birdloop_add_socket(loop, s); + return 0; +} + + +#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \ + CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL) +#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO) + +static void +sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) +{ + if (sk_is_ipv4(s)) + sk_prepare_cmsgs4(s, msg, cbuf, cbuflen); + else + sk_prepare_cmsgs6(s, msg, cbuf, cbuflen); +} + +static void +sk_process_cmsgs(sock *s, struct msghdr *msg) +{ + struct cmsghdr *cm; + + s->laddr = IPA_NONE; + s->lifindex = 0; + s->rcv_ttl = -1; + + for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) + { + if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s)) + { + sk_process_cmsg4_pktinfo(s, cm); + sk_process_cmsg4_ttl(s, cm); + } + + if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s)) + { + sk_process_cmsg6_pktinfo(s, cm); + sk_process_cmsg6_ttl(s, cm); + } + } +} + + +static inline int +sk_sendmsg(sock *s) +{ + struct iovec iov = {s->tbuf, s->tpos - s->tbuf}; + byte cmsg_buf[CMSG_TX_SPACE]; + sockaddr dst; + int flags = 0; + + sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport); + + struct msghdr msg = { + .msg_name = &dst.sa, + .msg_namelen = SA_LEN(dst), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + +#ifdef CONFIG_DONTROUTE_UNICAST + /* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we + cannot use it for other cases (e.g. when TTL security is used). */ + if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1)) + flags = MSG_DONTROUTE; +#endif + +#ifdef CONFIG_USE_HDRINCL + byte hdr[20]; + struct iovec iov2[2] = { {hdr, 20}, iov }; + + if (s->flags & SKF_HDRINCL) + { + sk_prepare_ip_header(s, hdr, iov.iov_len); + msg.msg_iov = iov2; + msg.msg_iovlen = 2; + } +#endif + + if (s->flags & SKF_PKTINFO) + sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); + + return sendmsg(s->fd, &msg, flags); +} + +static inline int +sk_recvmsg(sock *s) +{ + struct iovec iov = {s->rbuf, s->rbsize}; + byte cmsg_buf[CMSG_RX_SPACE]; + sockaddr src; + + struct msghdr msg = { + .msg_name = &src.sa, + .msg_namelen = sizeof(src), // XXXX ?? + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), + .msg_flags = 0 + }; + + int rv = recvmsg(s->fd, &msg, 0); + if (rv < 0) + return rv; + + //ifdef IPV4 + // if (cf_type == SK_IP) + // rv = ipv4_skip_header(pbuf, rv); + //endif + + sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport); + sk_process_cmsgs(s, &msg); + + if (msg.msg_flags & MSG_TRUNC) + s->flags |= SKF_TRUNCATED; + else + s->flags &= ~SKF_TRUNCATED; + + return rv; +} + + +static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; } + +bool +sk_tx_pending(sock *s) +{ + return s->ttx != s->tpos; +} + + +static int +sk_maybe_write(sock *s) +{ + int e; + + switch (s->type) + { + case SK_TCP: + case SK_MAGIC: + case SK_UNIX: + while (sk_tx_pending(s)) + { + e = write(s->fd, s->ttx, s->tpos - s->ttx); + + if (e < 0) + { + if (errno != EINTR && errno != EAGAIN) + { + reset_tx_buffer(s); + /* EPIPE is just a connection close notification during TX */ + s->err_hook(s, (errno != EPIPE) ? errno : 0); + return -1; + } + return 0; + } + s->ttx += e; + } + reset_tx_buffer(s); + return 1; + +#ifdef HAVE_LIBSSH + case SK_SSH: + while (sk_tx_pending(s)) + { + e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx); + + if (e < 0) + { + s->err = ssh_get_error(s->ssh->session); + s->err_hook(s, ssh_get_error_code(s->ssh->session)); + + reset_tx_buffer(s); + /* EPIPE is just a connection close notification during TX */ + s->err_hook(s, (errno != EPIPE) ? errno : 0); + return -1; + } + s->ttx += e; + } + reset_tx_buffer(s); + return 1; +#endif + + case SK_UDP: + case SK_IP: + { + if (s->tbuf == s->tpos) + return 1; + + e = sk_sendmsg(s); + + if (e < 0) + { + if (errno != EINTR && errno != EAGAIN) + { + reset_tx_buffer(s); + s->err_hook(s, errno); + return -1; + } + + if (!s->tx_hook) + reset_tx_buffer(s); + return 0; + } + reset_tx_buffer(s); + return 1; + } + + default: + bug("sk_maybe_write: unknown socket type %d", s->type); + } +} + +int +sk_rx_ready(sock *s) +{ + int rv; + struct pollfd pfd = { .fd = s->fd }; + pfd.events |= POLLIN; + + redo: + rv = poll(&pfd, 1, 0); + + if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) + goto redo; + + return rv; +} + +/** + * sk_send - send data to a socket + * @s: socket + * @len: number of bytes to send + * + * This function sends @len bytes of data prepared in the + * transmit buffer of the socket @s to the network connection. + * If the packet can be sent immediately, it does so and returns + * 1, else it queues the packet for later processing, returns 0 + * and calls the @tx_hook of the socket when the tranmission + * takes place. + */ +int +sk_send(sock *s, unsigned len) +{ + s->ttx = s->tbuf; + s->tpos = s->tbuf + len; + + int e = sk_maybe_write(s); + if (e == 0) /* Trigger thread poll reload to poll this socket's write. */ + socket_changed(s); + + return e; +} + +/** + * sk_send_to - send data to a specific destination + * @s: socket + * @len: number of bytes to send + * @addr: IP address to send the packet to + * @port: port to send the packet to + * + * This is a sk_send() replacement for connection-less packet sockets + * which allows destination of the packet to be chosen dynamically. + * Raw IP sockets should use 0 for @port. + */ +int +sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port) +{ + s->daddr = addr; + if (port) + s->dport = port; + + s->ttx = s->tbuf; + s->tpos = s->tbuf + len; + return sk_maybe_write(s); +} + +/* +int +sk_send_full(sock *s, unsigned len, struct iface *ifa, + ip_addr saddr, ip_addr daddr, unsigned dport) +{ + s->iface = ifa; + s->saddr = saddr; + s->daddr = daddr; + s->dport = dport; + s->ttx = s->tbuf; + s->tpos = s->tbuf + len; + return sk_maybe_write(s); +} +*/ + +static void +call_rx_hook(sock *s, int size) +{ + if (s->rx_hook(s, size)) + { + /* We need to be careful since the socket could have been deleted by the hook */ + if (s->loop->sock_active == s) + s->rpos = s->rbuf; + } +} + +#ifdef HAVE_LIBSSH +static int +sk_read_ssh(sock *s) +{ + ssh_channel rchans[2] = { s->ssh->channel, NULL }; + struct timeval timev = { 1, 0 }; + + if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR) + return 1; /* Try again */ + + if (ssh_channel_is_eof(s->ssh->channel) != 0) + { + /* The remote side is closing the connection */ + s->err_hook(s, 0); + return 0; + } + + if (rchans[0] == NULL) + return 0; /* No data is available on the socket */ + + const uint used_bytes = s->rpos - s->rbuf; + const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0); + if (read_bytes > 0) + { + /* Received data */ + s->rpos += read_bytes; + call_rx_hook(s, used_bytes + read_bytes); + return 1; + } + else if (read_bytes == 0) + { + if (ssh_channel_is_eof(s->ssh->channel) != 0) + { + /* The remote side is closing the connection */ + s->err_hook(s, 0); + } + } + else + { + s->err = ssh_get_error(s->ssh->session); + s->err_hook(s, ssh_get_error_code(s->ssh->session)); + } + + return 0; /* No data is available on the socket */ +} +#endif + + /* sk_read() and sk_write() are called from BFD's event loop */ + +static inline int +sk_read_noflush(sock *s, int revents) +{ + switch (s->type) + { + case SK_TCP_PASSIVE: + return sk_passive_connected(s, SK_TCP); + + case SK_UNIX_PASSIVE: + return sk_passive_connected(s, SK_UNIX); + + case SK_TCP: + case SK_UNIX: + { + int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos); + + if (c < 0) + { + if (errno != EINTR && errno != EAGAIN) + s->err_hook(s, errno); + else if (errno == EAGAIN && !(revents & POLLIN)) + { + log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents); + s->err_hook(s, 0); + } + } + else if (!c) + s->err_hook(s, 0); + else + { + s->rpos += c; + call_rx_hook(s, s->rpos - s->rbuf); + return 1; + } + return 0; + } + +#ifdef HAVE_LIBSSH + case SK_SSH: + return sk_read_ssh(s); +#endif + + case SK_MAGIC: + return s->rx_hook(s, 0); + + default: + { + int e = sk_recvmsg(s); + + if (e < 0) + { + if (errno != EINTR && errno != EAGAIN) + s->err_hook(s, errno); + return 0; + } + + s->rpos = s->rbuf + e; + s->rx_hook(s, e); + return 1; + } + } +} + +int +sk_read(sock *s, int revents) +{ + int e = sk_read_noflush(s, revents); + tmp_flush(); + return e; +} + +static inline int +sk_write_noflush(sock *s) +{ + switch (s->type) + { + case SK_TCP_ACTIVE: + { + if (sk_connect(s) >= 0 || errno == EISCONN) + sk_tcp_connected(s); + else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS) + s->err_hook(s, errno); + return 0; + } + +#ifdef HAVE_LIBSSH + case SK_SSH_ACTIVE: + { + switch (sk_ssh_connect(s)) + { + case SSH_OK: + sk_ssh_connected(s); + break; + + case SSH_AGAIN: + return 1; + + case SSH_ERROR: + s->err = ssh_get_error(s->ssh->session); + s->err_hook(s, ssh_get_error_code(s->ssh->session)); + break; + } + return 0; + } +#endif + + default: + if (sk_tx_pending(s) && sk_maybe_write(s) > 0) + { + if (s->tx_hook) + s->tx_hook(s); + return 1; + } + return 0; + } +} + +int +sk_write(sock *s) +{ + int e = sk_write_noflush(s); + tmp_flush(); + return e; +} + +int sk_is_ipv4(sock *s) +{ return s->af == AF_INET; } + +int sk_is_ipv6(sock *s) +{ return s->af == AF_INET6; } + +void +sk_err(sock *s, int revents) +{ + int se = 0, sse = sizeof(se); + if ((s->type != SK_MAGIC) && (revents & POLLERR)) + if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0) + { + log(L_ERR "IO: Socket error: SO_ERROR: %m"); + se = 0; + } + + s->err_hook(s, se); + tmp_flush(); +} + +void +sk_dump_all(void) +{ + node *n; + sock *s; + + debug("Open sockets:\n"); + WALK_LIST(n, main_birdloop.sock_list) + { + s = SKIP_BACK(sock, n, n); + debug("%p ", s); + sk_dump(&s->r, 3); + } + debug("\n"); +} +