0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2025-01-20 16:01:53 +00:00
bird/proto/radv/packets.c
Maria Matejka 836e857b30 Sockets: Unified API for main and other loops
Now sk_open() requires an explicit IO loop to open the socket in. Also
specific functions for socket RX pause / resume are added to allow for
BGP corking.

And last but not least, socket reloop is now synchronous to resolve
weird cases of the target loop stopping before actually picking up the
relooped socket. Now the caller must ensure that both loops are locked
while relooping, and this way all sockets always have their respective
loop.
2023-04-04 17:00:59 +02:00

518 lines
12 KiB
C

/*
* BIRD -- RAdv Packet Processing
*
* (c) 2011--2019 Ondrej Zajicek <santiago@crfreenet.org>
* (c) 2011--2019 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#include <stdlib.h>
#include "radv.h"
struct radv_ra_packet
{
u8 type;
u8 code;
u16 checksum;
u8 current_hop_limit;
u8 flags;
u16 router_lifetime;
u32 reachable_time;
u32 retrans_timer;
};
#define OPT_RA_MANAGED 0x80
#define OPT_RA_OTHER_CFG 0x40
#define OPT_PREFIX 3
#define OPT_MTU 5
#define OPT_ROUTE 24
#define OPT_RDNSS 25
#define OPT_DNSSL 31
struct radv_opt_prefix
{
u8 type;
u8 length;
u8 pxlen;
u8 flags;
u32 valid_lifetime;
u32 preferred_lifetime;
u32 reserved;
ip6_addr prefix;
};
#define OPT_PX_ONLINK 0x80
#define OPT_PX_AUTONOMOUS 0x40
struct radv_opt_mtu
{
u8 type;
u8 length;
u16 reserved;
u32 mtu;
};
struct radv_opt_route {
u8 type;
u8 length;
u8 pxlen;
u8 flags;
u32 lifetime;
u8 prefix[];
};
struct radv_opt_rdnss
{
u8 type;
u8 length;
u16 reserved;
u32 lifetime;
ip6_addr servers[];
};
struct radv_opt_dnssl
{
u8 type;
u8 length;
u16 reserved;
u32 lifetime;
char domain[];
};
static int
radv_prepare_route(struct radv_iface *ifa, struct radv_route *rt,
char **buf, char *bufend)
{
struct radv_proto *p = ifa->ra;
u8 px_blocks = (net6_pxlen(rt->n.addr) + 63) / 64;
u8 opt_len = 8 * (1 + px_blocks);
if (*buf + opt_len > bufend)
{
log(L_WARN, "%s: Too many RA options on interface %s",
p->p.name, ifa->iface->name);
return -1;
}
uint preference = rt->preference_set ? rt->preference : ifa->cf->route_preference;
uint lifetime = rt->lifetime_set ? rt->lifetime : ifa->cf->route_lifetime;
uint valid = rt->valid && p->valid && (p->active || !ifa->cf->route_lifetime_sensitive);
struct radv_opt_route *opt = (void *) *buf;
*buf += opt_len;
opt->type = OPT_ROUTE;
opt->length = 1 + px_blocks;
opt->pxlen = net6_pxlen(rt->n.addr);
opt->flags = preference;
opt->lifetime = valid ? htonl(lifetime) : 0;
/* Copy the relevant part of the prefix */
ip6_addr px_addr = ip6_hton(net6_prefix(rt->n.addr));
memcpy(opt->prefix, &px_addr, 8 * px_blocks);
/* Keeping track of first linger timeout */
if (!rt->valid)
ifa->valid_time = MIN(ifa->valid_time, rt->changed + ifa->cf->route_linger_time S);
return 0;
}
static int
radv_prepare_rdnss(struct radv_iface *ifa, list *rdnss_list, char **buf, char *bufend)
{
struct radv_rdnss_config *rcf = HEAD(*rdnss_list);
while(NODE_VALID(rcf))
{
struct radv_rdnss_config *rcf_base = rcf;
struct radv_opt_rdnss *op = (void *) *buf;
int max_i = (bufend - *buf - sizeof(struct radv_opt_rdnss)) / sizeof(ip6_addr);
int i = 0;
if (max_i < 1)
goto too_much;
op->type = OPT_RDNSS;
op->reserved = 0;
if (rcf->lifetime_mult)
op->lifetime = htonl(rcf->lifetime_mult * ifa->cf->max_ra_int);
else
op->lifetime = htonl(rcf->lifetime);
while(NODE_VALID(rcf) &&
(rcf->lifetime == rcf_base->lifetime) &&
(rcf->lifetime_mult == rcf_base->lifetime_mult))
{
if (i >= max_i)
goto too_much;
op->servers[i] = ip6_hton(rcf->server);
i++;
rcf = NODE_NEXT(rcf);
}
op->length = 1+2*i;
*buf += 8 * op->length;
}
return 0;
too_much:
log(L_WARN "%s: Too many RA options on interface %s",
ifa->ra->p.name, ifa->iface->name);
return -1;
}
int
radv_process_domain(struct radv_dnssl_config *cf)
{
/* Format of domain in search list is <size> <label> <size> <label> ... 0 */
const char *dom = cf->domain;
const char *dom_end = dom; /* Just to */
u8 *dlen_save = &cf->dlen_first;
uint len;
while (dom_end)
{
dom_end = strchr(dom, '.');
len = dom_end ? (uint)(dom_end - dom) : strlen(dom);
if (len < 1 || len > 63)
return -1;
*dlen_save = len;
dlen_save = (u8 *) dom_end;
dom += len + 1;
}
len = dom - cf->domain;
if (len > 254)
return -1;
cf->dlen_all = len;
return 0;
}
static int
radv_prepare_dnssl(struct radv_iface *ifa, list *dnssl_list, char **buf, char *bufend)
{
struct radv_dnssl_config *dcf = HEAD(*dnssl_list);
while(NODE_VALID(dcf))
{
struct radv_dnssl_config *dcf_base = dcf;
struct radv_opt_dnssl *op = (void *) *buf;
int bsize = bufend - *buf - sizeof(struct radv_opt_dnssl);
int bpos = 0;
if (bsize < 0)
goto too_much;
bsize = bsize & ~7; /* Round down to multiples of 8 */
op->type = OPT_DNSSL;
op->reserved = 0;
if (dcf->lifetime_mult)
op->lifetime = htonl(dcf->lifetime_mult * ifa->cf->max_ra_int);
else
op->lifetime = htonl(dcf->lifetime);
while(NODE_VALID(dcf) &&
(dcf->lifetime == dcf_base->lifetime) &&
(dcf->lifetime_mult == dcf_base->lifetime_mult))
{
if (bpos + dcf->dlen_all + 1 > bsize)
goto too_much;
op->domain[bpos++] = dcf->dlen_first;
memcpy(op->domain + bpos, dcf->domain, dcf->dlen_all);
bpos += dcf->dlen_all;
dcf = NODE_NEXT(dcf);
}
int blen = (bpos + 7) / 8;
bzero(op->domain + bpos, 8 * blen - bpos);
op->length = 1 + blen;
*buf += 8 * op->length;
}
return 0;
too_much:
log(L_WARN "%s: Too many RA options on interface %s",
ifa->ra->p.name, ifa->iface->name);
return -1;
}
static int
radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *px,
char **buf, char *bufend)
{
struct radv_prefix_config *pc = px->cf;
if (*buf + sizeof(struct radv_opt_prefix) > bufend)
{
log(L_WARN "%s: Too many prefixes on interface %s",
ifa->ra->p.name, ifa->iface->name);
return -1;
}
struct radv_opt_prefix *op = (void *) *buf;
op->type = OPT_PREFIX;
op->length = 4;
op->pxlen = px->prefix.pxlen;
op->flags = (pc->onlink ? OPT_PX_ONLINK : 0) |
(pc->autonomous ? OPT_PX_AUTONOMOUS : 0);
op->valid_lifetime = (ifa->ra->active || !pc->valid_lifetime_sensitive) ?
htonl(pc->valid_lifetime) : 0;
op->preferred_lifetime = (ifa->ra->active || !pc->preferred_lifetime_sensitive) ?
htonl(pc->preferred_lifetime) : 0;
op->reserved = 0;
op->prefix = ip6_hton(px->prefix.prefix);
*buf += sizeof(*op);
/* Keeping track of first linger timeout */
if (!px->valid)
ifa->valid_time = MIN(ifa->valid_time, px->changed + ifa->cf->prefix_linger_time S);
return 0;
}
static void
radv_prepare_ra(struct radv_iface *ifa)
{
struct radv_proto *p = ifa->ra;
struct radv_config *cf = (struct radv_config *) (p->p.cf);
struct radv_iface_config *ic = ifa->cf;
btime now = current_time();
char *buf = ifa->sk->tbuf;
char *bufstart = buf;
char *bufend = buf + ifa->sk->tbsize;
struct radv_ra_packet *pkt = (void *) buf;
pkt->type = ICMPV6_RA;
pkt->code = 0;
pkt->checksum = 0;
pkt->current_hop_limit = ic->current_hop_limit;
pkt->router_lifetime = (p->valid && (p->active || !ic->default_lifetime_sensitive)) ?
htons(ic->default_lifetime) : 0;
pkt->flags = (ic->managed ? OPT_RA_MANAGED : 0) |
(ic->other_config ? OPT_RA_OTHER_CFG : 0) |
(pkt->router_lifetime ? ic->default_preference : 0);
pkt->reachable_time = htonl(ic->reachable_time);
pkt->retrans_timer = htonl(ic->retrans_timer);
buf += sizeof(*pkt);
if (ic->link_mtu)
{
struct radv_opt_mtu *om = (void *) buf;
om->type = OPT_MTU;
om->length = 1;
om->reserved = 0;
om->mtu = htonl(ic->link_mtu);
buf += sizeof (*om);
}
/* Keeping track of first linger timeout */
ifa->valid_time = TIME_INFINITY;
struct radv_prefix *px;
WALK_LIST(px, ifa->prefixes)
{
/* Skip invalid prefixes that are past linger timeout but still not pruned */
if (!px->valid && ((px->changed + ic->prefix_linger_time S) <= now))
continue;
if (radv_prepare_prefix(ifa, px, &buf, bufend) < 0)
goto done;
}
if (! ic->rdnss_local)
if (radv_prepare_rdnss(ifa, &cf->rdnss_list, &buf, bufend) < 0)
goto done;
if (radv_prepare_rdnss(ifa, &ic->rdnss_list, &buf, bufend) < 0)
goto done;
if (! ic->dnssl_local)
if (radv_prepare_dnssl(ifa, &cf->dnssl_list, &buf, bufend) < 0)
goto done;
if (radv_prepare_dnssl(ifa, &ic->dnssl_list, &buf, bufend) < 0)
goto done;
if (p->fib_up)
{
FIB_WALK(&p->routes, struct radv_route, rt)
{
/* Skip invalid routes that are past linger timeout but still not pruned */
if (!rt->valid && ((rt->changed + ic->route_linger_time S) <= now))
continue;
if (radv_prepare_route(ifa, rt, &buf, bufend) < 0)
goto done;
}
FIB_WALK_END;
}
done:
ifa->plen = buf - bufstart;
}
void
radv_send_ra(struct radv_iface *ifa, ip_addr to)
{
struct radv_proto *p = ifa->ra;
/* TX queue is already full */
if (!sk_tx_buffer_empty(ifa->sk))
return;
if (ifa->valid_time <= current_time())
radv_invalidate(ifa);
/* We store prepared RA in tbuf */
if (!ifa->plen)
radv_prepare_ra(ifa);
if (ipa_zero(to))
{
to = IP6_ALL_NODES;
RADV_TRACE(D_PACKETS, "Sending RA via %s", ifa->iface->name);
}
else
{
RADV_TRACE(D_PACKETS, "Sending RA to %I via %s", to, ifa->iface->name);
}
int done = sk_send_to(ifa->sk, ifa->plen, to, 0);
if (!done)
log(L_WARN "%s: TX queue full on %s", p->p.name, ifa->iface->name);
}
static void
radv_receive_rs(struct radv_proto *p, struct radv_iface *ifa, ip_addr from)
{
RADV_TRACE(D_PACKETS, "Received RS from %I via %s",
from, ifa->iface->name);
if (ifa->cf->solicited_ra_unicast && ipa_nonzero(from))
radv_send_ra(ifa, from);
else
radv_iface_notify(ifa, RA_EV_RS);
}
static int
radv_rx_hook(sock *sk, uint size)
{
struct radv_iface *ifa = sk->data;
struct radv_proto *p = ifa->ra;
/* We want just packets from sk->iface */
if (sk->lifindex != sk->iface->index)
return 1;
if (ipa_equal(sk->faddr, sk->saddr))
return 1;
if (size < 8)
return 1;
byte *buf = sk->rbuf;
if (buf[1] != 0)
return 1;
/* Validation is a bit sloppy - Hop Limit is not checked and
length of options is ignored for RS and left to later for RA */
switch (buf[0])
{
case ICMPV6_RS:
radv_receive_rs(p, ifa, sk->faddr);
return 1;
case ICMPV6_RA:
RADV_TRACE(D_PACKETS, "Received RA from %I via %s",
sk->faddr, ifa->iface->name);
/* FIXME - there should be some checking of received RAs, but we just ignore them */
return 1;
default:
return 1;
}
}
static void
radv_tx_hook(sock *sk)
{
struct radv_iface *ifa = sk->data;
log(L_INFO "%s: TX queue ready on %s", ifa->ra->p.name, ifa->iface->name);
/* Some RAs may be missed due to full TX queue */
radv_iface_notify(ifa, RA_EV_RS);
}
static void
radv_err_hook(sock *sk, int err)
{
struct radv_iface *ifa = sk->data;
log(L_ERR "%s: Socket error on %s: %M", ifa->ra->p.name, ifa->iface->name, err);
}
int
radv_sk_open(struct radv_iface *ifa)
{
sock *sk = sk_new(ifa->pool);
sk->type = SK_IP;
sk->subtype = SK_IPV6;
sk->dport = ICMPV6_PROTO;
sk->saddr = ifa->addr->ip;
sk->vrf = ifa->ra->p.vrf;
sk->ttl = 255; /* Mandatory for Neighbor Discovery packets */
sk->rx_hook = radv_rx_hook;
sk->tx_hook = radv_tx_hook;
sk->err_hook = radv_err_hook;
sk->iface = ifa->iface;
sk->rbsize = 1024; // bufsize(ifa);
sk->tbsize = 1024; // bufsize(ifa);
sk->data = ifa;
sk->flags = SKF_LADDR_RX;
if (sk_open(sk, ifa->ra->p.loop) < 0)
goto err;
/* We want listen just to ICMPv6 messages of type RS and RA */
if (sk_set_icmp6_filter(sk, ICMPV6_RS, ICMPV6_RA) < 0)
goto err;
if (sk_setup_multicast(sk) < 0)
goto err;
if (sk_join_group(sk, IP6_ALL_ROUTERS) < 0)
goto err;
ifa->sk = sk;
return 1;
err:
sk_log_error(sk, ifa->ra->p.name);
rfree(sk);
return 0;
}