0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2025-01-10 19:11:54 +00:00
bird/sysdep/unix/mkrt.c

504 lines
12 KiB
C

/*
* BIRD -- UNIX Kernel Multicast Routing
*
* (c) 2016 Ondrej Hlavaty <aearsis@eideo.cz>
* (c) 2018 Ondrej Zajicek <santiago@crfreenet.org>
* (c) 2018 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
/**
* DOC: Kernel Multicast Routing
*
* This protocol is the interface to the kernel part of multicast routing. It
* handles registration of multicast interfaces (MIFs), maintenance of kernel
* Multicast Forwarding Cache (MFC), and reception of incoming IGMP packets.
*
* Multicast forwarding in Linux and BSD kernels is a bit tricky. There must be
* exactly one socket on which setsockopt MRT_INIT is called, then multicast
* forwarding is enabled and kernel multicast routing table is maintained until
* the socket is closed. This MRT control socket is stored in &mrt_sock field.
*
* Multicast forwarding works only on interfaces registered as MIFs, with
* assigned MIF index. While MIFs and MIF indexes are handled by OS-independent
* code in iface.c, actual MIF registration by OS kernel is handled here. The
* MKernel protocol is associated with a MIF group by mkrt_register_mif_group(),
* after that it receives mkrt_register_mif() / mkrt_unregister_mif() calls for
* changes in given MIF group.
*
* Unlike kernel unicast routing API, which is proactive, kernel multicast
* routing API is designed as reactive. Kernel keeps MFC entries for encountered
* (S, G) flows and when a new flow is noticed, BIRD receives cache miss message
* (%IGMPMSG_NOCACHE) from kernel and responds with adding appropriate (S, G)
* MFC entry to the kernel, see mkrt_resolve_mfc(). Therefore, regular route
* notifications handled by mkrt_rt_notify() are not directly translated to
* kernel route updates.
*
* Although there is also support for (*, G) MFC entries in Linux (using
* %MRT_ADD_MFC_PROXY), their behavior is strange and does not match our needs,
* and there is no equivalent in BSD. Therefore, we do not use them and we
* manage with traditional (S, G) MFC entries.
*
* Finally, the MRT control socket is the only one that receives all IGMP
* packets, even those from non-joined groups. IGMP protocol needs to receive
* these packets, so we forward them internally. To simulate the sane behavior,
* a protocol can open an IGMP socket and use sk_setup_igmp() to register it to
* reception of all IGMP packets. The socket is relinked to internal MIF socket
* list. MKernel protocol then uses mif_forward_igmp() to forward packets
* received on the MRT control socket to all sockets on these lists.
*/
#include "nest/bird.h"
#include "nest/iface.h"
#include "lib/socket.h"
#include "unix.h"
#include "mkrt.h"
#include <linux/mroute.h>
/*
* MRT socket options
*/
/*
 * Issue MRT_INIT on socket @s, turning it into the MRT control socket.
 * Returns the raw setsockopt() result (negative on failure, errno set).
 */
static inline int
sk_mrt_init4(sock *s)
{
  int enable = 1;
  int rv = setsockopt(s->fd, IPPROTO_IP, MRT_INIT, &enable, sizeof(enable));

  return rv;
}
/*
 * Issue MRT_DONE on socket @s (no option payload is passed).
 * Returns the raw setsockopt() result.
 */
static inline int
sk_mrt_done4(sock *s)
{
  const int fd = s->fd;

  return setsockopt(fd, IPPROTO_IP, MRT_DONE, NULL, 0);
}
/*
 * Register @mif in the kernel through MRT_ADD_VIF on socket @s. The VIF number
 * is mif->index and the underlying interface is identified by its ifindex
 * (VIFF_USE_IFINDEX). All other vifctl fields are left zeroed.
 * Returns the raw setsockopt() result.
 */
static inline int
sk_mrt_add_mif4(sock *s, struct mif *mif)
{
  struct vifctl vc = { .vifc_vifi = mif->index };

  vc.vifc_flags = VIFF_USE_IFINDEX;
  vc.vifc_lcl_ifindex = mif->iface->index;

  return setsockopt(s->fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
}
/*
 * Remove the kernel VIF numbered mif->index through MRT_DEL_VIF on socket @s.
 * Only the VIF number is filled in; the rest of the request is zero.
 * Returns the raw setsockopt() result.
 */
static inline int
sk_mrt_del_mif4(sock *s, struct mif *mif)
{
  struct vifctl vc = { 0 };

  vc.vifc_vifi = mif->index;

  return setsockopt(s->fd, IPPROTO_IP, MRT_DEL_VIF, &vc, sizeof(vc));
}
/*
 * Install an (S, G) MFC entry through MRT_ADD_MFC on socket @s.
 *
 * @src, @grp:  source and group address of the flow
 * @iifs:       bitmask of accepted incoming MIFs
 * @oifs:       bitmask of outgoing MIFs
 * @mif_index:  MIF the flow arrived on; used as the entry's parent
 *
 * Outgoing TTL thresholds are set (to 1) only when the parent MIF is in @iifs
 * and @oifs is non-empty; the parent itself is never used as an output.
 * Otherwise the entry is installed with no outputs at all.
 * Returns the raw setsockopt() result.
 */
static inline int
sk_mrt_add_mfc4(sock *s, ip4_addr src, ip4_addr grp, u32 iifs, u32 oifs, int mif_index)
{
  struct mfcctl mc = { .mfcc_parent = mif_index };

  mc.mfcc_origin = ip4_to_in4(src);
  mc.mfcc_mcastgrp = ip4_to_in4(grp);

  int forward = BIT32_TEST(&iifs, mif_index) && oifs;

  for (int i = 0; forward && (i < MIFS_MAX); i++)
  {
    if (i == mif_index)
      continue;

    if (BIT32_TEST(&oifs, i))
      mc.mfcc_ttls[i] = 1;
  }

  return setsockopt(s->fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
}
/*
 * Delete the (S, G) MFC entry for @src / @grp through MRT_DEL_MFC on socket @s.
 * Returns the raw setsockopt() result.
 */
static inline int
sk_mrt_del_mfc4(sock *s, ip4_addr src, ip4_addr grp)
{
  struct mfcctl mc = { .mfcc_origin = ip4_to_in4(src) };

  mc.mfcc_mcastgrp = ip4_to_in4(grp);

  return setsockopt(s->fd, IPPROTO_IP, MRT_DEL_MFC, &mc, sizeof(mc));
}
/*
* MIF handling
*/
/*
 * Register @mif with the kernel using the MRT control socket of @p.
 * A failure is only logged; nothing is reported back to the caller.
 */
void
mkrt_register_mif(struct mkrt_proto *p, struct mif *mif)
{
  TRACE(D_EVENTS, "Registering interface %s MIF %i", mif->iface->name, mif->index);

  int rv = sk_mrt_add_mif4(p->mrt_sock, mif);
  if (rv >= 0)
    return;

  /* %m prints errno left by the failed setsockopt() */
  log(L_ERR "%s: Cannot register interface %s MIF %i: %m",
      p->p.name, mif->iface->name, mif->index);
}
/*
 * Unregister @mif from the kernel using the MRT control socket of @p.
 * A failure is only logged; nothing is reported back to the caller.
 */
void
mkrt_unregister_mif(struct mkrt_proto *p, struct mif *mif)
{
  TRACE(D_EVENTS, "Unregistering interface %s MIF %i", mif->iface->name, mif->index);

  int rv = sk_mrt_del_mif4(p->mrt_sock, mif);
  if (rv >= 0)
    return;

  /* %m prints errno left by the failed setsockopt() */
  log(L_ERR "%s: Cannot unregister interface %s MIF %i: %m",
      p->p.name, mif->iface->name, mif->index);
}
void
mkrt_register_mif_group(struct mkrt_proto *p, struct mif_group *grp)
{
ASSERT(!grp->owner);
grp->owner = &p->p;
WALK_ARRAY(grp->mifs, MIFS_MAX, mif)
if (mif)
mkrt_register_mif(p, mif);
}
void
mkrt_unregister_mif_group(struct mkrt_proto *p, struct mif_group *grp)
{
grp->owner = NULL;
WALK_ARRAY(grp->mifs, MIFS_MAX, mif)
if (mif)
mkrt_unregister_mif(p, mif);
}
/*
* MFC handling
*/
/*
 * FIB node initializer (passed to fib_init() in mkrt_start()): prepares the
 * empty list of sources of a new MFC group @G.
 */
static void
mkrt_init_mfc(void *G)
{
  struct mkrt_mfc_group *g = G;

  init_list(&g->sources);
}
/*
 * Return the source record for (@source, @group), allocating a new one when
 * the group has no record for @source yet. A fresh record has parent -1 and
 * is otherwise zeroed.
 *
 * NOTE(review): relies on fib_get() returning a usable group node even for a
 * group seen for the first time -- confirm against the FIB API.
 */
static struct mkrt_mfc_source *
mkrt_get_mfc(struct mkrt_proto *p, ip4_addr source, ip4_addr group)
{
  net_addr_mgrp4 key = NET_ADDR_MGRP4(group);
  struct mkrt_mfc_group *g = fib_get(&p->mfc_groups, (net_addr *) &key);
  struct mkrt_mfc_source *s;

  /* Existing record? */
  WALK_LIST(s, g->sources)
  {
    if (ip4_equal(s->addr, source))
      return s;
  }

  /* Not found, append a new one */
  s = mb_allocz(p->p.pool, sizeof(*s));
  s->addr = source;
  s->parent = -1;
  add_tail(&g->sources, NODE s);

  return s;
}
/* Interface sets collected from a route by mkrt_resolve_mfc_hook() */
struct mfc_result {
  u32 iifs;			/* Value of rta_iifs() for the route */
  u32 oifs;			/* Value of rta_oifs() for the route */
};
/*
 * rt_examine() callback: copy the incoming and outgoing interface sets of
 * @rte into the struct mfc_result passed as @data.
 */
static void
mkrt_resolve_mfc_hook(struct proto *p UNUSED, void *data, rte *rte)
{
  struct mfc_result *out = data;

  out->iifs = rta_iifs(rte->attrs);
  out->oifs = rta_oifs(rte->attrs);
}
/*
 * Resolve the MFC miss by adding a MFC entry. If no matching entry in the
 * routing table exists, add an empty one to satisfy the kernel.
 *
 * @src, @grp:  source and group address of the flow that missed the cache
 * @mif_index:  MIF index reported with the miss; becomes the entry's parent
 *
 * A @mif_index outside of 0 .. MIFS_MAX-1 is rejected: it cannot be tested
 * against the 32-bit interface masks without reading out of bounds, and the
 * resulting entry would only leave a stale local record behind.
 */
static void
mkrt_resolve_mfc(struct mkrt_proto *p, ip4_addr src, ip4_addr grp, int mif_index)
{
  if ((mif_index < 0) || (mif_index >= MIFS_MAX))
  {
    log(L_ERR "%s: MFC miss for (%I4, %I4) with invalid MIF index %d, ignoring",
	p->p.name, src, grp, mif_index);
    return;
  }

  /* The slot may still be empty if the MIF is not registered */
  struct mif *mif = p->mif_group->mifs[mif_index];

  TRACE(D_EVENTS, "MFC miss for (%I4, %I4, %s)", src, grp, mif ? mif->iface->name : "?");

  /* Look up the group route; res stays zeroed when the hook is not called */
  net_addr_mgrp4 n0 = NET_ADDR_MGRP4(grp);
  struct mfc_result res = {};
  rt_examine(p->p.main_channel, (net_addr *) &n0, mkrt_resolve_mfc_hook, &res);

  /* Remember what we are about to tell the kernel */
  struct mkrt_mfc_source *mfc = mkrt_get_mfc(p, src, grp);
  mfc->iifs = res.iifs;
  mfc->oifs = res.oifs;
  mfc->parent = mif_index;

  TRACE(D_EVENTS, "Adding MFC entry for (%I4, %I4)", src, grp);

  if (sk_mrt_add_mfc4(p->mrt_sock, src, grp, mfc->iifs, mfc->oifs, mfc->parent) < 0)
    log(L_ERR "%s: Failed to add MFC entry: %m", p->p.name);
}
/*
 * Delete the kernel MFC entry for (@src->addr, @grp), then unlink and free the
 * source record @src. The record is released even when the kernel call fails;
 * the failure is only logged.
 */
static void
mkrt_remove_mfc(struct mkrt_proto *p, struct mkrt_mfc_source *src, ip4_addr grp)
{
  TRACE(D_EVENTS, "Removing MFC entry for (%I4, %I4)", src->addr, grp);

  int rv = sk_mrt_del_mfc4(p->mrt_sock, src->addr, grp);
  if (rv < 0)
    log(L_ERR "%s: Failed to remove MFC entry: %m", p->p.name);

  rem_node(NODE src);
  mb_free(src);
}
/*
* Because a route in the internal table has changed, all the corresponding MFC
* entries are now wrong. Instead of correcting them, just flush the cache.
*/
static void
mkrt_reset_mfc_group(struct mkrt_proto *p, struct mkrt_mfc_group *grp)
{
ip4_addr group = net4_prefix(grp->n.addr);
struct mkrt_mfc_source *src;
WALK_LIST_FIRST(src, grp->sources)
mkrt_remove_mfc(p, src, group);
}
/*
 * Flush all MFC entries of group @grp and then delete the group node itself
 * from the protocol's FIB.
 */
static void
mkrt_free_mfc_group(struct mkrt_proto *p, struct mkrt_mfc_group *grp)
{
  /* Sources first, the node must be empty before it goes away */
  mkrt_reset_mfc_group(p, grp);

  fib_delete(&p->mfc_groups, grp);
}
/*
 * Route notification hook. Groups without any cached MFC state are ignored.
 * Otherwise all MFC entries of the group are dropped; when the route was
 * withdrawn (@new is NULL), the group's state node is deleted as well.
 */
static void
mkrt_rt_notify(struct proto *P, struct channel *c UNUSED, net *net, rte *new, rte *old UNUSED, ea_list *attrs UNUSED)
{
  struct mkrt_proto *p = (void *) P;
  struct mkrt_mfc_group *g = fib_find(&p->mfc_groups, net->n.addr);

  /* Nothing cached for this group */
  if (!g)
    return;

  if (!new)
  {
    /* Route is gone, drop the entries along with the state information */
    mkrt_free_mfc_group(p, g);
    return;
  }

  /* Route changed, drop just the entries */
  mkrt_reset_mfc_group(p, g);
}
/*
 * The MRT control socket delivers regular IGMP messages as well as so-called
 * upcalls generated by the kernel. Both kinds are dispatched here.
 */
void mif_forward_igmp(struct mif_group *grp, struct mif *mif, sock *src, int len);

/*
 * RX hook of the MRT control socket. The message type is taken from the first
 * byte returned by sk_rx_buffer(), while upcall fields are read through a
 * struct igmpmsg view placed at the start of the receive buffer.
 * Always returns 1.
 */
static int
mkrt_rx_hook(sock *sk, uint len)
{
  struct mkrt_proto *p = sk->data;
  struct igmpmsg *upcall = (void *) sk->rbuf;
  u8 *payload = (u8 *) sk_rx_buffer(sk, &len);
  u8 type = *payload;

  if (type == IGMPMSG_NOCACHE)
  {
    /* Cache miss, install an MFC entry for the reported flow */
    mkrt_resolve_mfc(p, ip4_from_in4(upcall->im_src), ip4_from_in4(upcall->im_dst), upcall->im_vif);
  }
  else if ((type == IGMPMSG_WRONGVIF) || (type == IGMPMSG_WHOLEPKT))
  {
    /* These should not happen unless some PIM-specific MRT options are enabled */
  }
  else
  {
    /* Regular IGMP packet, hand it over to the registered IGMP sockets */
    // FIXME: Use sk->lifindex or msg->im_vif ?
    mif_forward_igmp(p->mif_group, NULL, sk, len);
  }

  return 1;
}
/* Error hook of the MRT control socket: log the error code @err */
static void
mkrt_err_hook(sock *sk, int err)
{
  struct mkrt_proto *owner = sk->data;

  log(L_ERR "%s: Socket error: %M", owner->p.name, err);
}
/*
 * Create the MRT control socket of @p: an IPv4 IP socket for IGMP on which
 * MRT_INIT is issued. On success the socket is stored in p->mrt_sock and 1 is
 * returned. On failure the error is logged, the socket is freed and 0 is
 * returned.
 */
static int
mkrt_open_socket(struct mkrt_proto *p)
{
  sock *sk = sk_new(p->p.pool);

  /* Socket kind */
  sk->type = SK_IP;
  sk->subtype = SK_IPV4;
  sk->dport = IPPROTO_IGMP;
  sk->ttl = 1;
  sk->flags = SKF_LADDR_RX;

  /* Buffers; nothing is ever sent from this socket's TX buffer */
  sk->rbsize = 4096;
  sk->tbsize = 0;

  /* Hooks */
  sk->data = p;
  sk->rx_hook = mkrt_rx_hook;
  sk->err_hook = mkrt_err_hook;

  if (sk_open(sk) < 0)
  {
    sk_log_error(sk, p->p.name);
    rfree(sk);
    return 0;
  }

  if (sk_mrt_init4(sk) < 0)
  {
    /* EADDRINUSE gets its own message; anything else is reported via %m */
    if (errno != EADDRINUSE)
      log(L_ERR "%s: Cannot enable multicast in kernel: %m", p->p.name);
    else
      log(L_ERR "%s: Another multicast daemon is running", p->p.name);

    rfree(sk);
    return 0;
  }

  p->mrt_sock = sk;
  return 1;
}
static void
mkrt_close_socket(struct mkrt_proto *p)
{
sk_mrt_done4(p->mrt_sock);
rfree(p->mrt_sock);
p->mrt_sock = NULL;
}
/*
* Protocol glue
*/
/* The MKernel protocol config seen in the configuration being parsed, if any */
static struct mkrt_config *mkrt_cf;

/* Preconfig hook: forget the config remembered from a previous parse */
static void
mkrt_preconfig(struct protocol *P UNUSED, struct config *c UNUSED)
{
  mkrt_cf = NULL;
}
/*
 * Create the config structure of the MKernel protocol. Reports a config error
 * when an MKernel protocol has already been defined in the same configuration.
 */
struct proto_config *
mkrt_init_config(int class)
{
  if (mkrt_cf)
    cf_error("Multicast kernel protocol already defined");

  struct proto_config *cf = proto_config_new(&proto_unix_mkrt, class);

  /* Remember it so that a second definition can be detected */
  mkrt_cf = (struct mkrt_config *) cf;

  return cf;
}
/* Postconfig hook: the protocol is useless without a channel, so require one */
void
mkrt_postconfig(struct proto_config *CF)
{
  if (EMPTY_LIST(CF->channels))
  {
    cf_error("Channel not specified");
  }
}
static struct proto *
mkrt_init(struct proto_config *CF)
{
struct mkrt_proto *p = proto_new(CF);
p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(CF));
p->p.rt_notify = mkrt_rt_notify;
p->mif_group = global_mif_group;
return &p->p;
}
/*
 * Start hook: set up the FIB of MFC groups and open the MRT control socket.
 * With the socket open, the MIF group is registered and PS_UP is returned;
 * otherwise the protocol stays in PS_START.
 */
static int
mkrt_start(struct proto *P)
{
  struct mkrt_proto *p = (void *) P;

  fib_init(&p->mfc_groups, p->p.pool, NET_MGRP4, sizeof(struct mkrt_mfc_group),
	   OFFSETOF(struct mkrt_mfc_group, n), 6, mkrt_init_mfc);

  if (mkrt_open_socket(p))
  {
    mkrt_register_mif_group(p, p->mif_group);
    return PS_UP;
  }

  return PS_START;
}
/*
 * Shutdown hook. In PS_START there is no socket and no registered MIF group
 * (see mkrt_start()), so there is nothing to undo. Otherwise the MIF group is
 * unregistered and the MRT control socket closed. Always returns PS_DOWN.
 */
static int
mkrt_shutdown(struct proto *P)
{
  struct mkrt_proto *p = (void *) P;

  if (p->p.proto_state != PS_START)
  {
    mkrt_unregister_mif_group(p, p->mif_group);
    mkrt_close_socket(p);
  }

  return PS_DOWN;
}
/*
 * Reconfigure hook: only the main channel is reconfigured. Returns 1 when
 * proto_configure_channel() succeeds and 0 otherwise.
 */
static int
mkrt_reconfigure(struct proto *p, struct proto_config *CF)
{
  if (proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF)))
    return 1;

  return 0;
}
/*
 * Dump hook: print every recorded (S, G) source as
 * (source, group, parent) -> iifs oifs.
 */
static void
mkrt_dump(struct proto *P)
{
  struct mkrt_proto *p = (void *) P;

  debug("\t(S,G) entries in MFC in kernel:\n");

  FIB_WALK(&p->mfc_groups, struct mkrt_mfc_group, g)
  {
    struct mkrt_mfc_source *s;

    WALK_LIST(s, g->sources)
    {
      debug("\t\t(%I4, %I4, %d) -> %b %b\n",
	    s->addr, net4_prefix(g->n.addr), s->parent, s->iifs, s->oifs);
    }
  }
  FIB_WALK_END;
}
/* Protocol descriptor of the MKernel protocol */
struct protocol proto_unix_mkrt = {
  /* Identification */
  .name =		"MKernel",
  .template =		"mkernel%d",
  .channel_mask =	NB_MGRP4,

  /* Structure sizes */
  .proto_size =		sizeof(struct mkrt_proto),
  .config_size =	sizeof(struct mkrt_config),

  /* Configuration hooks */
  .preconfig =		mkrt_preconfig,
  .postconfig =		mkrt_postconfig,
  .reconfigure =	mkrt_reconfigure,

  /* Instance lifecycle hooks */
  .init =		mkrt_init,
  .start =		mkrt_start,
  .shutdown =		mkrt_shutdown,
  .dump =		mkrt_dump,
};