From 76dd4730eac6a632ab28ef85d266cbfc0abc89ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hlavat=C3=BD?= Date: Fri, 27 May 2016 02:40:27 +0200 Subject: [PATCH] Added the mkernel protocol --- lib/socket.h | 6 + nest/iface.c | 9 +- nest/iface.h | 8 + sysdep/unix/Makefile | 4 +- sysdep/unix/io.c | 22 +- sysdep/unix/mkrt.Y | 39 +++ sysdep/unix/mkrt.c | 585 +++++++++++++++++++++++++++++++++++++++++++ sysdep/unix/mkrt.h | 55 ++++ 8 files changed, 719 insertions(+), 9 deletions(-) create mode 100644 sysdep/unix/mkrt.Y create mode 100644 sysdep/unix/mkrt.c create mode 100644 sysdep/unix/mkrt.h diff --git a/lib/socket.h b/lib/socket.h index 5b8f0dd3..7fed4ca3 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -116,6 +116,7 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou #define SK_MAGIC 7 /* Internal use by sysdep code */ #define SK_UNIX_PASSIVE 8 #define SK_UNIX 9 +#define SK_IGMP 10 /* ? - ? - ? ? ? */ /* * Socket subtypes @@ -146,6 +147,11 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou * per-packet basis using platform dependent options (but these are not * available in some corner cases). The first way is used when SKF_BIND is * specified, the second way is used otherwise. + * + * SK_IGMP sockets are just IP sockets with IPPROTO_IGMP, but does not receive + * packets directly from kernel. Instead, bird forwards all packets received + * on multicast routing control socket to this socket internally. That means, + * all IGMP packets even for groups that noone is joined are received here. */ #endif diff --git a/nest/iface.c b/nest/iface.c index d031d4cc..4c3e0774 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -85,6 +85,8 @@ if_dump(struct iface *i) debug(" IGN"); if (i->flags & IF_TMP_DOWN) debug(" TDOWN"); + if (i->flags & IF_VIFI_ASSIGNED) + debug(" MR"); debug(" MTU=%d\n", i->mtu); WALK_LIST(a, i->addrs) { @@ -115,7 +117,7 @@ if_what_changed(struct iface *i, struct iface *j) { unsigned c; - if (((i->flags ^ j->flags) & ~(IF_UP | IF_SHUTDOWN | IF_UPDATED | IF_ADMIN_UP | IF_LINK_UP | IF_TMP_DOWN | IF_JUST_CREATED)) + if (((i->flags ^ j->flags) & ~(IF_UP | IF_SHUTDOWN | IF_UPDATED | IF_ADMIN_UP | IF_LINK_UP | IF_TMP_DOWN | IF_JUST_CREATED | IF_VIFI_ASSIGNED)) || i->index != j->index) return IF_CHANGE_TOO_MUCH; c = 0; @@ -131,7 +133,7 @@ if_what_changed(struct iface *i, struct iface *j) static inline void if_copy(struct iface *to, struct iface *from) { - to->flags = from->flags | (to->flags & IF_TMP_DOWN); + to->flags = from->flags | (to->flags & (IF_TMP_DOWN | IF_VIFI_ASSIGNED)); to->mtu = from->mtu; } @@ -780,10 +782,11 @@ if_show(void) type = "PtP"; else type = "MultiAccess"; - cli_msg(-1004, "\t%s%s%s Admin%s Link%s%s%s MTU=%d", + cli_msg(-1004, "\t%s%s%s%s Admin%s Link%s%s%s MTU=%d", type, (i->flags & IF_BROADCAST) ? " Broadcast" : "", (i->flags & IF_MULTICAST) ? " Multicast" : "", + (i->flags & IF_VIFI_ASSIGNED) ? " MRoutable" : "", (i->flags & IF_ADMIN_UP) ? "Up" : "Down", (i->flags & IF_LINK_UP) ? "Up" : "Down", (i->flags & IF_LOOPBACK) ? " Loopback" : "", diff --git a/nest/iface.h b/nest/iface.h index a0411f0a..042543ed 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -33,6 +33,7 @@ struct iface { unsigned flags; unsigned mtu; unsigned index; /* OS-dependent interface index */ + unsigned vifi; /* VIF index for multicast routing */ list addrs; /* Addresses assigned to this interface */ struct ifa *addr; /* Primary address */ list neighbors; /* All neighbors on this interface */ @@ -47,6 +48,7 @@ struct iface { #define IF_IGNORE 0x40 /* Not to be used by routing protocols (loopbacks etc.) */ #define IF_ADMIN_UP 0x80 /* Administrative up (e.g. IFF_UP in Linux) */ #define IF_LINK_UP 0x100 /* Link available (e.g. IFF_LOWER_UP in Linux) */ +#define IF_VIFI_ASSIGNED 0x200 /* This device is set up for multicast routing */ #define IA_PRIMARY 0x10000 /* This address is primary */ #define IA_SECONDARY 0x20000 /* This address has been reported as secondary by the kernel */ @@ -101,6 +103,12 @@ struct iface *if_find_by_name(char *); struct iface *if_get_by_name(char *); void ifa_recalc_all_primary_addresses(void); +static inline int +if_get_vifi(struct iface *iface) +{ + return (iface && (iface->flags & IF_VIFI_ASSIGNED)) ? iface->vifi : -1; +} + /* The Neighbor Cache */ diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index c5d55431..88b7a95e 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -1,5 +1,5 @@ -src := io.c krt.c log.c main.c random.c +src := io.c krt.c log.c main.c random.c mkrt.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(conf-y-targets): $(s)krt.Y +$(conf-y-targets): $(s)krt.Y $(s)mkrt.Y diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index bd5f3d06..d6e51dcc 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -40,6 +40,7 @@ #include "sysdep/unix/unix.h" #include CONFIG_INCLUDE_SYSIO_H +#include "sysdep/unix/mkrt.h" /* Maximum number of calls of tx handler for one socket in one * poll iteration. Should be small enough to not monopolize CPU by @@ -825,7 +826,7 @@ sk_skip_ip_header(byte *pkt, int *len) byte * sk_rx_buffer(sock *s, int *len) { - if (sk_is_ipv4(s) && s->type == SK_IP) + if (sk_is_ipv4(s) && (s->type == SK_IP || s->type == SK_IGMP)) return sk_skip_ip_header(s->rbuf, len); else return s->rbuf; @@ -1232,7 +1233,7 @@ sk_setup(sock *s) s->flags |= SKF_PKTINFO; #ifdef CONFIG_USE_HDRINCL - if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO)) + if (sk_is_ipv4(s) && (s->type == SK_IP || s->type == SK_IGMP) && (s->flags & SKF_PKTINFO)) { s->flags &= ~SKF_PKTINFO; s->flags |= SKF_HDRINCL; @@ -1270,7 +1271,7 @@ sk_setup(sock *s) if (sk_request_cmsg4_ttl(s) < 0) return -1; - if ((s->type == SK_UDP) || (s->type == SK_IP)) + if ((s->type == SK_UDP) || (s->type == SK_IP) || (s->type == SK_IGMP)) if (sk_disable_mtu_disc4(s) < 0) return -1; @@ -1297,7 +1298,7 @@ sk_setup(sock *s) if (sk_request_cmsg6_ttl(s) < 0) return -1; - if ((s->type == SK_UDP) || (s->type == SK_IP)) + if ((s->type == SK_UDP) || (s->type == SK_IP) || (s->type == SK_IGMP)) if (sk_disable_mtu_disc6(s) < 0) return -1; @@ -1458,6 +1459,11 @@ sk_open(sock *s) do_bind = 1; break; + case SK_IGMP: + af = AF_INET; + s->dport = IPPROTO_IGMP; + s->rbsize = 0; /* read buffer is shared */ + /* Fall thru */ case SK_IP: fd = socket(af, SOCK_RAW, s->dport); bind_port = 0; @@ -1538,6 +1544,12 @@ sk_open(sock *s) sk_alloc_bufs(s); } + if (s->type == SK_IGMP) + { + mkrt_listen(s); + return 0; + } + if (!(s->flags & SKF_THREAD)) sk_insert(s); return 0; @@ -1724,6 +1736,7 @@ sk_maybe_write(sock *s) case SK_UDP: case SK_IP: + case SK_IGMP: { if (s->tbuf == s->tpos) return 1; @@ -2112,6 +2125,7 @@ io_init(void) init_list(&sock_list); init_list(&global_event_list); krt_io_init(); + mkrt_io_init(); init_times(); update_times(); boot_time = now; diff --git a/sysdep/unix/mkrt.Y b/sysdep/unix/mkrt.Y new file mode 100644 index 00000000..8be6faca --- /dev/null +++ b/sysdep/unix/mkrt.Y @@ -0,0 +1,39 @@ +/* + * BIRD -- UNIX Multicast route syncer configuration + * + * (c) 2016 Ondrej Hlavaty + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +CF_HDR + +#include "sysdep/unix/mkrt.h" + +CF_DEFINES + +CF_DECLS + +CF_KEYWORDS(MKERNEL) + +CF_GRAMMAR + +/* Kernel interface protocol */ + +CF_ADDTO(proto, mkrt_proto '}' { mkrt_config_finish(this_proto); }) + +mkrt_proto_start: proto_start MKERNEL { this_proto = mkrt_config_init($1); } + ; + +mkrt_proto: + mkrt_proto_start proto_name '{' + | mkrt_proto mkrt_proto_item ';' + +mkrt_proto_item: + proto_item + | proto_channel + ; + +CF_CODE + +CF_END diff --git a/sysdep/unix/mkrt.c b/sysdep/unix/mkrt.c new file mode 100644 index 00000000..77a81355 --- /dev/null +++ b/sysdep/unix/mkrt.c @@ -0,0 +1,585 @@ +/* + * BIRD -- Multicast routing kernel + * + * (c) 2016 Ondrej Hlavaty + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/* + * DOC: Multicast route kernel synchronization + * + * This protocol is the BIRD's interface to the kernel part of multicast + * routing. It assignes the VIF indices to interfaces, forwards the IGMP + * packets to SK_IGMP sockets and, of course, adds MFC entries to the kernel. + * + * Multicast in current kernel is a bit tricky. There must be exactly one + * socket on which setsockopt MRT_INIT is called, then multicast forwarding is + * enabled. Every multicast routing table update must be done through this + * protocol. + * + * Also, that socket is the only one that receives IGMP packets on non-joined + * groups. These packets IGMP protocol needs to receive, so we forward them + * internally. To simulate sane behavior, protocol can open socket with type + * SK_IGMP, which is almost as a SK_IP, IPPROTO_IGMP socket but receives copy + * of all packets. + * + * As always with system-dependent code, prepare for everything. Because the + * BSD kernel knows nothing apart from (S,G) routes, and Linux even blocked the + * (*,G) routes for something not being a regular (*,G) routes, we must add the + * routes in reaction to missed packets. This is very bad, but probably the + * only solution, until someone rewrites the kernel part. + * + * Part of the protocol is global and "static", without the need to configure. + * Another part is a usual proto instance. + */ + +#include "nest/bird.h" +#include "nest/iface.h" +#include "lib/socket.h" +#include "sysdep/unix/unix.h" +#include "sysdep/unix/mkrt.h" + +#define HASH_I_KEY(n) n->iface->index +#define HASH_I_NEXT(n) n->next +#define HASH_I_EQ(a,b) (a == b) +#define HASH_I_FN(n) n + +#define HASH_MFC_KEY(n) n->ga +#define HASH_MFC_NEXT(n) n->next +#define HASH_MFC_EQ(a,b) ipa_equal(a, b) +#define HASH_MFC_FN(k) ipa_hash(k) + +static struct mkrt_config *mkrt_cf; + +/* Global code for SK_IGMP sockets */ + +struct mkrt_iface { + struct mkrt_iface *next; + struct iface *iface; + list sockets; +}; + +static struct mkrt_global { + pool *pool; + list sockets; + HASH(struct mkrt_iface) ifaces; +} mkrt_global; + +void +mkrt_io_init(void) +{ + mkrt_global.pool = rp_new(&root_pool, "Multicast kernel Syncer"); + HASH_INIT(mkrt_global.ifaces, mkrt_global.pool, 6); + init_list(&mkrt_global.sockets); +} + +static struct mkrt_iface * +mkrt_iface_find(struct mkrt_proto *p, unsigned ifindex) +{ + return HASH_FIND(mkrt_global.ifaces, HASH_I, ifindex); +} + +static struct mkrt_iface * +mkrt_iface_get(unsigned ifindex) +{ + struct mkrt_iface *ifa = HASH_FIND(mkrt_global.ifaces, HASH_I, ifindex); + if (ifa) + return ifa; + + ifa = mb_allocz(mkrt_global.pool, sizeof(struct mkrt_iface)); + init_list(&ifa->sockets); + ifa->iface = if_find_by_index(ifindex); + + HASH_INSERT(mkrt_global.ifaces, HASH_I, ifa); + return ifa; +} + +/* + * Add the socket into the list of sockets that are passed a copy of every IGMP + * packet received on the control socket. + */ +void +mkrt_listen(sock *s) +{ + ASSERT(s->type == SK_IGMP); + + if (s->iface) + { + struct mkrt_iface *i = mkrt_iface_get(s->iface->index); + add_tail(&i->sockets, &s->n); + } + else + add_tail(&mkrt_global.sockets, &s->n); + + log(L_INFO "Socket fd %i getting IGMP", s->fd); +} + +/* + * Forward a packet from one socket to another. Emulates the receiving routine. + * Socket is in exactly the same state as if it received the packet itself, but + * must not modify it to preserve it for others. + */ +static inline void +mkrt_rx_forward(sock *from, sock *to, int len) +{ + if (!to->rx_hook) + return; + + to->faddr = from->faddr; + if (to->flags & SKF_LADDR_RX) + { + to->laddr = from->laddr; + to->lifindex = from->lifindex; + } + + to->rbuf = from->rbuf; + to->rpos = from->rpos; + to->rbsize = from->rbsize; + + to->rx_hook(to, len); + + to->faddr = to->laddr = IPA_NONE; + to->lifindex = 0; + to->rbuf = to->rpos = NULL; + to->rbsize = 0; +} + +/* + * Forward a packet to all sockets on a list. + */ +static inline void +mkrt_rx_forward_all(list *sockets, sock *sk, int len) +{ + node *n, *next; + + WALK_LIST_DELSAFE(n, next, *sockets) + mkrt_rx_forward(sk, SKIP_BACK(sock, n, n), len); +} + +/*************** + Mkernel proto + ***************/ + +/* + * Call a setsockopt with a MRT_ option. + */ +static inline int +mkrt_call(struct mkrt_proto *mkrt, int option_name, const void *val, socklen_t len) +{ + return setsockopt(mkrt->igmp_sock->fd, IPPROTO_IP, option_name, val, len); +} + +static inline vifi_t +mkrt_alloc_vifi(struct mkrt_proto *p, struct iface *iface) +{ + if (p->vif_count >= MAXVIFS) + { + log(L_ERR "Maximum number of interfaces for multicast routing reached."); + return -1; + } + + for (vifi_t i = 0; ; i = (i + 1) % MAXVIFS) + if (p->vif_map[i] == NULL) + { + p->vif_map[i] = iface; + iface->vifi = i; + p->vif_count++; + return i; + } +} + +static inline void +mkrt_free_vifi(struct mkrt_proto *p, vifi_t vifi) +{ + p->vif_count -= p->vif_map[vifi] != NULL; + p->vif_map[vifi] = NULL; +} + +static void +mkrt_add_vif(struct mkrt_proto *p, struct iface *i) +{ + int err; + + if (i->flags & IF_VIFI_ASSIGNED) + return; + + mkrt_alloc_vifi(p, i); + + struct vifctl vc = {0}; + vc.vifc_vifi = i->vifi; + vc.vifc_flags = VIFF_USE_IFINDEX; + vc.vifc_lcl_ifindex = i->index; + + if ((err = mkrt_call(p, MRT_ADD_VIF, &vc, sizeof(vc))) < 0) + goto err; + + TRACE(D_EVENTS, "Iface %s (%i) assigned VIF %i", i->name, i->index, i->vifi); + + i->flags |= IF_VIFI_ASSIGNED; + return; + +err: + log(L_ERR "Error while assigning %s VIF %i: %m", i->name, i->vifi, err); + mkrt_free_vifi(p, i->vifi); +} + +static int +mkrt_del_vif(struct mkrt_proto *p, struct iface *i) +{ + int err; + + if (!i->flags & IF_VIFI_ASSIGNED) + return 0; + + struct vifctl vc = {0}; + vc.vifc_vifi = i->vifi; + + if ((err = mkrt_call(p, MRT_DEL_VIF, &vc, sizeof(vc))) < 0) + goto err; + + mkrt_free_vifi(p, i->vifi); + i->flags &= ~IF_VIFI_ASSIGNED; + return 0; +err: + log(L_ERR "Error while unassigning %s VIF %i: %m", i->name, i->vifi, err); + return err; +} + +static struct mkrt_mfc_group * +mkrt_mfc_get(struct mkrt_proto *p, ip_addr ga) +{ + struct mkrt_mfc_group *mg = HASH_FIND(p->mfc_groups, HASH_MFC, ga); + if (mg) + return mg; + + mg = mb_allocz(p->p.pool, sizeof(struct mkrt_mfc_group)); + mg->ga = ga; + init_list(&mg->sources); + HASH_INSERT(p->mfc_groups, HASH_MFC, mg); + return mg; +} + +/* + * Add a MFC entry for (S, G) with parent vifi, according to the route. + */ +static int +mkrt_mfc_update(struct mkrt_proto *p, ip_addr group, ip_addr source, int vifi, struct rte *rte) +{ + struct mfcctl mc = {0}; + int err; + + mc.mfcc_origin = ipa_to_in4(source); + mc.mfcc_mcastgrp = ipa_to_in4(group); + mc.mfcc_parent = vifi; + + if (rte && RTE_MGRP_ISSET(p->vif_map[vifi], rte->u.mkrt.iifs)) + for (int i = 0; i < MAXVIFS; i++) + if (RTE_MGRP_ISSET(p->vif_map[i], rte->u.mkrt.oifs)) + mc.mfcc_ttls[i] = 1; + + TRACE(D_EVENTS, "%s MFC entry for (%I, %I)", (vifi > 0) ? "Add" : "Delete", source, group); + if ((err = mkrt_call(p, (vifi > 0) ? MRT_ADD_MFC : MRT_DEL_MFC, &mc, sizeof(mc)) < 0)) + log(L_WARN "Mkernel: failed to %s MFC entry: %m", (vifi > 0) ? "add" : "delete", err); + + return err; +} + +struct mfc_request { + ip_addr *group, *source; + vifi_t vifi; + u32 iifs, oifs; +}; + +/* + * Expand the attributes from the struct mfc_request and call mkrt_mfc_update, + * and pass back the result. + */ +static void +mkrt_mfc_call_update(struct proto *p, void *data, rte *rte) +{ + struct mfc_request *req = data; + mkrt_mfc_update((struct mkrt_proto *) p, *req->group, *req->source, req->vifi, rte); + if (rte) + { + req->iifs = rte->u.mkrt.iifs; + req->oifs = rte->u.mkrt.oifs; + } +} + +/* + * Resolve the MFC miss by adding a MFC entry. If no matching entry in the + * routing table exists, add an empty one to satisfy the kernel. + */ +static void +mkrt_mfc_resolve(struct mkrt_proto *p, ip_addr group, ip_addr source, vifi_t vifi) +{ + struct iface *iface = p->vif_map[vifi]; + + TRACE(D_EVENTS, "MFC miss for (%I, %I, %s)", source, group, iface ? iface->name : "??"); + + net_addr_mgrp4 n0 = NET_ADDR_MGRP4(ipa_to_ip4(group)); + + struct mfc_request req = { &group, &source, vifi }; + if (!rt_route(p->p.main_channel, (net_addr *) &n0, mkrt_mfc_call_update, &req)) + mkrt_mfc_call_update((struct proto *) p, &req, NULL); + + struct mkrt_mfc_group *grp = mkrt_mfc_get(p, group); + struct mkrt_mfc_source *src = mb_alloc(p->p.pool, sizeof(struct mkrt_mfc_source)); + src->addr = source; + src->vifi = vifi; + src->iifs = req.iifs; + src->oifs = req.oifs; + add_tail(&grp->sources, NODE src); +} + +/* + * Because a route in the internal table has changed, all the corresponding MFC + * entries are now wrong. Instead of correcting them, flush the cache. + */ +static void +mkrt_mfc_clean(struct mkrt_proto *p, struct mkrt_mfc_group *mg) +{ + struct mkrt_mfc_source *n, *next; + WALK_LIST_DELSAFE(n, next, mg->sources) + { + mkrt_mfc_update(p, mg->ga, n->addr, -1, NULL); + rem_node(NODE n); + mb_free(n); + } +} + +static void +mkrt_mfc_free(struct mkrt_proto *p, struct mkrt_mfc_group *mg) +{ + mkrt_mfc_clean(p, mg); + HASH_REMOVE(p->mfc_groups, HASH_MFC, mg); + mb_free(mg); +} + +/* + * An IGMP message received on the socket can be not only a packet received + * from the network, but also a so-called upcall from the kernel. We must process them here. + */ +static int +mkrt_control_message(struct mkrt_proto *p, sock *sk, int len) +{ + struct igmpmsg *msg = (struct igmpmsg *) sk->rbuf; + u8 igmp_type = * (u8 *) sk_rx_buffer(sk, &len); + + switch (igmp_type) + { + case IGMPMSG_NOCACHE: + mkrt_mfc_resolve(p, ipa_from_in4(msg->im_dst), ipa_from_in4(msg->im_src), msg->im_vif); + return 1; + + case IGMPMSG_WRONGVIF: + case IGMPMSG_WHOLEPKT: + /* Neither should ever happen. IGMPMSG_WRONGVIF is a common situation, + * and this upcall is called only when switching to (S,G) tree in other + * PIM variants. + * + * Similarly, the WHOLEPKT should be called only when we add the register + * VIF and ask kernel for giving us whole packets + */ + return 1; + + default: + return 0; + } +} + +static int +mkrt_rx_hook(sock *sk, int len) +{ + struct mkrt_proto *p = sk->data; + + /* Do not forward upcalls, IGMP cannot parse them */ + if (mkrt_control_message(p, sk, len)) + return 1; + + mkrt_rx_forward_all(&mkrt_global.sockets, sk, len); + + struct mkrt_iface *ifa = mkrt_iface_find(p, sk->lifindex); + if (ifa) + mkrt_rx_forward_all(&ifa->sockets, sk, len); + + return 1; +} + +static void +mkrt_err_hook(sock *sk, int err) +{ + log(L_TRACE "IGMP error: %m", err); +} + +static void +mkrt_preconfig(struct protocol *P UNUSED, struct config *c UNUSED) +{ + mkrt_cf = NULL; +} + +struct proto_config * +mkrt_config_init(int class) +{ + if (mkrt_cf) + cf_error("Kernel multicast route syncer already defined"); + + mkrt_cf = (struct mkrt_config *) proto_config_new(&proto_mkrt, class); + return (struct proto_config *) mkrt_cf; +} + +void +mkrt_config_finish(struct proto_config *pc) +{ + struct channel_config *cc = proto_cf_main_channel(pc); + + if (!cc) + cc = channel_config_new(NULL, NET_MGRP4, pc); + + cc->ra_mode = RA_OPTIMAL; +} + +static int +mkrt_init_sock(struct mkrt_proto *p) +{ + sock *sk; + + if (!(sk = sk_new(p->p.pool))) + goto err; + + sk->type = SK_IP; + sk->subtype = SK_IPV4; + sk->dport = IPPROTO_IGMP; + sk->flags = SKF_LADDR_RX; + + sk->data = p; + sk->ttl = 1; + sk->rx_hook = mkrt_rx_hook; + sk->err_hook = mkrt_err_hook; + + sk->rbsize = 4096; + sk->tbsize = 0; + + if (sk_open(sk) < 0) + goto err_sk; + + p->igmp_sock = sk; + + int v = 1; + if (mkrt_call(p, MRT_INIT, &v, sizeof(v)) < 0) + { + if (errno == EADDRINUSE) + log(L_ERR "Mkernel: Another multicast routing daemon is running"); + else + log(L_ERR "Mkernel: Cannot enable multicast features in kernel: %m", errno); + goto err_sk; + } + + log(L_DEBUG "Multicast control socket open with fd %i", sk->fd); + return 0; + +err_sk: + rfree(sk); + p->igmp_sock = NULL; +err: + return -1; +} + +void +mkrt_rt_notify(struct proto *P, struct channel *c, net *net, rte *new, rte *old, ea_list *attrs) +{ + struct mkrt_proto *p = (struct mkrt_proto *) P; + net_addr *n = net->n.addr; + struct mkrt_mfc_group *mg = mkrt_mfc_get(p, net_prefix(n)); + + /* Drop all MFC entries (possibly along with the state information) for a group */ + if (new) + mkrt_mfc_clean(p, mg); + else + mkrt_mfc_free(p, mg); +} + +static void +mkrt_if_notify(struct proto *P, uint flags, struct iface *iface) +{ + struct mkrt_proto *p = (struct mkrt_proto *) P; + + if (iface->flags & IF_IGNORE) + return; + + if (flags & IF_CHANGE_UP) + mkrt_add_vif(p, iface); + + if (flags & IF_CHANGE_DOWN) + mkrt_del_vif(p, iface); +} + +static struct proto * +mkrt_init(struct proto_config *c) +{ + struct mkrt_proto *p = proto_new(c); + + p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(c)); + + p->p.rt_notify = mkrt_rt_notify; + p->p.if_notify = mkrt_if_notify; + + return &p->p; +} + +static int +mkrt_start(struct proto *P) +{ + struct mkrt_proto *p = (struct mkrt_proto *) P; + + p->vif_count = 0; + + HASH_INIT(p->mfc_groups, p->p.pool, 6); + + if (mkrt_init_sock(p) < 0) + return PS_DOWN; + + return PS_UP; +} + +static int +mkrt_shutdown(struct proto *P) +{ + struct mkrt_proto *p = (struct mkrt_proto *) P; + mkrt_call(p, MRT_DONE, NULL, 0); + rfree(p->igmp_sock); + return PS_DOWN; +} + +static void +mkrt_dump(struct proto *P) +{ + struct mkrt_proto *p = (struct mkrt_proto *) P; + struct mkrt_mfc_source *s; + + debug("\tVIFs as in bitmaps:\n\t\t"); + for (int i = MAXVIFS; i >= 0; i--) + if (p->vif_map[i]) + debug("%s ", p->vif_map[i]->name); + debug("\n\t(S,G) entries in MFC in kernel:\n"); + HASH_WALK(p->mfc_groups, next, group) + { + WALK_LIST(s, group->sources) + debug("\t\t(%I, %I, %s) -> %b %b\n", s->addr, group->ga, p->vif_map[s->vifi]->name, s->iifs, s->oifs); + } + HASH_WALK_END; +} + +struct protocol proto_mkrt = { + .name = "mkernel", + .template = "mkernel%d", + .proto_size = sizeof(struct mkrt_proto), + .config_size = sizeof(struct proto_config), + .channel_mask = NB_MGRP, + .preconfig = mkrt_preconfig, + .init = mkrt_init, + .start = mkrt_start, + .shutdown = mkrt_shutdown, + .dump = mkrt_dump, +}; diff --git a/sysdep/unix/mkrt.h b/sysdep/unix/mkrt.h new file mode 100644 index 00000000..586dfc93 --- /dev/null +++ b/sysdep/unix/mkrt.h @@ -0,0 +1,55 @@ +/* + * BIRD -- Multicast routing kernel + * + * (c) 2016 Ondrej Hlavaty + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "nest/bird.h" +#include "nest/protocol.h" +#include "lib/socket.h" +#include "lib/hash.h" + +#include + +extern struct protocol proto_mkrt; + +/* Proto state - kernel comm */ + +struct mkrt_config { + struct proto_config cf; +}; + +struct mkrt_mfc_group { + struct mkrt_mfc_group *next; + ip_addr ga; + list sources; +}; + +struct mkrt_mfc_source { + node n; + ip_addr addr; + + /* remember these for dumping */ + vifi_t vifi; + u32 iifs, oifs; +}; + +struct mkrt_proto { + struct proto p; + + sock *igmp_sock; + unsigned vif_count; + struct iface *vif_map[MAXVIFS]; + + HASH(struct mkrt_mfc_group) mfc_groups; +}; + +void mkrt_io_init(void); +unsigned mkrt_get_vif(struct iface *i); +void mkrt_listen(sock *s); +void mkrt_stop(sock *s); + +struct proto_config *mkrt_config_init(int class); +void mkrt_config_finish(struct proto_config *);