From 45f01fb32ef17fd5d834ef8e4090528655ae71ee Mon Sep 17 00:00:00 2001
From: Jan Moskyto Matejka
Date: Fri, 4 Mar 2016 12:55:50 +0100
Subject: [PATCH] Netlink: MPLS routes in kernel, not fully working yet.

TODO:
- fix static to detect MPLS stack changes on reload
- fix MPLS_ENCAP parsing on route scan

Anyway, Bird is now capable to insert both MPLS routes and MPLS encap
routes into kernel.

It was (among others) needed to define platform-specific AF_MPLS to 28
as this constant has been assigned in the linux kernel.

No support for BSD now, it may be added in the future.
---
Review notes (not part of the commit message):
- mpls_ntoh(): the stack length is now the label count (mpls_buflen() / 4)
  instead of the byte count, and bytes are read as u8 so that values with
  the high bit set are not sign-extended into the label.
- rta_get_mpls(): the label count is clamped to MPLS_STACK_LENGTH.
- nl_parse_route(): the RTA_NEWDST label stack is stored as
  EA_GEN_MPLS_STACK (the attribute nl_send_route() reads) instead of
  EA_KRT_PREFSRC.
- All fixes are same-line edits; hunk line counts are unchanged.

 lib/mpls.h             |  43 +++++++++
 lib/net.h              |  15 +++-
 sysdep/cf/linux.h      |   4 +
 sysdep/linux/netlink.c | 197 +++++++++++++++++++++++++++++++++++++----
 sysdep/unix/krt.c      |   5 +-
 5 files changed, 246 insertions(+), 18 deletions(-)

diff --git a/lib/mpls.h b/lib/mpls.h
index 8673444c..ac2808c4 100644
--- a/lib/mpls.h
+++ b/lib/mpls.h
@@ -11,10 +11,53 @@
 #define _BIRD_MPLS_H_
 
 #define MPLS_STACK_LENGTH   8 /* Adjust this if you need deeper MPLS stack */
+#define MPLS_PXLEN         20 /* Length of the label in bits. Constant. */
+
+/*
+ *   RFC 3032 updated by RFC 5462:
+ *
+ *    0                   1                   2                   3
+ *    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Label
+ *   |                Label                  | TC  |S|       TTL     | Stack
+ *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Entry
+ *
+ *   Label:  Label Value, 20 bits
+ *   TC:     Traffic Class, 3 bits
+ *   S:      Bottom of Stack, 1 bit
+ *   TTL:    Time to Live, 8 bits
+ */
 
 typedef struct mpls_stack {
   u8 len;
   u32 label[MPLS_STACK_LENGTH];
 } mpls_stack;
 
+static inline char * const mpls_hton(mpls_stack s) {
+  static char buf[MPLS_STACK_LENGTH*4];
+  int i;
+  for (i = 0; i < s.len; i++) {
+    buf[i*4 + 0] = s.label[i] >> 12;
+    buf[i*4 + 1] = s.label[i] >> 4;
+    buf[i*4 + 2] = (s.label[i] << 4) | (i == s.len - 1 ? 0x1 : 0);
+    buf[i*4 + 3] = 0;
+  }
+  return buf;
+}
+
+static inline int mpls_buflen(const char *buf) {
+  // Looking for the Bottom of Stack bit set to 1; returns the length in bytes.
+  int i;
+  for (i = 0; !(buf[i++*4 + 2] & 0x1); );
+  return i*4;
+}
+
+static inline mpls_stack mpls_ntoh(const char *buf) {
+  mpls_stack s = { .len = mpls_buflen(buf) / 4 }; /* label count, not byte count */
+  int i;
+  for (i = 0; i < s.len; i++)
+    s.label[i] = ((u8) buf[i*4 + 0] << 12) | ((u8) buf[i*4 + 1] << 4) | ((u8) buf[i*4 + 2] >> 4);
+  return s;
+}
+
 #endif
diff --git a/lib/net.h b/lib/net.h
index a853042e..66ea4f33 100644
--- a/lib/net.h
+++ b/lib/net.h
@@ -11,6 +11,7 @@
 #define _BIRD_NET_H_
 
 #include "lib/ip.h"
+#include "lib/mpls.h"
 
 
 #define NET_IP4         1
@@ -134,7 +135,7 @@ extern const u16 net_max_text_length[];
   ((net_addr_roa6) { NET_ROA6, pxlen, sizeof(net_addr_roa6), prefix, max_pxlen, asn })
 
 #define NET_ADDR_MPLS(label) \
-  ((net_addr_mpls) { NET_MPLS, 0, sizeof(net_addr_mpls), label })
+  ((net_addr_mpls) { NET_MPLS, MPLS_PXLEN, sizeof(net_addr_mpls), label })
 
 
 static inline void net_fill_ip4(net_addr *a, ip4_addr prefix, uint pxlen)
@@ -211,6 +212,18 @@ static inline ip_addr net_prefix(const net_addr *a)
   }
 }
 
+static inline mpls_stack net_mpls(const net_addr *a)
+{
+  mpls_stack ms;
+  if (a->type == NET_MPLS) {
+    ms.len = 1;
+    ms.label[0] = ((net_addr_mpls *) a)->label;
+    return ms;
+  }
+
+  bug("Can't call net_mpls on non-mpls net_addr");
+}
+
 static inline uint net4_pxlen(const net_addr *a)
 { return a->pxlen; }
 
diff --git a/sysdep/cf/linux.h b/sysdep/cf/linux.h
index 9e34f869..3da58220 100644
--- a/sysdep/cf/linux.h
+++ b/sysdep/cf/linux.h
@@ -16,6 +16,10 @@
 
 #define CONFIG_RESTRICTED_PRIVILEGES
 
+#ifndef AF_MPLS
+#define AF_MPLS 28
+#endif
+
 /*
 Link: sysdep/linux
 Link: sysdep/unix
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index 6240c177..4adcd7fa 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -26,10 +26,12 @@
 #include "lib/socket.h"
 #include "lib/string.h"
 #include "lib/hash.h"
+#include "lib/mpls.h"
 #include "conf/conf.h"
 
 #include <asm/types.h>
 #include <linux/if.h>
+#include <linux/lwtunnel.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 
@@ -46,6 +48,25 @@
 #define RTA_TABLE  15
 #endif
 
+#ifndef RTA_VIA
+#define RTA_VIA  18
+#endif
+
+#ifndef RTA_NEWDST
+#define RTA_NEWDST  19
+#endif
+
+#ifndef RTA_ENCAP_TYPE
+#define RTA_ENCAP_TYPE  21
+#endif
+
+#ifndef RTA_ENCAP
+#define RTA_ENCAP  22
+#endif
+
+#ifndef AF_MPLS
+#define AF_MPLS 28
+#endif
 
 /*
  *      Synchronous Netlink interface
@@ -256,7 +277,7 @@ static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
 };
 
 
-#define BIRD_RTA_MAX  (RTA_TABLE+1)
+#define BIRD_RTA_MAX  (RTA_ENCAP+1)
 
 static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
   [RTA_GATEWAY]   = { 1, 1, sizeof(ip4_addr) },
@@ -272,6 +293,8 @@ static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
   [RTA_MULTIPATH] = { 1, 0, 0 },
   [RTA_FLOW]      = { 1, 1, sizeof(u32) },
   [RTA_TABLE]     = { 1, 1, sizeof(u32) },
+  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
+  [RTA_ENCAP]     = { 1, 0, 0 },
 };
 
 static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
@@ -284,6 +307,20 @@ static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
   [RTA_METRICS]   = { 1, 0, 0 },
   [RTA_FLOW]      = { 1, 1, sizeof(u32) },
   [RTA_TABLE]     = { 1, 1, sizeof(u32) },
+  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
+  [RTA_ENCAP]     = { 1, 0, 0 },
+};
+
+static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
+  [RTA_DST]       = { 1, 1, sizeof(u32) },
+  [RTA_IIF]       = { 1, 1, sizeof(u32) },
+  [RTA_OIF]       = { 1, 1, sizeof(u32) },
+  [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
+  [RTA_METRICS]   = { 1, 0, 0 },
+  [RTA_FLOW]      = { 1, 1, sizeof(u32) },
+  [RTA_TABLE]     = { 1, 1, sizeof(u32) },
+  [RTA_VIA]       = { 1, 0, 0 },
+  [RTA_NEWDST]    = { 1, 0, 0 },
 };
 
 
@@ -333,6 +370,30 @@ static inline ip_addr rta_get_ipa(struct rtattr *a)
     return ipa_from_ip6(rta_get_ip6(a));
 }
 
+static inline ip_addr rta_get_via(struct rtattr *a)
+{
+  struct rtvia *v = RTA_DATA(a);
+  switch(v->rtvia_family) {
+    case AF_INET:  return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
+    case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
+  }
+  return IPA_NONE;
+}
+
+static inline mpls_stack rta_get_mpls(struct rtattr *a)
+{
+  mpls_stack ms = { .len = (RTA_PAYLOAD(a) / 4 > MPLS_STACK_LENGTH) ? MPLS_STACK_LENGTH : RTA_PAYLOAD(a) / 4 };
+  for (int i = 0; i < ms.len; i++) {
+    ms.label[i] = (((u8 *)RTA_DATA(a))[i*4 + 0] << 12)
+                | (((u8 *)RTA_DATA(a))[i*4 + 1] << 4)
+                | (((u8 *)RTA_DATA(a))[i*4 + 2] >> 4);
+
+    if (!!(((u8 *)RTA_DATA(a))[i*4 + 2] & 1) != (i+1 == ms.len))
+      log(L_WARN "KRT: Received a route with mismatched MPLS BoS bit, ignoring");
+  }
+  return ms;
+}
+
 struct rtattr *
 nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
 {
@@ -353,6 +414,24 @@ nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint
   return a;
 }
 
+static inline struct rtattr *
+nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
+{
+  return nl_add_attr(h, bufsize, code, NULL, 0);
+}
+
+static inline void
+nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
+{
+  a->rta_len = (void *)h + h->nlmsg_len - (void *)a;
+}
+
+static inline void
+nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
+{
+  nl_add_attr(h, bufsize, code, &data, 2);
+}
+
 static inline void
 nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
 {
@@ -382,16 +461,46 @@ nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
     nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
 }
 
-static inline struct rtattr *
-nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
+static inline void
+nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, mpls_stack ms)
 {
-  return nl_add_attr(h, bufsize, code, NULL, 0);
+  const char *buf = mpls_hton(ms);
+  nl_add_attr(h, bufsize, code, buf, mpls_buflen(buf));
 }
 
 static inline void
-nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
+nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, mpls_stack ms)
 {
-  a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
+  struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
+  nl_add_attr_mpls(h, bufsize, RTA_DST, ms);
+  nl_close_attr(h, nest);
+
+  nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
+}
+
+static inline void
+nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
+{
+  struct rtattr *nest = nl_open_attr(h, bufsize, RTA_VIA);
+  struct rtvia *via = RTA_DATA(nest);
+
+  h->nlmsg_len += sizeof(*via);
+
+  if (ipa_is_ip4(ipa)) {
+    ip4_addr ip4 = ipa_to_ip4(ipa);
+    ip4 = ip4_hton(ip4);
+    via->rtvia_family = AF_INET;
+    memcpy(via->rtvia_addr, &ip4, sizeof(ip4));
+    h->nlmsg_len += sizeof(ip4);
+  } else {
+    ip6_addr ip6 = ipa_to_ip6(ipa);
+    ip6 = ip6_hton(ip6);
+    via->rtvia_family = AF_INET6;
+    memcpy(via->rtvia_addr, &ip6, sizeof(ip6));
+    h->nlmsg_len += sizeof(ip6);
+  }
+
+  nl_close_attr(h, nest);
 }
 
 static inline struct rtnexthop *
@@ -411,7 +520,7 @@ nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
 static inline void
 nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
 {
-  nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
+  nh->rtnh_len = (void *)h + h->nlmsg_len - (void *)nh;
 }
 
 static void
@@ -909,6 +1018,15 @@ nh_bufsize(struct mpnh *nh)
   return rv;
 }
 
+static inline mpls_stack
+mpls_from_ea(struct adata *ad) {
+  mpls_stack s = { .len = ad->length/sizeof(u32) };
+  int i;
+  for (i = 0; i < s.len; i++)
+    s.label[i] = ((u32 *) ad->data)[i];
+  return s;
+}
+
 static int
 nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
 {
@@ -933,7 +1051,10 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
   r.r.rtm_dst_len = net_pxlen(net->n.addr);
   r.r.rtm_protocol = RTPROT_BIRD;
   r.r.rtm_scope = RT_SCOPE_UNIVERSE;
-  nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net_prefix(net->n.addr));
+  if (p->af == AF_MPLS)
+    nl_add_attr_mpls(&r.h, sizeof(r), RTA_DST, net_mpls(net->n.addr));
+  else
+    nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net_prefix(net->n.addr));
 
   if (krt_table_id(p) < 256)
     r.r.rtm_table = krt_table_id(p);
@@ -944,6 +1065,11 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
   if (!new)
     return nl_exchange(&r.h);
 
+  if (ea = ea_find(eattrs, EA_GEN_MPLS_STACK))
+    if (p->af == AF_MPLS)
+      nl_add_attr_mpls(&r.h, sizeof(r), RTA_NEWDST, mpls_from_ea(ea->u.ptr));
+    else
+      nl_add_attr_mpls_encap(&r.h, sizeof(r), mpls_from_ea(ea->u.ptr));
 
   if (ea = ea_find(eattrs, EA_KRT_METRIC))
     nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, ea->u.data);
@@ -977,7 +1103,10 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
     case RTD_ROUTER:
       r.r.rtm_type = RTN_UNICAST;
       nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
-      nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
+      if (p->af == AF_MPLS)
+        nl_add_attr_via(&r.h, sizeof(r), a->gw);
+      else
+        nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
       break;
     case RTD_DEVICE:
       r.r.rtm_type = RTN_UNICAST;
@@ -1058,9 +1187,9 @@ nl_parse_route(struct nlmsghdr *h, int scan)
         net_fill_ip4(&dst, IP4_NONE, 0);
       break;
 
-      case AF_INET6:
-        if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
-          return;
+    case AF_INET6:
+      if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
+        return;
 
       if (a[RTA_DST])
         net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
@@ -1068,6 +1197,16 @@ nl_parse_route(struct nlmsghdr *h, int scan)
         net_fill_ip6(&dst, IP6_NONE, 0);
       break;
 
+    case AF_MPLS:
+      if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
+        return;
+
+      if (a[RTA_DST])
+        net_fill_mpls(&dst, rta_get_mpls(a[RTA_DST]).label[0]);
+      else
+        return; /* No support for MPLS routes without RTA_DST */
+      break;
+
     default:
       return;
     }
@@ -1096,7 +1235,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
     SKIP("RTM_DELROUTE in scan\n");
 
   int c = net_classify(&dst);
-  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
+  if (i->rtm_family != AF_MPLS && ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)))
     SKIP("strange class/scope\n");
 
   // ignore rtm_scope, it is not a real scope
@@ -1160,10 +1299,13 @@ nl_parse_route(struct nlmsghdr *h, int scan)
         return;
     }
 
-  if (a[RTA_GATEWAY])
+  if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY] || (i->rtm_family == AF_MPLS) && a[RTA_VIA])
     {
       ra.dest = RTD_ROUTER;
-      ra.gw = rta_get_ipa(a[RTA_GATEWAY]);
+      if (i->rtm_family == AF_MPLS)
+        ra.gw = rta_get_via(a[RTA_VIA]);
+      else
+        ra.gw = rta_get_ipa(a[RTA_GATEWAY]);
 
       /* Silently skip strange 6to4 routes */
       const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
@@ -1210,6 +1352,24 @@ nl_parse_route(struct nlmsghdr *h, int scan)
   if (a[RTA_PRIORITY])
     e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]);
 
+  if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST])
+    {
+      mpls_stack ms = rta_get_mpls(a[RTA_NEWDST]);
+
+      ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
+      ea->next = ra.eattrs;
+      ra.eattrs = ea;
+      ea->flags = EALF_SORTED;
+      ea->count = 1;
+      ea->attrs[0].id = EA_GEN_MPLS_STACK; /* same attribute nl_send_route() looks up */
+      ea->attrs[0].flags = 0;
+      ea->attrs[0].type = EAF_TYPE_INT_SET;
+      ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(u32)*ms.len);
+      ea->attrs[0].u.ptr->length = sizeof(u32)*ms.len;
+      for (int j = 0; j < ms.len; j++)
+        ((u32 *)ea->attrs[0].u.ptr->data)[j] = ms.label[j];
+    }
+
   if (a[RTA_PREFSRC])
     {
       ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
@@ -1295,6 +1455,13 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
       nl_parse_route(h, 1);
     else
       log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
+
+  nl_request_dump(AF_MPLS, RTM_GETROUTE);
+  while (h = nl_get_scan())
+    if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
+      nl_parse_route(h, 1);
+    else
+      log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
 }
 
 /*
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 6b3b4eee..c9826dd0 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -1144,7 +1144,8 @@ krt_start(struct proto *P)
   {
     case NET_IP4: p->af = AF_INET; break;
     case NET_IP6: p->af = AF_INET6; break;
-    default: ASSERT(0);
+    case NET_MPLS: p->af = AF_MPLS; break;
+    default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break;
   }
 
   add_tail(&krt_proto_list, &p->krt_node);
@@ -1255,7 +1256,7 @@ struct protocol proto_unix_kernel = {
   .template =           "kernel%d",
   .attr_class =         EAP_KRT,
   .preference =         DEF_PREF_INHERITED,
-  .channel_mask =       NB_IP,
+  .channel_mask =       NB_IP | NB_MPLS,
   .proto_size =         sizeof(struct krt_proto),
   .config_size =        sizeof(struct krt_config),
   .preconfig =          krt_preconfig,