From 2f4f790a164fd9e34bd62340134c0e512022a5f9 Mon Sep 17 00:00:00 2001
From: Ondrej Zajicek
Date: Mon, 29 May 2023 05:37:26 +0200
Subject: [PATCH] BGP: EVPN NLRI - preliminary support

---
 proto/bgp/bgp.c     |   9 +-
 proto/bgp/bgp.h     |   9 +
 proto/bgp/config.Y  |   1 +
 proto/bgp/packets.c | 443 +++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 453 insertions(+), 9 deletions(-)

diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index cd57c32b..3d5fd4bd 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -1831,6 +1831,9 @@ bgp_channel_start(struct channel *C)
 
     if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
       c->next_hop_addr = src;
+
+    if (bgp_channel_is_l2vpn(c)) /* L2VPN: take the source address whatever its family is */
+      c->next_hop_addr = src;
   }
 
   /* Use preferred addresses associated with interface / source address */
@@ -2137,10 +2140,10 @@ bgp_postconfig(struct proto_config *CF)
     /* Default values of IGP tables */
     if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
     {
-      if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
+      if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || bgp_cc_is_l2vpn(cc) || cc->ext_next_hop))
         cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
 
-      if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
+      if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || bgp_cc_is_l2vpn(cc) || cc->ext_next_hop))
         cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
 
       if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
@@ -2724,7 +2727,7 @@ struct protocol proto_bgp = {
   .template = "bgp%d",
   .class = PROTOCOL_BGP,
   .preference = DEF_PREF_BGP,
-  .channel_mask = NB_IP | NB_VPN | NB_FLOW | NB_MPLS,
+  .channel_mask = NB_IP | NB_VPN | NB_FLOW | NB_EVPN | NB_MPLS,
   .proto_size = sizeof(struct bgp_proto),
   .config_size = sizeof(struct bgp_config),
   .postconfig = bgp_postconfig,
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 7127bc88..3128d309 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -27,10 +27,12 @@ struct eattr;
 
 #define BGP_AFI_IPV4 1
 #define BGP_AFI_IPV6 2
+#define BGP_AFI_L2VPN 25 /* L2VPN address family, the AFI part of BGP_AF_EVPN */
 
 #define BGP_SAFI_UNICAST 1
 #define BGP_SAFI_MULTICAST 2
 #define BGP_SAFI_MPLS 4
+#define BGP_SAFI_EVPN 70 /* EVPN subsequent address family, the SAFI part of BGP_AF_EVPN */
 #define BGP_SAFI_MPLS_VPN 128
 #define BGP_SAFI_VPN_MULTICAST 129
 #define BGP_SAFI_FLOW 133
@@ -53,6 +55,7 @@ struct eattr;
 #define BGP_AF_VPN6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_VPN_MULTICAST )
 #define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW )
 #define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW )
+#define BGP_AF_EVPN BGP_AF( BGP_AFI_L2VPN, BGP_SAFI_EVPN )
 
 
 struct bgp_write_state;
@@ -529,12 +532,18 @@ static inline int bgp_channel_is_ipv4(struct bgp_channel *c)
 static inline int bgp_channel_is_ipv6(struct bgp_channel *c)
 { return BGP_AFI(c->afi) == BGP_AFI_IPV6; }
 
+static inline int bgp_channel_is_l2vpn(struct bgp_channel *c) /* Is the AFI of channel L2VPN? */
+{ return BGP_AFI(c->afi) == BGP_AFI_L2VPN; }
+
 static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c)
 { return BGP_AFI(c->afi) == BGP_AFI_IPV4; }
 
 static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c)
 { return BGP_AFI(c->afi) == BGP_AFI_IPV6; }
 
+static inline int bgp_cc_is_l2vpn(struct bgp_channel_config *c) /* Is the AFI of channel config L2VPN? */
+{ return BGP_AFI(c->afi) == BGP_AFI_L2VPN; }
+
 static inline int bgp_channel_is_role_applicable(struct bgp_channel *c)
 { return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); }
 
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 1173ff06..2a831bc2 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -240,6 +240,7 @@ bgp_afi:
  | VPN6 MULTICAST { $$ = BGP_AF_VPN6_MC; }
  | FLOW4 { $$ = BGP_AF_FLOW4; }
  | FLOW6 { $$ = BGP_AF_FLOW6; }
+ | EVPN { $$ = BGP_AF_EVPN; }
  ;
 
 bgp_channel_start: bgp_afi
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index e8cc4718..3bc5b616 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -34,6 +34,7 @@
 #define BGP_RR_END 2
 
 #define BGP_NLRI_MAX (4 + 1 + 32)
+#define BGP_NLRI_EVPN_MAX (4 + 2 + 52) /* Path ID + route type and length octets + longest route body */
 
 #define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */
 #define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */
@@ -1076,6 +1077,25 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
 
 #define MISMATCHED_AF " - mismatched address family (%I for %s)"
 
+/* Check whether next hop @nh has an address family usable on channel @c */
+static int
+bgp_channel_match_next_hop_af(struct bgp_channel *c, ip_addr nh)
+{
+  const uint afi = BGP_AFI(c->afi);
+
+  /* L2VPN channels accept both IPv4 and IPv6 next hops */
+  if (afi == BGP_AFI_L2VPN)
+    return 1;
+
+  /* Extended next hop lifts the family restriction of IP channels */
+  if (afi == BGP_AFI_IPV4)
+    return ipa_is_ip4(nh) || c->ext_next_hop;
+  if (afi == BGP_AFI_IPV6)
+    return ipa_is_ip6(nh) || c->ext_next_hop;
+
+  return 0;
+}
+
 static void
 bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
 {
@@ -1304,12 +1324,11 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
     REJECT(BAD_NEXT_HOP " - neighbor address %I", peer);
 
   /* Forbid next hop with non-matching AF */
-  if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
-      !s->channel->ext_next_hop)
+  if (!bgp_channel_match_next_hop_af(s->channel, nh[0]))
     REJECT(BAD_NEXT_HOP MISMATCHED_AF, nh[0], s->channel->desc->name);
 
-  /* Just check if MPLS stack */
-  if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
+  /* Just check if there is MPLS stack - not applicable for EVPN */
+  if (s->mpls && (s->channel->afi != BGP_AF_EVPN) && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
     REJECT(NO_LABEL_STACK);
 }
 
@@ -1380,7 +1399,7 @@ bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
   if (ipa_zero(nh[1]))
     ad->length = 16;
 
-  if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
+  if (!bgp_channel_match_next_hop_af(c, nh[0]))
     WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name);
 
   // XXXX validate next hop
@@ -1461,7 +1480,7 @@ bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
   if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
     bgp_parse_error(s, 9);
 
-  if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
+  if (!bgp_channel_match_next_hop_af(c, nh[0]))
     WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name);
 
   // XXXX validate next hop
@@
-2170,6 +2189,481 @@ bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
   }
 }
 
+/*
+ * EVPN NLRI
+ *
+ * Each route is a route type octet, a length octet and a type-specific body.
+ * MPLS labels embedded in the body are passed through s->mpls_labels as an
+ * array of u32 values, one per label.
+ */
+
+/* Write an IP address preceded by its length in bits, advance *pos and *size */
+static inline void
+bgp_encode_evpn_ip(byte **pos, uint *size, ip_addr ip)
+{
+  if (ipa_is_ip4(ip))
+  {
+    **pos = IP4_MAX_PREFIX_LENGTH;
+    put_ip4(*pos+1, ipa_to_ip4(ip));
+    ADVANCE(*pos, *size, 1+4);
+  }
+  else
+  {
+    **pos = IP6_MAX_PREFIX_LENGTH;
+    put_ip6(*pos+1, ipa_to_ip6(ip));
+    ADVANCE(*pos, *size, 1+16);
+  }
+}
+
+/*
+ * Read an IP address preceded by its length in bits, advance *pos and *len.
+ * The caller has to ensure that the length octet itself is readable. Only full
+ * IPv4 and IPv6 address lengths are accepted, others are a parse error.
+ */
+static inline ip_addr
+bgp_decode_evpn_ip(struct bgp_parse_state *s, byte **pos, uint *len)
+{
+  uint alen = **pos;
+  uint blen = 1 + (alen >> 3);
+
+  if (*len < blen)
+    bgp_parse_error(s, 1);
+
+  /* Initialized so that no undefined value can ever be returned */
+  ip_addr ip = IPA_NONE;
+  if (alen == IP4_MAX_PREFIX_LENGTH)
+    ip = ipa_from_ip4(get_ip4(*pos + 1));
+  else if (alen == IP6_MAX_PREFIX_LENGTH)
+    ip = ipa_from_ip6(get_ip6(*pos + 1));
+  else
+    bgp_parse_error(s, 10); /* ? */
+
+  ADVANCE(*pos, *len, blen);
+  return ip;
+}
+
+/* Is there a label at position @pos of label stack @m (which may be NULL)? */
+static inline int bgp_label_ready(const adata *m, uint pos)
+{ return m && (m->length >= 4*(pos+1)); }
+
+/* Get label at position @pos, the caller has to check bgp_label_ready() */
+static inline u32 bgp_get_label_(const adata *m, uint pos)
+{ return ((u32 *) m->data)[pos]; }
+
+/* Get label at position @pos, or 0 when there is no such label */
+static inline u32 bgp_get_label(const adata *m, uint pos)
+{ return bgp_label_ready(m, pos) ? bgp_get_label_(m, pos) : 0; }
+
+/*
+ * Encode body of Ethernet Auto-Discovery route: RD, ESI, ethernet tag and one
+ * MPLS label (0 when none is attached). Returns number of bytes written.
+ */
+static uint
+bgp_encode_evpn_ead(struct bgp_write_state *s, const net_addr_evpn *net, byte *buf, uint size)
+{
+  byte *pos = buf;
+
+  /* Encode route distinguisher */
+  put_u64(pos, net->rd);
+  ADVANCE(pos, size, 8);
+
+  /* Encode ethernet segment ID */
+  memcpy(pos, &net->ead.esi, 10);
+  ADVANCE(pos, size, 10);
+
+  /* Encode ethernet tag ID */
+  put_u32(pos, net->tag);
+  ADVANCE(pos, size, 4);
+
+  /* Encode MPLS label, it occupies the upper 20 bits of the 3-octet field */
+  u32 label = bgp_get_label(s->mpls_labels, 0);
+  put_u24(pos, label << 4);
+  ADVANCE(pos, size, 3);
+
+  return pos - buf;
+}
+
+/*
+ * Decode body of Ethernet Auto-Discovery route into @net, the label is stored
+ * to s->mpls_labels. The body has to be exactly 25 bytes long.
+ */
+static void
+bgp_decode_evpn_ead(struct bgp_parse_state *s, net_addr_evpn *net, byte *pos, uint len)
+{
+  if (len < (8+10+4+3))
+    bgp_parse_error(s, 1);
+
+  /* Decode route distinguisher */
+  u64 rd = get_u64(pos);
+  ADVANCE(pos, len, 8);
+
+  /* Decode ethernet segment ID */
+  evpn_esi esi;
+  memcpy(&esi, pos, 10);
+  ADVANCE(pos, len, 10);
+
+  /* Decode ethernet tag ID */
+  u32 tag = get_u32(pos);
+  ADVANCE(pos, len, 4);
+
+  /* Decode MPLS label */
+  u32 label = get_u24(pos) >> 4;
+  ADVANCE(pos, len, 3);
+
+  s->mpls_labels = lp_alloc_adata(s->pool, 4);
+  memcpy(s->mpls_labels->data, &label, 4);
+
+  if (len)
+    bgp_parse_error(s, 1);
+
+  net->ead = NET_ADDR_EVPN_EAD(rd, tag, esi);
+}
+
+/*
+ * Encode body of MAC/IP Advertisement route: RD, ESI (always sent as zero),
+ * ethernet tag, MAC address, optional IP address and one or two MPLS labels.
+ * Returns number of bytes written.
+ */
+static uint
+bgp_encode_evpn_mac(struct bgp_write_state *s, const net_addr_evpn *net, byte *buf, uint size)
+{
+  byte *pos = buf;
+
+  /* Encode route distinguisher */
+  put_u64(pos, net->rd);
+  ADVANCE(pos, size, 8);
+
+  /* Encode ethernet segment ID - XXX */
+  memset(pos, 0, 10);
+  ADVANCE(pos, size, 10);
+
+  /* Encode ethernet tag ID */
+  put_u32(pos, net->tag);
+  ADVANCE(pos, size, 4);
+
+  /* Encode MAC address */
+  pos[0] = 48;
+  memcpy(pos+1, &net->mac.mac, 6);
+  ADVANCE(pos, size, 7);
+
+  /* Encode IP address, zero length octet means no address */
+  pos[0] = 0;
+  if (net->length == sizeof(net_addr_evpn_mac_ip))
+    bgp_encode_evpn_ip(&pos, &size, net->mac_ip.ip);
+  else
+    ADVANCE(pos, size, 1);
+
+  /* Encode MPLS label */
+  u32 label1 = bgp_get_label(s->mpls_labels, 0);
+  put_u24(pos, label1 << 4);
+  ADVANCE(pos, size, 3);
+
+  if (bgp_label_ready(s->mpls_labels, 1))
+  {
+    u32 label2 = bgp_get_label_(s->mpls_labels, 1);
+    put_u24(pos, label2 << 4);
+    ADVANCE(pos, size, 3);
+  }
+
+  return pos - buf;
+}
+
+/*
+ * Decode body of MAC/IP Advertisement route into @net (MAC or MAC+IP variant),
+ * labels are stored to s->mpls_labels. The received ESI is currently ignored.
+ */
+static void
+bgp_decode_evpn_mac(struct bgp_parse_state *s, net_addr_evpn *net, byte *pos, uint len)
+{
+  if (len < (8+10+4+7+1))
+    bgp_parse_error(s, 1);
+
+  /* Decode route distinguisher */
+  u64 rd = get_u64(pos);
+  ADVANCE(pos, len, 8);
+
+  /* Decode ethernet segment ID - XXX */
+  evpn_esi esi;
+  memcpy(&esi, pos, 10);
+  ADVANCE(pos, len, 10);
+
+  /* Decode ethernet tag ID */
+  u32 tag = get_u32(pos);
+  ADVANCE(pos, len, 4);
+
+  /* Decode MAC address */
+  if (pos[0] != 48)
+    bgp_parse_error(s, 10); /* ? */
+
+  mac_addr mac;
+  memcpy(&mac, pos+1, 6);
+  ADVANCE(pos, len, 7);
+
+  /* Decode IP address, zero length octet means no address */
+  ip_addr ip = IPA_NONE;
+  if (pos[0])
+    ip = bgp_decode_evpn_ip(s, &pos, &len);
+  else
+    ADVANCE(pos, len, 1);
+
+  /* Decode MPLS labels, the second one is optional */
+  if (len < 3)
+    bgp_parse_error(s, 1);
+
+  u32 label[2], lnum = 1;
+  label[0] = get_u24(pos) >> 4;
+  ADVANCE(pos, len, 3);
+
+  if (len >= 3)
+  {
+    label[1] = get_u24(pos) >> 4;
+    ADVANCE(pos, len, 3);
+    lnum++;
+  }
+
+  s->mpls_labels = lp_alloc_adata(s->pool, 4 * lnum);
+  memcpy(s->mpls_labels->data, label, 4 * lnum);
+
+  if (len)
+    bgp_parse_error(s, 1);
+
+  if (ipa_zero(ip))
+    net->mac = NET_ADDR_EVPN_MAC(rd, tag, mac);
+  else
+    net->mac_ip = NET_ADDR_EVPN_MAC_IP(rd, tag, mac, ip);
+}
+
+/*
+ * Encode body of Inclusive Multicast Ethernet Tag route: RD, ethernet tag and
+ * router IP address. Returns number of bytes written.
+ */
+static uint
+bgp_encode_evpn_imet(struct bgp_write_state *s UNUSED, const net_addr_evpn *net, byte *buf, uint size)
+{
+  byte *pos = buf;
+
+  /* Encode route distinguisher */
+  put_u64(pos, net->rd);
+  ADVANCE(pos, size, 8);
+
+  /* Encode ethernet tag ID */
+  put_u32(pos, net->tag);
+  ADVANCE(pos, size, 4);
+
+  /* Encode router IP address */
+  bgp_encode_evpn_ip(&pos, &size, net->imet.rtr);
+
+  return pos - buf;
+}
+
+/* Decode body of Inclusive Multicast Ethernet Tag route into @net */
+static void
+bgp_decode_evpn_imet(struct bgp_parse_state *s, net_addr_evpn *net, byte *pos, uint len)
+{
+  if (len < (8+4+1))
+    bgp_parse_error(s, 1);
+
+  /* Decode route distinguisher */
+  u64 rd = get_u64(pos);
+  ADVANCE(pos, len, 8);
+
+  /* Decode ethernet tag ID */
+  u32 tag = get_u32(pos);
+  ADVANCE(pos, len, 4);
+
+  /* Decode router IP address */
+  ip_addr rtr = bgp_decode_evpn_ip(s, &pos, &len);
+
+  if (len)
+    bgp_parse_error(s, 1);
+
+  net->imet = NET_ADDR_EVPN_IMET(rd, tag, rtr);
+}
+
+/*
+ * Encode body of Ethernet Segment route: RD, ESI and router IP address.
+ * Returns number of bytes written.
+ */
+static uint
+bgp_encode_evpn_es(struct bgp_write_state *s UNUSED, const net_addr_evpn *net, byte *buf, uint size)
+{
+  byte *pos = buf;
+
+  /* Encode route distinguisher */
+  put_u64(pos, net->rd);
+  ADVANCE(pos, size, 8);
+
+  /* Encode ethernet segment ID */
+  memcpy(pos, &net->es.esi, 10);
+  ADVANCE(pos, size, 10);
+
+  /* Encode router IP address */
+  bgp_encode_evpn_ip(&pos, &size, net->es.rtr);
+
+  return pos - buf;
+}
+
+/* Decode body of Ethernet Segment route into @net */
+static void
+bgp_decode_evpn_es(struct bgp_parse_state *s, net_addr_evpn *net, byte *pos, uint len)
+{
+  if (len < (8+10+1))
+    bgp_parse_error(s, 1);
+
+  /* Decode route distinguisher */
+  u64 rd = get_u64(pos);
+  ADVANCE(pos, len, 8);
+
+  /* Decode ethernet segment ID */
+  evpn_esi esi;
+  memcpy(&esi, pos, 10);
+  ADVANCE(pos, len, 10);
+
+  /* Decode router IP address */
+  ip_addr rtr = bgp_decode_evpn_ip(s, &pos, &len);
+
+  if (len)
+    bgp_parse_error(s, 1);
+
+  net->es = NET_ADDR_EVPN_ES(rd, esi, rtr);
+}
+
+/*
+ * Encode EVPN routes from bucket @buck to buffer @buf of @size bytes, stop when
+ * the bucket is empty or less than BGP_NLRI_EVPN_MAX bytes remain. Each handled
+ * prefix is taken off the bucket. Returns number of bytes written.
+ */
+static uint
+bgp_encode_nlri_evpn(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+{
+  byte *pos = buf;
+
+  /* BGP_NLRI_EVPN_MAX is enough for any route, so no size checks below */
+  while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_EVPN_MAX))
+  {
+    struct bgp_prefix *px = HEAD(buck->prefixes);
+    const net_addr_evpn *net = (void *) px->net;
+
+    /* Remember the start so that an unsupported route can be dropped */
+    byte *start = pos;
+    uint start_size = size;
+
+    /* Encode path ID */
+    if (s->add_path)
+    {
+      put_u32(pos, px->path_id);
+      ADVANCE(pos, size, 4);
+    }
+
+    /* Encode EVPN header, the length is filled in later */
+    pos[0] = net->subtype;
+    pos[1] = 0;
+    ADVANCE(pos, size, 2);
+
+    uint rlen = 0;
+    int known = 1;
+    switch (net->subtype)
+    {
+    case NET_EVPN_EAD: rlen = bgp_encode_evpn_ead(s, net, pos, size); break;
+    case NET_EVPN_MAC: rlen = bgp_encode_evpn_mac(s, net, pos, size); break;
+    case NET_EVPN_IMET: rlen = bgp_encode_evpn_imet(s, net, pos, size); break;
+    case NET_EVPN_ES: rlen = bgp_encode_evpn_es(s, net, pos, size); break;
+    default: known = 0; break;
+    }
+
+    if (known)
+    {
+      /* Fix length */
+      pos[-1] = rlen;
+      ADVANCE(pos, size, rlen);
+    }
+    else
+    {
+      /* Unknown route type, do not send anything for this prefix */
+      pos = start;
+      size = start_size;
+    }
+
+    if (!s->sham)
+      bgp_free_prefix(s->channel, px);
+    else
+      rem_node(&px->buck_node);
+  }
+
+  return pos - buf;
+}
+
+/*
+ * Decode EVPN routes from @pos / @len and pass each of them together with
+ * attributes @a (may be NULL) to bgp_rte_update(). Unknown types are skipped.
+ */
+static void
+bgp_decode_nlri_evpn(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+  ea_list *base_eattrs = a ? a->eattrs : NULL;
+
+  while (len)
+  {
+    net_addr_evpn net;
+    u32 path_id = 0;
+    int known = 1;
+
+    s->mpls_labels = NULL;
+
+    /* Reset attributes */
+    if (a)
+      a->eattrs = base_eattrs;
+
+    /* Decode path ID */
+    if (s->add_path)
+    {
+      if (len < 5)
+        bgp_parse_error(s, 1);
+
+      path_id = get_u32(pos);
+      ADVANCE(pos, len, 4);
+    }
+
+    if (len < 2)
+      bgp_parse_error(s, 1);
+
+    /* Decode EVPN header */
+    uint type = pos[0];
+    uint rlen = pos[1];
+    ADVANCE(pos, len, 2);
+
+    if (len < rlen)
+      bgp_parse_error(s, 1);
+
+    switch (type)
+    {
+    case NET_EVPN_EAD: bgp_decode_evpn_ead(s, &net, pos, rlen); break;
+    case NET_EVPN_MAC: bgp_decode_evpn_mac(s, &net, pos, rlen); break;
+    case NET_EVPN_IMET: bgp_decode_evpn_imet(s, &net, pos, rlen); break;
+    case NET_EVPN_ES: bgp_decode_evpn_es(s, &net, pos, rlen); break;
+    default: known = 0; break;
+    }
+
+    ADVANCE(pos, len, rlen);
+
+    /* Unknown route type - skip it instead of announcing an empty network */
+    if (!known)
+      continue;
+
+    if (a && s->mpls_labels)
+    {
+      adata *m = s->mpls_labels;
+      bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, m);
+      bgp_apply_mpls_labels(s, a, (u32 *) m->data, m->length / 4);
+    }
+
+    bgp_rte_update(s, (net_addr *) &net, path_id, a);
+
+    rta_free(s->cached_rta);
+    s->cached_rta = NULL;
+  }
+}
+
 
 static const struct bgp_af_desc bgp_af_table[] = {
   {
@@ -2298,6 +2792,17 @@ static const struct bgp_af_desc bgp_af_table[] = {
     .decode_next_hop = bgp_decode_next_hop_none,
     .update_next_hop = bgp_update_next_hop_none,
   },
+  {
+    .afi = BGP_AF_EVPN,
+    .net = NET_EVPN,
+    .mpls = 1,
+    .name = "evpn",
+    .encode_nlri = bgp_encode_nlri_evpn,
+    .decode_nlri = bgp_decode_nlri_evpn,
+    .encode_next_hop = bgp_encode_next_hop_ip,
+    .decode_next_hop = bgp_decode_next_hop_ip,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
 };
 
 const struct bgp_af_desc *