0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2025-03-11 17:08:46 +00:00
bird/proto/evpn/evpn.c
Ondrej Zajicek dc17f1982f BGP: PMSI tunnel attribute support
PMSI tunnel attribute is required for EVPN IMET routes.
2024-01-31 17:35:25 +01:00

573 lines
14 KiB
C

/*
* BIRD -- BGP/MPLS Ethernet Virtual Private Networks (EVPN)
*
* (c) 2023 Ondrej Zajicek <santiago@crfreenet.org>
* (c) 2023 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
/**
* DOC: BGP/MPLS Ethernet Virtual Private Networks (EVPN)
*
* The EVPN protocol implements RFC 7432 BGP Ethernet VPNs using VXLAN overlays.
* It works similarly to L3VPN. It connects ethernet table (one per VRF) with
* (global) EVPN table. Routes passed from EVPN table to ethernet table are
* stripped of RD and filtered by import targets, routes passed in the other
* direction are extended with RD, MPLS/VNI labels, and export targets in
* extended communities.
*
* The EVPN protocol supports MAC (type 2) and IMET (type 3) EVPN routes, there
* is no support for EAD / ES routes, or routes with non-zero tag. There is also
* no support for MPLS backbone, just VXLAN overlays.
*
* Supported standards:
* RFC 7432 - BGP MPLS-Based Ethernet VPN
* RFC 8365 - Network Virtualization Using Ethernet VPN
*/
/*
* TODO:
* - Encapsulation community handling
* - MAC mobility community handling
* - Review preference handling
* - Wait for existence (and active state) of the tunnel device
* - Learn VNI / router address from the tunnel device
* - Improved VLAN handling
* - MPLS encapsulation mode
*/
#undef LOCAL_DEBUG
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
#include "nest/mpls.h"
#include "nest/cli.h"
#include "conf/conf.h"
#include "filter/filter.h"
#include "filter/data.h"
#include "lib/string.h"
#include "evpn.h"
#include "proto/bgp/bgp.h"
#define EA_BGP_NEXT_HOP EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)
#define EA_BGP_EXT_COMMUNITY EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)
#define EA_BGP_PMSI_TUNNEL EA_CODE(PROTOCOL_BGP, BA_PMSI_TUNNEL)
#define EA_BGP_MPLS_LABEL_STACK EA_CODE(PROTOCOL_BGP, BA_MPLS_LABEL_STACK)
/*
 * Look up attribute @id in the attribute list @e and return its adata
 * payload. When the attribute is not present, &null_adata is returned
 * instead, so the result is never NULL.
 */
static inline const struct adata *
ea_get_adata(ea_list *e, uint id)
{
  eattr *attr = ea_find(e, id);

  if (!attr)
    return &null_adata;

  return attr->u.ptr;
}
/*
 * Check whether the next hops of @a are usable for MPLS forwarding.
 * Returns 1 when usable, 0 otherwise.
 */
static inline int
mpls_valid_nexthop(const rta *a)
{
  const struct nexthop *hop;

  /* Only unicast destinations qualify; special (e.g. blackhole) targets are not supported */
  if (a->dest != RTD_UNICAST)
    return 0;

  /* A gateway-less next hop on a multi-access iface would need ARP / ND, which MPLS lacks */
  hop = &a->nh;
  while (hop)
  {
    if (ipa_zero(hop->gw) && (hop->iface->flags & IF_MULTIACCESS))
      return 0;

    hop = hop->next;
  }

  return 1;
}
/*
 * Test the extended community list @list against the configured import
 * targets of @p. When the import target set was found to be a single value
 * (see import_target_one), a direct membership test is used; otherwise the
 * list is matched against the whole target tree.
 */
static int
evpn_import_targets(struct evpn_proto *p, const struct adata *list)
{
  if (p->import_target_one)
    return ec_set_contains(list, p->import_target->from.val.ec);

  return eclist_match_set(list, p->import_target);
}
static struct adata *
evpn_export_targets(struct evpn_proto *p, const struct adata *src)
{
u32 *s = int_set_get_data(src);
int len = int_set_get_size(src);
struct adata *dst = lp_alloc(tmp_linpool, sizeof(struct adata) + (len + p->export_target_length) * sizeof(u32));
u32 *d = int_set_get_data(dst);
int end = 0;
for (int i = 0; i < len; i += 2)
{
/* Remove existing route targets */
uint type = s[i] >> 16;
if (ec_type_is_rt(type))
continue;
d[end++] = s[i];
d[end++] = s[i+1];
}
/* Add new route targets */
memcpy(d + end, p->export_target_data, p->export_target_length * sizeof(u32));
end += p->export_target_length;
/* Set length */
dst->length = end * sizeof(u32);
return dst;
}
/*
 * Precompute p->import_target_one: set when the import target tree is a
 * single leaf node whose range covers exactly one value, which lets
 * evpn_import_targets() use a plain membership test.
 */
static inline void
evpn_prepare_import_targets(struct evpn_proto *p)
{
  const struct f_tree *root = p->import_target;
  int is_leaf = !root->left && !root->right;

  p->import_target_one = is_leaf && (root->from.val.ec == root->to.val.ec);
}
/*
 * tree_walk() callback used by evpn_prepare_export_targets().
 * Stores the lower bound of tree node @t into the export target buffer of
 * the protocol instance passed as @P and advances the fill position by two
 * u32 words.
 */
static void
evpn_add_ec(const struct f_tree *t, void *P)
{
  struct evpn_proto *ep = P;

  ec_put(ep->export_target_data, ep->export_target_length, t->from.val.ec);
  ep->export_target_length = ep->export_target_length + 2;
}
static void
evpn_prepare_export_targets(struct evpn_proto *p)
{
if (p->export_target_data)
mb_free(p->export_target_data);
uint len = 2 * tree_node_count(p->export_target);
p->export_target_data = mb_alloc(p->p.pool, len * sizeof(u32));
p->export_target_length = 0;
tree_walk(p->export_target, evpn_add_ec, p);
ASSERT(p->export_target_length == len);
}
/*
 * Propagate an ethernet table route to the EVPN channel as a MAC route.
 *
 * The EVPN network is built from the protocol RD, tag 0 and the MAC of @n0.
 * A non-NULL @new results in an announcement carrying the export targets
 * and the VNI (stored as EA_MPLS_LABEL); a NULL @new withdraws the route.
 */
static void
evpn_announce_mac(struct evpn_proto *p, const net_addr_eth *n0, rte *new)
{
  struct channel *ch = p->evpn_channel;
  net_addr *evpn_net = alloca(sizeof(net_addr_evpn_mac));

  net_fill_evpn_mac(evpn_net, p->rd, 0, n0->mac);

  /* Withdraw */
  if (!new)
  {
    rte_update2(ch, evpn_net, NULL, p->p.main_source);
    return;
  }

  /* Announce */
  rta *attrs = alloca(RTA_MAX_SIZE);
  *attrs = (rta) {
    .source = RTS_EVPN,
    .scope = SCOPE_UNIVERSE,
    .pref = ch->preference,
  };

  struct adata *targets = evpn_export_targets(p, &null_adata);
  ea_set_attr_ptr(&attrs->eattrs, tmp_linpool, EA_BGP_EXT_COMMUNITY, 0, EAF_TYPE_EC_SET, targets);
  ea_set_attr_u32(&attrs->eattrs, tmp_linpool, EA_MPLS_LABEL, 0, EAF_TYPE_INT, p->vni);

  rte *route = rte_get_temp(attrs, p->p.main_source);
  rte_update2(ch, evpn_net, route, p->p.main_source);
}
/*
 * Announce (@new non-zero) or withdraw (@new zero) the local IMET route on
 * the EVPN channel.
 *
 * The network is built from the protocol RD, tag 0 and the router address.
 * An announced route carries the export targets and a PMSI tunnel attribute
 * for ingress replication built from the router address and the VNI.
 */
static void
evpn_announce_imet(struct evpn_proto *p, int new)
{
  struct channel *ch = p->evpn_channel;
  net_addr *imet_net = alloca(sizeof(net_addr_evpn_imet));

  net_fill_evpn_imet(imet_net, p->rd, 0, p->router_addr);

  /* Withdraw */
  if (!new)
  {
    rte_update2(ch, imet_net, NULL, p->p.main_source);
    return;
  }

  /* Announce */
  rta *attrs = alloca(RTA_MAX_SIZE);
  *attrs = (rta) {
    .source = RTS_EVPN,
    .scope = SCOPE_UNIVERSE,
    .pref = ch->preference,
  };

  struct adata *targets = evpn_export_targets(p, &null_adata);
  ea_set_attr_ptr(&attrs->eattrs, tmp_linpool, EA_BGP_EXT_COMMUNITY, 0, EAF_TYPE_EC_SET, targets);

  struct adata *pmsi = bgp_pmsi_new_ingress_replication(tmp_linpool, p->router_addr, p->vni);
  ea_set_attr_ptr(&attrs->eattrs, tmp_linpool, EA_BGP_PMSI_TUNNEL, 0, EAF_TYPE_OPAQUE, pmsi);

  rte *route = rte_get_temp(attrs, p->p.main_source);
  rte_update2(ch, imet_net, route, p->p.main_source);
}
/*
 * Log an error prefixed with the protocol name and abandon processing of the
 * current route. The macro relies on its call site providing a variable
 * named 'p' (the protocol instance) and a label named 'withdraw' to jump to.
 */
#define BAD(msg, args...) \
({ log(L_ERR "%s: " msg, p->p.name, ## args); goto withdraw; })
/*
 * Propagate an EVPN MAC route to the ethernet channel.
 *
 * The ethernet network is built from the MAC of @n0 and the configured VID.
 * A resolvable @new is turned into a unicast route through the tunnel
 * device, using the BGP next hop as gateway and the BGP MPLS label stack
 * (truncated to MPLS_MAX_LABEL_STACK entries) as labels. A NULL or
 * unresolvable @new, or one lacking a required attribute, withdraws the
 * ethernet route.
 */
static void
evpn_receive_mac(struct evpn_proto *p, const net_addr_evpn_mac *n0, rte *new)
{
  struct channel *eth = p->eth_channel;
  net_addr *eth_net = alloca(sizeof(net_addr_eth));
  eattr *hop_attr;
  eattr *stack_attr;
  rta *attrs;
  rte *route;

  net_fill_eth(eth_net, n0->mac, p->vid);

  if (!new || !rte_resolvable(new))
    goto withdraw;

  hop_attr = ea_find(new->attrs->eattrs, EA_BGP_NEXT_HOP);
  if (!hop_attr)
    BAD("Missing NEXT_HOP attribute in %N", n0);

  stack_attr = ea_find(new->attrs->eattrs, EA_BGP_MPLS_LABEL_STACK);
  if (!stack_attr)
    BAD("Missing MPLS label stack in %N", n0);

  attrs = alloca(RTA_MAX_SIZE);
  *attrs = (rta) {
    .source = RTS_EVPN,
    .scope = SCOPE_UNIVERSE,
    .dest = RTD_UNICAST,
    .pref = eth->preference,
    .nh.gw = *((ip_addr *) hop_attr->u.ptr->data),
    .nh.iface = p->tunnel_dev,
  };

  /* Labels are 4 bytes each; never copy more than the next hop can hold */
  attrs->nh.labels = MIN(stack_attr->u.ptr->length / 4, MPLS_MAX_LABEL_STACK);
  memcpy(attrs->nh.label, stack_attr->u.ptr->data, attrs->nh.labels * 4);

  route = rte_get_temp(attrs, p->p.main_source);
  rte_update2(eth, eth_net, route, p->p.main_source);
  return;

withdraw:
  rte_update2(eth, eth_net, NULL, p->p.main_source);
}
/*
 * Propagate an EVPN IMET route to the ethernet channel.
 *
 * All IMET routes map to the same ethernet network (MAC_NONE with the
 * configured VID); they are kept apart by a route source obtained from
 * rt_get_source() using the RD of @n0. A resolvable @new with an ingress
 * replication PMSI tunnel attribute becomes a unicast route through the
 * tunnel device, with the PMSI endpoint as gateway and the PMSI label as
 * the single label. Otherwise the route for that source is withdrawn.
 */
static void
evpn_receive_imet(struct evpn_proto *p, const net_addr_evpn_imet *n0, rte *new)
{
  struct channel *eth = p->eth_channel;
  struct rte_src *src = rt_get_source(&p->p, n0->rd);
  net_addr *eth_net = alloca(sizeof(net_addr_eth));
  eattr *pmsi;
  uint pmsi_type;
  rta *attrs;
  rte *route;

  net_fill_eth(eth_net, MAC_NONE, p->vid);

  if (!new || !rte_resolvable(new))
    goto withdraw;

  pmsi = ea_find(new->attrs->eattrs, EA_BGP_PMSI_TUNNEL);
  if (!pmsi)
    BAD("Missing PMSI_TUNNEL attribute in %N", n0);

  pmsi_type = bgp_pmsi_get_type(pmsi->u.ptr);
  if (pmsi_type != BGP_PMSI_TYPE_INGRESS_REPLICATION)
    BAD("Unsupported PMSI_TUNNEL type %u in %N", pmsi_type, n0);

  attrs = alloca(RTA_MAX_SIZE);
  *attrs = (rta) {
    .source = RTS_EVPN,
    .scope = SCOPE_UNIVERSE,
    .dest = RTD_UNICAST,
    .pref = eth->preference,
    .nh.gw = bgp_pmsi_ir_get_endpoint(pmsi->u.ptr),
    .nh.iface = p->tunnel_dev,
  };

  attrs->nh.labels = 1;
  attrs->nh.label[0] = bgp_pmsi_get_label(pmsi->u.ptr);

  route = rte_get_temp(attrs, src);
  rte_update2(eth, eth_net, route, src);
  return;

withdraw:
  rte_update2(eth, eth_net, NULL, src);
}
/*
 * Route notification hook. Dispatches on the type of the notified network:
 * ethernet routes are announced to the EVPN channel, EVPN MAC and IMET
 * routes are passed to the ethernet channel, and everything else
 * (including MPLS routes and other EVPN subtypes) is ignored.
 */
static void
evpn_rt_notify(struct proto *P, struct channel *c0 UNUSED, net *net, rte *new, rte *old UNUSED)
{
  struct evpn_proto *p = (void *) P;
  const net_addr *addr = net->n.addr;

  if (addr->type == NET_ETH)
  {
    evpn_announce_mac(p, (const net_addr_eth *) addr, new);
    return;
  }

  if (addr->type != NET_EVPN)
    return;

  switch (((const net_addr_evpn *) addr)->subtype)
  {
  case NET_EVPN_MAC:
    evpn_receive_mac(p, (const net_addr_evpn_mac *) addr, new);
    break;

  case NET_EVPN_IMET:
    evpn_receive_imet(p, (const net_addr_evpn_imet *) addr, new);
    break;
  }
}
/*
 * Pre-export hook deciding whether route @e may be exported to this
 * protocol through channel @C. Returns -1 to reject the route and 0 to let
 * it continue through the usual export processing.
 *
 * Rejected are: routes originated by this protocol itself, ethernet routes
 * with a VID different from the configured one, EVPN routes whose extended
 * communities do not match the import targets, and all MPLS routes.
 */
static int
evpn_preexport(struct channel *C, rte *e)
{
  struct evpn_proto *p = (void *) C->proto;
  struct proto *origin = e->sender->proto;
  const net_addr *addr = e->net->n.addr;

  /* Avoid local loops automatically */
  if (origin == C->proto)
    return -1;

  switch (addr->type)
  {
  case NET_ETH:
    return (((const net_addr_eth *) addr)->vid != p->vid) ? -1 : 0;

  case NET_EVPN:
    if (evpn_import_targets(p, ea_get_adata(e->attrs->eattrs, EA_BGP_EXT_COMMUNITY)))
      return 0;
    return -1;

  case NET_MPLS:
    return -1;

  default:
    bug("invalid type");
  }
}
/*
 * Reload hook. Reloading routes on one channel is implemented as a refeed
 * of the opposite one: an ethernet channel reload refeeds the EVPN channel,
 * while EVPN and MPLS channel reloads refeed the ethernet channel.
 */
static void
evpn_reload_routes(struct channel *C)
{
  struct evpn_proto *p = (void *) C->proto;

  switch (C->net_type)
  {
  case NET_ETH:
    channel_request_feeding(p->evpn_channel);
    break;

  case NET_EVPN:
  case NET_MPLS:
    channel_request_feeding(p->eth_channel);
    break;
  }
}
/*
 * Return the IGP metric of route @e: the EA_GEN_IGP_METRIC attribute when
 * present, the igp_metric of the route attributes otherwise, in both cases
 * capped at IGP_METRIC_UNKNOWN.
 */
static inline u32
evpn_metric(rte *e)
{
  u32 igp = ea_get_int(e->attrs->eattrs, EA_GEN_IGP_METRIC, e->attrs->igp_metric);

  return MIN(igp, IGP_METRIC_UNKNOWN);
}
/*
 * Route comparison hook: @new is better than @old when its metric (see
 * evpn_metric()) is strictly lower.
 *
 * This is a hack, we should have a full BGP-style comparison.
 */
static int
evpn_rte_better(rte *new, rte *old)
{
  u32 new_metric = evpn_metric(new);
  u32 old_metric = evpn_metric(old);

  return new_metric < old_metric;
}
static void
evpn_postconfig(struct proto_config *CF)
{
struct evpn_config *cf = (void *) CF;
if (!proto_cf_find_channel(CF, NET_ETH))
cf_error("Ethernet channel not specified");
if (!proto_cf_find_channel(CF, NET_EVPN))
cf_error("EVPN channel not specified");
// if (!proto_cf_find_channel(CF, NET_MPLS))
// cf_error("MPLS channel not specified");
if (!cf->rd)
cf_error("Route distinguisher not specified");
if (!cf->import_target && !cf->export_target)
cf_error("Route target not specified");
if (!cf->import_target)
cf_error("Import target not specified");
if (!cf->export_target)
cf_error("Export target not specified");
}
/*
 * Create a protocol instance from configuration @CF: set up the ethernet,
 * EVPN and MPLS channels from their configured counterparts and install
 * the protocol hooks.
 */
static struct proto *
evpn_init(struct proto_config *CF)
{
  struct proto *P = proto_new(CF);
  struct evpn_proto *p = (void *) P;

  struct channel_config *eth_cf = proto_cf_find_channel(CF, NET_ETH);
  proto_configure_channel(P, &p->eth_channel, eth_cf);

  struct channel_config *evpn_cf = proto_cf_find_channel(CF, NET_EVPN);
  proto_configure_channel(P, &p->evpn_channel, evpn_cf);

  struct channel_config *mpls_cf = proto_cf_find_channel(CF, NET_MPLS);
  proto_configure_channel(P, &P->mpls_channel, mpls_cf);

  P->rt_notify = evpn_rt_notify;
  P->preexport = evpn_preexport;
  P->reload_routes = evpn_reload_routes;
  P->rte_better = evpn_rte_better;

  return P;
}
/*
 * Protocol start hook.
 *
 * Copies the configured parameters into the protocol instance, precomputes
 * the import / export target helpers, sets up the MPLS map, moves the
 * protocol to PS_UP and finally announces the local IMET route.
 *
 * Returns PS_UP.
 */
static int
evpn_start(struct proto *P)
{
  struct evpn_proto *p = (void *) P;
  struct evpn_config *cf = (void *) P->cf;

  p->rd = cf->rd;
  p->import_target = cf->import_target;
  p->export_target = cf->export_target;
  /* No buffer yet; evpn_prepare_export_targets() frees a non-NULL one */
  p->export_target_data = NULL;
  p->tunnel_dev = cf->tunnel_dev;
  p->router_addr = cf->router_addr;
  p->vni = cf->vni;
  p->vid = cf->vid;

  evpn_prepare_import_targets(p);
  evpn_prepare_export_targets(p);

  proto_setup_mpls_map(P, RTS_EVPN, 1);

  // XXX ?
  /*
   * evpn_postconfig() does not require an MPLS channel, so the MPLS map may
   * presumably be missing here (NOTE(review): confirm against
   * proto_setup_mpls_map()). Guard the dereference instead of crashing.
   */
  if (P->vrf_set && P->mpls_map)
    P->mpls_map->vrf_iface = P->vrf;

  /* Channels have to be up before the IMET route can be announced */
  proto_notify_state(P, PS_UP);
  evpn_announce_imet(p, 1);

  return PS_UP;
}
/*
 * Protocol shutdown hook: shuts down the MPLS map and reports PS_DOWN.
 */
static int
evpn_shutdown(struct proto *P)
{
  proto_shutdown_mpls_map(P, 1);

  return PS_DOWN;
}
/*
 * Protocol reconfiguration hook.
 *
 * Returns 0 (reconfiguration not possible, caller has to restart the
 * protocol) when a channel cannot be reconfigured or when the RD, tunnel
 * device, router address, VNI or VID changed. Otherwise the new target
 * trees are adopted and 1 is returned.
 *
 * A changed import target triggers a refeed of the EVPN channel. A changed
 * export target rebuilds the export target buffer, re-announces the local
 * IMET route (it carries the export targets and is otherwise generated only
 * in evpn_start()) and triggers a refeed of the ethernet channel.
 */
static int
evpn_reconfigure(struct proto *P, struct proto_config *CF)
{
  struct evpn_proto *p = (void *) P;
  struct evpn_config *cf = (void *) CF;

  if (!proto_configure_channel(P, &p->eth_channel, proto_cf_find_channel(CF, NET_ETH)) ||
      !proto_configure_channel(P, &p->evpn_channel, proto_cf_find_channel(CF, NET_EVPN)) ||
      !proto_configure_channel(P, &P->mpls_channel, proto_cf_find_channel(CF, NET_MPLS)))
    return 0;

  if ((p->rd != cf->rd) ||
      (p->tunnel_dev != cf->tunnel_dev) ||
      (!ipa_equal(p->router_addr, cf->router_addr)) ||
      (p->vni != cf->vni) ||
      (p->vid != cf->vid))
    return 0;

  /* Compare before the pointers to the old trees are overwritten */
  int import_changed = !same_tree(p->import_target, cf->import_target);
  int export_changed = !same_tree(p->export_target, cf->export_target);

  /* Update pointers to config structures */
  p->import_target = cf->import_target;
  p->export_target = cf->export_target;

  proto_setup_mpls_map(P, RTS_EVPN, 1);

  if (import_changed)
  {
    TRACE(D_EVENTS, "Import target changed");

    evpn_prepare_import_targets(p);

    if (p->evpn_channel && (p->evpn_channel->channel_state == CS_UP))
      channel_request_feeding(p->evpn_channel);
  }

  if (export_changed)
  {
    TRACE(D_EVENTS, "Export target changed");

    evpn_prepare_export_targets(p);

    /*
     * The IMET route is originated locally, not derived from the ethernet
     * table, so the refeed below does not refresh it. Re-announce it so it
     * does not keep the old route targets.
     */
    if (p->evpn_channel && (p->evpn_channel->channel_state == CS_UP))
      evpn_announce_imet(p, 1);

    if (p->eth_channel && (p->eth_channel->channel_state == CS_UP))
      channel_request_feeding(p->eth_channel);
  }

  return 1;
}
/*
 * Config copy hook. Intentionally empty: a shallow copy is sufficient, as
 * there are not many items in the configuration.
 */
static void
evpn_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
{
}
/*
static void
evpn_get_route_info(rte *rte, byte *buf)
{
u32 metric = evpn_metric(rte);
if (metric < IGP_METRIC_UNKNOWN)
bsprintf(buf, " (%u/%u)", rte->attrs->pref, metric);
else
bsprintf(buf, " (%u/?)", rte->attrs->pref);
}
*/
/*
 * EVPN protocol description: identification, accepted channel types,
 * instance / config structure sizes and the protocol hooks defined in
 * this file.
 */
struct protocol proto_evpn = {
  /* Identification */
  .class =		PROTOCOL_EVPN,
  .name =		"EVPN",
  .template =		"evpn%d",

  /* Channels and structure sizes */
  .channel_mask =	NB_ETH | NB_EVPN | NB_MPLS,
  .proto_size =		sizeof(struct evpn_proto),
  .config_size =	sizeof(struct evpn_config),

  /* Hooks; .get_route_info is currently not provided */
  .postconfig =		evpn_postconfig,
  .copy_config =	evpn_copy_config,
  .init =		evpn_init,
  .start =		evpn_start,
  .shutdown =		evpn_shutdown,
  .reconfigure =	evpn_reconfigure,
};
/*
 * Register the EVPN protocol by passing its description to proto_build().
 */
void
evpn_build(void)
{
  struct protocol *desc = &proto_evpn;

  proto_build(desc);
}