0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2025-01-03 15:41:54 +00:00

Merge commit 'a9646efd40569f3a1d749bc1bd13219876b33a00' into sark-bgp-rebased

This commit is contained in:
Maria Matejka 2022-02-04 15:26:36 +01:00
commit fe840ddad9
14 changed files with 2378 additions and 28 deletions

View File

@ -154,9 +154,9 @@ docker_ubuntu-20_04-amd64:
IMG_NAME: "ubuntu-20.04-amd64"
<<: *docker_build
docker_ubuntu-20_10-amd64:
docker_ubuntu-21_10-amd64:
variables:
IMG_NAME: "ubuntu-20.10-amd64"
IMG_NAME: "ubuntu-21.10-amd64"
<<: *docker_build
# GPG error
@ -318,9 +318,9 @@ build-ubuntu-20_04-amd64:
<<: *build-linux
image: registry.nic.cz/labs/bird:ubuntu-20.04-amd64
build-ubuntu-20_10-amd64:
build-ubuntu-21_10-amd64:
<<: *build-linux
image: registry.nic.cz/labs/bird:ubuntu-20.10-amd64
image: registry.nic.cz/labs/bird:ubuntu-21.10-amd64
#build-ubuntu-21_04-amd64:
# <<: *build-linux
@ -465,10 +465,11 @@ pkg-ubuntu-20.04-amd64:
needs: [build-ubuntu-20_04-amd64]
image: registry.nic.cz/labs/bird:ubuntu-20.04-amd64
pkg-ubuntu-20.10-amd64:
pkg-ubuntu-21.10-amd64:
<<: *pkg-deb
needs: [build-ubuntu-20_10-amd64]
image: registry.nic.cz/labs/bird:ubuntu-20.10-amd64
needs: [build-ubuntu-21_10-amd64]
image: registry.nic.cz/labs/bird:ubuntu-21.10-amd64
#pkg-ubuntu-21.04-amd64:
# <<: *pkg-deb

View File

@ -2419,6 +2419,12 @@ using the following configuration parameters:
same address family and using the same local port) should have set
<cf/strict bind/, or none of them. Default: disabled.
<tag><label id="bgp-free-bind">free bind <m/switch/</tag>
Use IP_FREEBIND socket option for the listening socket, which allows
binding to an IP address not (yet) assigned to an interface. Note that
all BGP instances that share a listening socket should have the same
value of the <cf/free bind/ option. Default: disabled.
<tag><label id="bgp-check-link">check link <M>switch</M></tag>
BGP can take the hardware link state into consideration. If enabled,
BIRD tracks the link state of the associated interface and when link
@ -3249,6 +3255,12 @@ channels.
allows to specify a limit on maximal number of nexthops in one route. By
default, multipath merging is disabled. If enabled, default value of the
limit is 16.
<tag><label id="krt-netlink-rx-buffer">netlink rx buffer <m/number/</tag> (Linux)
Set kernel receive buffer size (in bytes) for the netlink socket. The default
value is OS-dependent (from the <file>/proc/sys/net/core/rmem_default</file>
file). If you get "Kernel dropped some netlink message ..." warnings,
you may increase this value.
</descrip>
<sect1>Attributes

View File

@ -128,6 +128,7 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou
#define SKF_TTL_RX 0x08 /* Report TTL / Hop Limit for RX packets */
#define SKF_BIND 0x10 /* Bind datagram socket to given source address */
#define SKF_HIGH_PORT 0x20 /* Choose port from high range if possible */
#define SKF_FREEBIND 0x40 /* Allow socket to bind to a nonlocal address */
#define SKF_THREAD 0x100 /* Socket used in thread, do not add to main loop */
#define SKF_TRUNCATED 0x200 /* Received packet was truncated, set by IO layer */

View File

@ -1,9 +1,10 @@
FROM ubuntu:20.10
FROM ubuntu:21.10
ENV DEBIAN_FRONTEND=noninteractive
RUN sed -i 's/deb.debian.org/ftp.cz.debian.org/' /etc/apt/sources.list
RUN apt-get -y update
RUN apt-get -y upgrade
RUN apt-get -y install \
RUN apt-get -y --no-install-recommends install \
tzdata \
build-essential \
flex \
bison \

View File

@ -165,12 +165,17 @@ bgp_open(struct bgp_proto *p)
ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
(p->ipv4 ? IPA_NONE4 : IPA_NONE6);
uint port = p->cf->local_port;
uint flags = p->cf->free_bind ? SKF_FREEBIND : 0;
uint flag_mask = SKF_FREEBIND;
/* We assume that cf->iface is defined iff cf->local_ip is link-local */
WALK_LIST(bs, bgp_sockets)
if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->sport == port) &&
(bs->sk->iface == ifa) && (bs->sk->vrf == p->p.vrf))
if (ipa_equal(bs->sk->saddr, addr) &&
(bs->sk->sport == port) &&
(bs->sk->iface == ifa) &&
(bs->sk->vrf == p->p.vrf) &&
((bs->sk->flags & flag_mask) == flags))
{
bs->uc++;
p->sock = bs;
@ -184,7 +189,7 @@ bgp_open(struct bgp_proto *p)
sk->sport = port;
sk->iface = ifa;
sk->vrf = p->p.vrf;
sk->flags = SKF_PASSIVE_THREAD;
sk->flags = flags | SKF_PASSIVE_THREAD;
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->rbsize = BGP_RX_BUFFER_SIZE;
sk->tbsize = BGP_TX_BUFFER_SIZE;

View File

@ -86,6 +86,7 @@ struct bgp_config {
int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */
int multihop; /* Number of hops if multihop */
int strict_bind; /* Bind listening socket to local address */
int free_bind; /* Bind listening socket with SKF_FREEBIND */
int ttl_security; /* Enable TTL security [RFC 5082] */
int compare_path_lengths; /* Use path lengths when selecting best route */
int med_metric; /* Compare MULTI_EXIT_DISC even between routes from different ASes */

View File

@ -31,7 +31,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS,
DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE,
FIRST)
FIRST, FREE)
%type <i> bgp_nh
%type <i32> bgp_afi
@ -156,6 +156,7 @@ bgp_proto:
}
| bgp_proto DYNAMIC NAME DIGITS expr ';' { BGP_CFG->dynamic_name_digits = $5; if ($5>10) cf_error("Dynamic name digits must be at most 10"); }
| bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; }
| bgp_proto FREE BIND bool ';' { BGP_CFG->free_bind = $4; }
| bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; }
| bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; }
| bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; }

View File

@ -271,3 +271,9 @@ sk_set_priority(sock *s, int prio UNUSED)
{
ERR_MSG("Socket priority not supported");
}
/*
 * Variant of sk_set_freebind() for a platform without a free-bind socket
 * option: it never touches the socket and only reports the failure text
 * through ERR_MSG().
 *
 * NOTE(review): ERR_MSG() is defined outside this excerpt; it presumably
 * stores the message in @s and returns a negative value, which is why there
 * is no explicit return statement here -- confirm against the macro.
 */
static inline int
sk_set_freebind(sock *s)
{
ERR_MSG("Freebind is not supported");
}

View File

@ -69,6 +69,7 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i UNUSED) { return N
/* System-dependent kernel protocol parameters, filled in from the configuration */
struct krt_params {
u32 table_id; /* Kernel table ID we sync with */
u32 metric; /* Kernel metric used for all routes */
uint netlink_rx_buffer; /* Requested rx buffer size (bytes) for the netlink socket, 0 = no request */
};
struct krt_state {

View File

@ -10,7 +10,8 @@ CF_HDR
CF_DECLS
CF_KEYWORDS(KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW,
CF_KEYWORDS(KERNEL, TABLE, METRIC, NETLINK, RX, BUFFER,
KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW,
KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING,
KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK,
KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR,
@ -24,6 +25,7 @@ kern_proto: kern_proto kern_sys_item ';' ;
kern_sys_item:
KERNEL TABLE expr { THIS_KRT->sys.table_id = $3; }
| METRIC expr { THIS_KRT->sys.metric = $2; }
| NETLINK RX BUFFER expr { THIS_KRT->sys.netlink_rx_buffer = $4; }
;
dynamic_attr: KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); } ;

View File

@ -69,6 +69,10 @@
#define RTA_ENCAP 22
#endif
/* Fallback value, used only when the system headers do not define the option */
#ifndef NETLINK_GET_STRICT_CHK
#define NETLINK_GET_STRICT_CHK 12
#endif
#define krt_ipv4(p) ((p)->af == AF_INET)
#define krt_ecmp6(p) ((p)->af == AF_INET6)
@ -130,7 +134,7 @@ struct nl_sock
uint last_size;
};
#define NL_RX_SIZE 8192
#define NL_RX_SIZE 32768
#define NL_OP_DELETE 0
#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
@ -157,11 +161,47 @@ nl_open_sock(struct nl_sock *nl)
}
}
/*
 * Switch strict checking of dump requests on netlink socket @nl on
 * (non-zero @strict) or off (zero).
 *
 * This is a best-effort tweak. Strict checking only improves behavior on
 * newer kernels and BIRD works without it, so both ways it can be missing
 * are tolerated silently: SOL_NETLINK absent at compile time (nothing is
 * compiled in) and ENOPROTOOPT at run time (result deliberately dropped).
 */
static void
nl_set_strict_dump(struct nl_sock *nl, int strict)
{
#ifdef SOL_NETLINK
  (void) setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
		    &strict, sizeof(strict));
#endif
}
/*
 * Force the kernel receive buffer of socket @fd to @val bytes using
 * SO_RCVBUFFORCE. A failure is not fatal; it is only reported as a warning
 * together with the system error text.
 */
static void
nl_set_rcvbuf(int fd, uint val)
{
  int rv = setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val));

  if (rv >= 0)
    return;

  log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val);
}
static uint
nl_cfg_rx_buffer_size(struct config *cfg)
{
uint bufsize = 0;
struct proto_config *pc;
WALK_LIST(pc, cfg->protos)
if ((pc->protocol == &proto_unix_kernel) && !pc->disabled)
bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer);
return bufsize;
}
/*
 * Open both netlink sockets used by this file (nl_scan and nl_req) and then
 * request strict dump checking on the scan socket only.
 */
static void
nl_open(void)
{
nl_open_sock(&nl_scan);
nl_open_sock(&nl_req);
/* Must follow nl_open_sock(&nl_scan): the option is set on that socket's fd */
nl_set_strict_dump(&nl_scan, 1);
}
static void
@ -180,20 +220,60 @@ nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
}
static void
nl_request_dump(int af, int cmd)
nl_request_dump_link(void)
{
struct {
struct nlmsghdr nh;
struct rtgenmsg g;
struct ifinfomsg ifi;
} req = {
.nh.nlmsg_type = cmd,
.nh.nlmsg_len = sizeof(req),
.nh.nlmsg_type = RTM_GETLINK,
.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
.g.rtgen_family = af
.nh.nlmsg_seq = ++(nl_scan.seq),
.ifi.ifi_family = AF_UNSPEC,
};
nl_send(&nl_scan, &req.nh);
send(nl_scan.fd, &req, sizeof(req), 0);
nl_scan.last_hdr = NULL;
}
/*
 * Send an RTM_GETADDR dump request for address family @af on the scan socket.
 *
 * The request is a netlink header followed by a struct ifaddrmsg in which
 * only the family is set. The scan socket's sequence counter is incremented
 * and the new value is used as the message sequence number.
 *
 * NOTE(review): the return value of send() is not checked here.
 */
static void
nl_request_dump_addr(int af)
{
  struct {
    struct nlmsghdr nh;
    struct ifaddrmsg ifa;
  } req = {
    .nh = {
      .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
      .nlmsg_type = RTM_GETADDR,
      .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
      .nlmsg_seq = ++(nl_scan.seq),
    },
    .ifa = {
      .ifa_family = af,
    },
  };

  send(nl_scan.fd, &req, sizeof(req), 0);

  /* Forget the last received header (presumably so reply parsing starts afresh -- confirm in nl_get_reply) */
  nl_scan.last_hdr = NULL;
}
/*
 * Send an RTM_GETROUTE dump request for address family @af on the scan socket.
 *
 * The request is a netlink header followed by a struct rtmsg in which only
 * the family is set. The scan socket's sequence counter is incremented and
 * the new value is used as the message sequence number.
 *
 * NOTE(review): the return value of send() is not checked here.
 */
static void
nl_request_dump_route(int af)
{
  struct {
    struct nlmsghdr nh;
    struct rtmsg rtm;
  } req = {
    .nh = {
      .nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
      .nlmsg_type = RTM_GETROUTE,
      .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
      .nlmsg_seq = ++(nl_scan.seq),
    },
    .rtm = {
      .rtm_family = af,
    },
  };

  send(nl_scan.fd, &req, sizeof(req), 0);

  /* Forget the last received header (presumably so reply parsing starts afresh -- confirm in nl_get_reply) */
  nl_scan.last_hdr = NULL;
}
static struct nlmsghdr *
nl_get_reply(struct nl_sock *nl)
{
@ -1151,7 +1231,7 @@ kif_do_scan(struct kif_proto *p UNUSED)
if_start_update();
nl_request_dump(AF_UNSPEC, RTM_GETLINK);
nl_request_dump_link();
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
nl_parse_link(h, 1);
@ -1179,14 +1259,14 @@ kif_do_scan(struct kif_proto *p UNUSED)
}
}
nl_request_dump(AF_INET, RTM_GETADDR);
nl_request_dump_addr(AF_INET);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
nl_parse_addr(h, 1);
else
log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
nl_request_dump(AF_INET6, RTM_GETADDR);
nl_request_dump_addr(AF_INET6);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
nl_parse_addr(h, 1);
@ -1545,7 +1625,8 @@ nl_parse_end(struct nl_parse_state *s)
}
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
#define SKIP0(ARG, ...) do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0)
#define SKIP(ARG, ...) do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0)
static void
nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
@ -1598,10 +1679,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
return;
if (!a[RTA_DST])
SKIP("MPLS route without RTA_DST");
SKIP0("MPLS route without RTA_DST\n");
if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
SKIP("MPLS route with multi-label RTA_DST");
SKIP0("MPLS route with multi-label RTA_DST\n");
net_fill_mpls(&dst, rta_mpls_stack[0]);
break;
@ -1619,6 +1700,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
else
table_id = i->rtm_table;
if (i->rtm_flags & RTM_F_CLONED)
SKIP("cloned\n");
/* Do we know this table? */
p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
if (!p)
@ -1906,7 +1990,7 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
struct nl_parse_state s;
nl_parse_begin(&s, 1);
nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
nl_request_dump_route(AF_UNSPEC);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
nl_parse_route(&s, h);
@ -1921,6 +2005,8 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
static byte *nl_async_rx_buffer; /* Receive buffer */
static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */
static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */
static void
nl_async_msg(struct nlmsghdr *h)
@ -2056,6 +2142,32 @@ nl_open_async(void)
bug("Netlink: sk_open failed");
}
/*
 * Bring the kernel rx buffer size of the async netlink socket in line with
 * the current configuration.
 *
 * Nothing happens when the async socket does not exist, or when the current
 * configuration was already processed by an earlier call. Otherwise the size
 * requested by the configuration is applied if it is non-zero and differs
 * from the one recorded in nl_async_bufsize. In both cases the configuration
 * is then remembered as processed.
 */
static void
nl_update_async_bufsize(void)
{
  /* No async socket, or this configuration has been handled already */
  if (!nl_async_sk || (nl_last_config == config))
    return;

  uint wanted = nl_cfg_rx_buffer_size(config);

  if ((wanted != 0) && (wanted != nl_async_bufsize))
  {
    /* Stay quiet on the very first call; announce changes on reconfiguration only */
    if (nl_last_config != NULL)
      log(L_INFO "KRT: Changing netlink rx buffer size to %u", wanted);

    nl_set_rcvbuf(nl_async_sk->fd, wanted);
    nl_async_bufsize = wanted;
  }

  nl_last_config = config;
}
/*
* Interface to the UNIX krt module
@ -2084,6 +2196,7 @@ krt_sys_start(struct krt_proto *p)
nl_open();
nl_open_async();
nl_update_async_bufsize();
return 1;
}
@ -2091,12 +2204,16 @@ krt_sys_start(struct krt_proto *p)
void
krt_sys_shutdown(struct krt_proto *p)
{
nl_update_async_bufsize();
HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
}
int
krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
{
nl_update_async_bufsize();
return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
}

2179
sysdep/linux/netlink.c.orig Normal file

File diff suppressed because it is too large Load Diff

View File

@ -10,6 +10,10 @@
#define IPV6_MINHOPCOUNT 73
#endif
/* Fallback value, used only when the system headers do not define the option */
#ifndef IPV6_FREEBIND
#define IPV6_FREEBIND 78
#endif
#ifndef TCP_MD5SIG_EXT
#define TCP_MD5SIG_EXT 32
#endif
@ -266,3 +270,18 @@ sk_set_priority(sock *s, int prio)
return 0;
}
/*
 * Enable the free-bind socket option on @s, which allows the socket to be
 * bound to a nonlocal address. IP_FREEBIND is used for IPv4 sockets and
 * IPV6_FREEBIND for IPv6 sockets.
 *
 * Returns 0 when no setsockopt() call failed. On failure control passes to
 * ERR(), which is defined outside this excerpt; it presumably records the
 * error in @s and returns a negative value -- confirm against the macro.
 */
static inline int
sk_set_freebind(sock *s)
{
  int on = 1;

  if (sk_is_ipv4(s) &&
      (setsockopt(s->fd, SOL_IP, IP_FREEBIND, &on, sizeof(on)) < 0))
    ERR("IP_FREEBIND");

  if (sk_is_ipv6(s) &&
      (setsockopt(s->fd, SOL_IPV6, IPV6_FREEBIND, &on, sizeof(on)) < 0))
    ERR("IPV6_FREEBIND");

  return 0;
}

View File

@ -1463,6 +1463,10 @@ sk_open(sock *s)
if (sk_set_high_port(s) < 0)
log(L_WARN "Socket error: %s%#m", s->err);
if (s->flags & SKF_FREEBIND)
if (sk_set_freebind(s) < 0)
log(L_WARN "Socket error: %s%#m", s->err);
sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
ERR2("bind");